[pypy-commit] pypy arm-backend-2: merge default

bivab noreply at buildbot.pypy.org
Wed Jul 13 11:01:53 CEST 2011


Author: David Schneider <david.schneider at picle.org>
Branch: arm-backend-2
Changeset: r45526:11ab6fab5723
Date: 2011-07-12 13:46 +0200
http://bitbucket.org/pypy/pypy/changeset/11ab6fab5723/

Log:	merge default

diff --git a/.hgignore b/.hgignore
--- a/.hgignore
+++ b/.hgignore
@@ -2,6 +2,7 @@
 *.py[co]
 *.sw[po]
 *~
+.*.swp
 
 syntax: regexp
 ^testresult$
@@ -39,6 +40,8 @@
 ^pypy/translator/benchmark/shootout_benchmarks$
 ^pypy/translator/goal/pypy-translation-snapshot$
 ^pypy/translator/goal/pypy-c
+^pypy/translator/goal/pypy-jvm
+^pypy/translator/goal/pypy-jvm.jar
 ^pypy/translator/goal/.+\.exe$
 ^pypy/translator/goal/.+\.dll$
 ^pypy/translator/goal/target.+-c$
@@ -65,6 +68,7 @@
 ^pypy/doc/image/lattice3\.png$
 ^pypy/doc/image/stackless_informal\.png$
 ^pypy/doc/image/parsing_example.+\.png$
+^pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test\.o$
 ^compiled
 ^.git/
 ^release/
diff --git a/_pytest/__init__.py b/_pytest/__init__.py
--- a/_pytest/__init__.py
+++ b/_pytest/__init__.py
@@ -1,2 +1,2 @@
 #
-__version__ = '2.0.3'
+__version__ = '2.1.0.dev4'
diff --git a/_pytest/assertion.py b/_pytest/assertion.py
deleted file mode 100644
--- a/_pytest/assertion.py
+++ /dev/null
@@ -1,177 +0,0 @@
-"""
-support for presented detailed information in failing assertions.
-"""
-import py
-import sys
-from _pytest.monkeypatch import monkeypatch
-
-def pytest_addoption(parser):
-    group = parser.getgroup("debugconfig")
-    group._addoption('--no-assert', action="store_true", default=False,
-        dest="noassert",
-        help="disable python assert expression reinterpretation."),
-
-def pytest_configure(config):
-    # The _reprcompare attribute on the py.code module is used by
-    # py._code._assertionnew to detect this plugin was loaded and in
-    # turn call the hooks defined here as part of the
-    # DebugInterpreter.
-    m = monkeypatch()
-    config._cleanup.append(m.undo)
-    warn_about_missing_assertion()
-    if not config.getvalue("noassert") and not config.getvalue("nomagic"):
-        def callbinrepr(op, left, right):
-            hook_result = config.hook.pytest_assertrepr_compare(
-                config=config, op=op, left=left, right=right)
-            for new_expl in hook_result:
-                if new_expl:
-                    return '\n~'.join(new_expl)
-        m.setattr(py.builtin.builtins,
-                  'AssertionError', py.code._AssertionError)
-        m.setattr(py.code, '_reprcompare', callbinrepr)
-
-def warn_about_missing_assertion():
-    try:
-        assert False
-    except AssertionError:
-        pass
-    else:
-        sys.stderr.write("WARNING: failing tests may report as passing because "
-        "assertions are turned off!  (are you using python -O?)\n")
-
-# Provide basestring in python3
-try:
-    basestring = basestring
-except NameError:
-    basestring = str
-
-
-def pytest_assertrepr_compare(op, left, right):
-    """return specialised explanations for some operators/operands"""
-    width = 80 - 15 - len(op) - 2 # 15 chars indentation, 1 space around op
-    left_repr = py.io.saferepr(left, maxsize=int(width/2))
-    right_repr = py.io.saferepr(right, maxsize=width-len(left_repr))
-    summary = '%s %s %s' % (left_repr, op, right_repr)
-
-    issequence = lambda x: isinstance(x, (list, tuple))
-    istext = lambda x: isinstance(x, basestring)
-    isdict = lambda x: isinstance(x, dict)
-    isset = lambda x: isinstance(x, set)
-
-    explanation = None
-    try:
-        if op == '==':
-            if istext(left) and istext(right):
-                explanation = _diff_text(left, right)
-            elif issequence(left) and issequence(right):
-                explanation = _compare_eq_sequence(left, right)
-            elif isset(left) and isset(right):
-                explanation = _compare_eq_set(left, right)
-            elif isdict(left) and isdict(right):
-                explanation = _diff_text(py.std.pprint.pformat(left),
-                                         py.std.pprint.pformat(right))
-        elif op == 'not in':
-            if istext(left) and istext(right):
-                explanation = _notin_text(left, right)
-    except py.builtin._sysex:
-        raise
-    except:
-        excinfo = py.code.ExceptionInfo()
-        explanation = ['(pytest_assertion plugin: representation of '
-            'details failed. Probably an object has a faulty __repr__.)',
-            str(excinfo)
-            ]
-
-
-    if not explanation:
-        return None
-
-    # Don't include pageloads of data, should be configurable
-    if len(''.join(explanation)) > 80*8:
-        explanation = ['Detailed information too verbose, truncated']
-
-    return [summary] + explanation
-
-
-def _diff_text(left, right):
-    """Return the explanation for the diff between text
-
-    This will skip leading and trailing characters which are
-    identical to keep the diff minimal.
-    """
-    explanation = []
-    i = 0 # just in case left or right has zero length
-    for i in range(min(len(left), len(right))):
-        if left[i] != right[i]:
-            break
-    if i > 42:
-        i -= 10                 # Provide some context
-        explanation = ['Skipping %s identical '
-                       'leading characters in diff' % i]
-        left = left[i:]
-        right = right[i:]
-    if len(left) == len(right):
-        for i in range(len(left)):
-            if left[-i] != right[-i]:
-                break
-        if i > 42:
-            i -= 10     # Provide some context
-            explanation += ['Skipping %s identical '
-                            'trailing characters in diff' % i]
-            left = left[:-i]
-            right = right[:-i]
-    explanation += [line.strip('\n')
-                    for line in py.std.difflib.ndiff(left.splitlines(),
-                                                     right.splitlines())]
-    return explanation
-
-
-def _compare_eq_sequence(left, right):
-    explanation = []
-    for i in range(min(len(left), len(right))):
-        if left[i] != right[i]:
-            explanation += ['At index %s diff: %r != %r' %
-                            (i, left[i], right[i])]
-            break
-    if len(left) > len(right):
-        explanation += ['Left contains more items, '
-            'first extra item: %s' % py.io.saferepr(left[len(right)],)]
-    elif len(left) < len(right):
-        explanation += ['Right contains more items, '
-            'first extra item: %s' % py.io.saferepr(right[len(left)],)]
-    return explanation # + _diff_text(py.std.pprint.pformat(left),
-                       #             py.std.pprint.pformat(right))
-
-
-def _compare_eq_set(left, right):
-    explanation = []
-    diff_left = left - right
-    diff_right = right - left
-    if diff_left:
-        explanation.append('Extra items in the left set:')
-        for item in diff_left:
-            explanation.append(py.io.saferepr(item))
-    if diff_right:
-        explanation.append('Extra items in the right set:')
-        for item in diff_right:
-            explanation.append(py.io.saferepr(item))
-    return explanation
-
-
-def _notin_text(term, text):
-    index = text.find(term)
-    head = text[:index]
-    tail = text[index+len(term):]
-    correct_text = head + tail
-    diff = _diff_text(correct_text, text)
-    newdiff = ['%s is contained here:' % py.io.saferepr(term, maxsize=42)]
-    for line in diff:
-        if line.startswith('Skipping'):
-            continue
-        if line.startswith('- '):
-            continue
-        if line.startswith('+ '):
-            newdiff.append('  ' + line[2:])
-        else:
-            newdiff.append(line)
-    return newdiff
diff --git a/_pytest/assertion/__init__.py b/_pytest/assertion/__init__.py
new file mode 100644
--- /dev/null
+++ b/_pytest/assertion/__init__.py
@@ -0,0 +1,128 @@
+"""
+support for presenting detailed information in failing assertions.
+"""
+import py
+import imp
+import marshal
+import struct
+import sys
+import pytest
+from _pytest.monkeypatch import monkeypatch
+from _pytest.assertion import reinterpret, util
+
+try:
+    from _pytest.assertion.rewrite import rewrite_asserts
+except ImportError:
+    rewrite_asserts = None
+else:
+    import ast
+
+def pytest_addoption(parser):
+    group = parser.getgroup("debugconfig")
+    group.addoption('--assertmode', action="store", dest="assertmode",
+                    choices=("on", "old", "off", "default"), default="default",
+                    metavar="on|old|off",
+                    help="""control assertion debugging tools.
+'off' performs no assertion debugging.
+'old' reinterprets the expressions in asserts to glean information.
+'on' (the default) rewrites the assert statements in test modules to provide
+sub-expression results.""")
+    group.addoption('--no-assert', action="store_true", default=False,
+        dest="noassert", help="DEPRECATED equivalent to --assertmode=off")
+    group.addoption('--nomagic', action="store_true", default=False,
+        dest="nomagic", help="DEPRECATED equivalent to --assertmode=off")
+
+class AssertionState:
+    """State for the assertion plugin."""
+
+    def __init__(self, config, mode):
+        self.mode = mode
+        self.trace = config.trace.root.get("assertion")
+
+def pytest_configure(config):
+    warn_about_missing_assertion()
+    mode = config.getvalue("assertmode")
+    if config.getvalue("noassert") or config.getvalue("nomagic"):
+        if mode not in ("off", "default"):
+            raise pytest.UsageError("assertion options conflict")
+        mode = "off"
+    elif mode == "default":
+        mode = "on"
+    if mode != "off":
+        def callbinrepr(op, left, right):
+            hook_result = config.hook.pytest_assertrepr_compare(
+                config=config, op=op, left=left, right=right)
+            for new_expl in hook_result:
+                if new_expl:
+                    return '\n~'.join(new_expl)
+        m = monkeypatch()
+        config._cleanup.append(m.undo)
+        m.setattr(py.builtin.builtins, 'AssertionError',
+                  reinterpret.AssertionError)
+        m.setattr(util, '_reprcompare', callbinrepr)
+    if mode == "on" and rewrite_asserts is None:
+        mode = "old"
+    config._assertstate = AssertionState(config, mode)
+    config._assertstate.trace("configured with mode set to %r" % (mode,))
+
+def _write_pyc(co, source_path):
+    if hasattr(imp, "cache_from_source"):
+        # Handle PEP 3147 pycs.
+        pyc = py.path.local(imp.cache_from_source(str(source_path)))
+        pyc.ensure()
+    else:
+        pyc = source_path + "c"
+    mtime = int(source_path.mtime())
+    fp = pyc.open("wb")
+    try:
+        fp.write(imp.get_magic())
+        fp.write(struct.pack("<l", mtime))
+        marshal.dump(co, fp)
+    finally:
+        fp.close()
+    return pyc
+
+def before_module_import(mod):
+    if mod.config._assertstate.mode != "on":
+        return
+    # Some deep magic: load the source, rewrite the asserts, and write a
+    # fake pyc, so that it'll be loaded when the module is imported.
+    source = mod.fspath.read()
+    try:
+        tree = ast.parse(source)
+    except SyntaxError:
+        # Let this pop up again in the real import.
+        mod.config._assertstate.trace("failed to parse: %r" % (mod.fspath,))
+        return
+    rewrite_asserts(tree)
+    try:
+        co = compile(tree, str(mod.fspath), "exec")
+    except SyntaxError:
+        # It's possible that this error is from some bug in the assertion
+        # rewriting, but I don't know of a fast way to tell.
+        mod.config._assertstate.trace("failed to compile: %r" % (mod.fspath,))
+        return
+    mod._pyc = _write_pyc(co, mod.fspath)
+    mod.config._assertstate.trace("wrote pyc: %r" % (mod._pyc,))
+
+def after_module_import(mod):
+    if not hasattr(mod, "_pyc"):
+        return
+    state = mod.config._assertstate
+    try:
+        mod._pyc.remove()
+    except py.error.ENOENT:
+        state.trace("couldn't find pyc: %r" % (mod._pyc,))
+    else:
+        state.trace("removed pyc: %r" % (mod._pyc,))
+
+def warn_about_missing_assertion():
+    try:
+        assert False
+    except AssertionError:
+        pass
+    else:
+        sys.stderr.write("WARNING: failing tests may report as passing because "
+        "assertions are turned off!  (are you using python -O?)\n")
+
+pytest_assertrepr_compare = util.assertrepr_compare
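
The hunk above adds the assert-rewriting import hook: before_module_import() parses the test module, rewrites its asserts, and writes a fake .pyc so the rewritten code is what actually gets imported; after_module_import() removes that pyc again. A condensed, hedged sketch of the same flow follows; the function name compile_with_rewritten_asserts and the non-PEP-3147 cache path are illustrative assumptions, and the .pyc header layout (magic + 4-byte little-endian mtime + marshalled code object) assumes CPython 2.x. Sketch only, not part of the changeset:

    # sketch only: what before_module_import()/_write_pyc() boil down to
    import ast
    import imp
    import marshal
    import os
    import struct

    from _pytest.assertion.rewrite import rewrite_asserts

    def compile_with_rewritten_asserts(source_path):
        source = open(source_path).read()
        tree = ast.parse(source)          # a SyntaxError here is left for the real import to report
        rewrite_asserts(tree)             # inject the @pytest_ar helpers and rewrite each assert
        co = compile(tree, source_path, "exec")
        pyc_path = source_path + "c"      # simplest layout; PEP 3147 caches go via imp.cache_from_source()
        fp = open(pyc_path, "wb")
        try:
            fp.write(imp.get_magic())                                         # magic number
            fp.write(struct.pack("<l", int(os.path.getmtime(source_path))))   # source mtime
            marshal.dump(co, fp)                                              # the rewritten code object
        finally:
            fp.close()
        return pyc_path   # the import machinery now loads this instead of recompiling the source
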
diff --git a/_pytest/assertion/newinterpret.py b/_pytest/assertion/newinterpret.py
new file mode 100644
--- /dev/null
+++ b/_pytest/assertion/newinterpret.py
@@ -0,0 +1,333 @@
+"""
+Find intermediate evaluation results in assert statements through builtin AST.
+This should replace oldinterpret.py eventually.
+"""
+
+import sys
+import ast
+
+import py
+from _pytest.assertion import util
+from _pytest.assertion.reinterpret import BuiltinAssertionError
+
+
+if sys.platform.startswith("java") and sys.version_info < (2, 5, 2):
+    # See http://bugs.jython.org/issue1497
+    _exprs = ("BoolOp", "BinOp", "UnaryOp", "Lambda", "IfExp", "Dict",
+              "ListComp", "GeneratorExp", "Yield", "Compare", "Call",
+              "Repr", "Num", "Str", "Attribute", "Subscript", "Name",
+              "List", "Tuple")
+    _stmts = ("FunctionDef", "ClassDef", "Return", "Delete", "Assign",
+              "AugAssign", "Print", "For", "While", "If", "With", "Raise",
+              "TryExcept", "TryFinally", "Assert", "Import", "ImportFrom",
+              "Exec", "Global", "Expr", "Pass", "Break", "Continue")
+    _expr_nodes = set(getattr(ast, name) for name in _exprs)
+    _stmt_nodes = set(getattr(ast, name) for name in _stmts)
+    def _is_ast_expr(node):
+        return node.__class__ in _expr_nodes
+    def _is_ast_stmt(node):
+        return node.__class__ in _stmt_nodes
+else:
+    def _is_ast_expr(node):
+        return isinstance(node, ast.expr)
+    def _is_ast_stmt(node):
+        return isinstance(node, ast.stmt)
+
+
+class Failure(Exception):
+    """Error found while interpreting AST."""
+
+    def __init__(self, explanation=""):
+        self.cause = sys.exc_info()
+        self.explanation = explanation
+
+
+def interpret(source, frame, should_fail=False):
+    mod = ast.parse(source)
+    visitor = DebugInterpreter(frame)
+    try:
+        visitor.visit(mod)
+    except Failure:
+        failure = sys.exc_info()[1]
+        return getfailure(failure)
+    if should_fail:
+        return ("(assertion failed, but when it was re-run for "
+                "printing intermediate values, it did not fail.  Suggestions: "
+                "compute assert expression before the assert or use --no-assert)")
+
+def run(offending_line, frame=None):
+    if frame is None:
+        frame = py.code.Frame(sys._getframe(1))
+    return interpret(offending_line, frame)
+
+def getfailure(e):
+    explanation = util.format_explanation(e.explanation)
+    value = e.cause[1]
+    if str(value):
+        lines = explanation.split('\n')
+        lines[0] += "  << %s" % (value,)
+        explanation = '\n'.join(lines)
+    text = "%s: %s" % (e.cause[0].__name__, explanation)
+    if text.startswith('AssertionError: assert '):
+        text = text[16:]
+    return text
+
+operator_map = {
+    ast.BitOr : "|",
+    ast.BitXor : "^",
+    ast.BitAnd : "&",
+    ast.LShift : "<<",
+    ast.RShift : ">>",
+    ast.Add : "+",
+    ast.Sub : "-",
+    ast.Mult : "*",
+    ast.Div : "/",
+    ast.FloorDiv : "//",
+    ast.Mod : "%",
+    ast.Eq : "==",
+    ast.NotEq : "!=",
+    ast.Lt : "<",
+    ast.LtE : "<=",
+    ast.Gt : ">",
+    ast.GtE : ">=",
+    ast.Pow : "**",
+    ast.Is : "is",
+    ast.IsNot : "is not",
+    ast.In : "in",
+    ast.NotIn : "not in"
+}
+
+unary_map = {
+    ast.Not : "not %s",
+    ast.Invert : "~%s",
+    ast.USub : "-%s",
+    ast.UAdd : "+%s"
+}
+
+
+class DebugInterpreter(ast.NodeVisitor):
+    """Interpret AST nodes to gleam useful debugging information. """
+
+    def __init__(self, frame):
+        self.frame = frame
+
+    def generic_visit(self, node):
+        # Fallback when we don't have a special implementation.
+        if _is_ast_expr(node):
+            mod = ast.Expression(node)
+            co = self._compile(mod)
+            try:
+                result = self.frame.eval(co)
+            except Exception:
+                raise Failure()
+            explanation = self.frame.repr(result)
+            return explanation, result
+        elif _is_ast_stmt(node):
+            mod = ast.Module([node])
+            co = self._compile(mod, "exec")
+            try:
+                self.frame.exec_(co)
+            except Exception:
+                raise Failure()
+            return None, None
+        else:
+            raise AssertionError("can't handle %s" %(node,))
+
+    def _compile(self, source, mode="eval"):
+        return compile(source, "<assertion interpretation>", mode)
+
+    def visit_Expr(self, expr):
+        return self.visit(expr.value)
+
+    def visit_Module(self, mod):
+        for stmt in mod.body:
+            self.visit(stmt)
+
+    def visit_Name(self, name):
+        explanation, result = self.generic_visit(name)
+        # See if the name is local.
+        source = "%r in locals() is not globals()" % (name.id,)
+        co = self._compile(source)
+        try:
+            local = self.frame.eval(co)
+        except Exception:
+            # have to assume it isn't
+            local = None
+        if local is None or not self.frame.is_true(local):
+            return name.id, result
+        return explanation, result
+
+    def visit_Compare(self, comp):
+        left = comp.left
+        left_explanation, left_result = self.visit(left)
+        for op, next_op in zip(comp.ops, comp.comparators):
+            next_explanation, next_result = self.visit(next_op)
+            op_symbol = operator_map[op.__class__]
+            explanation = "%s %s %s" % (left_explanation, op_symbol,
+                                        next_explanation)
+            source = "__exprinfo_left %s __exprinfo_right" % (op_symbol,)
+            co = self._compile(source)
+            try:
+                result = self.frame.eval(co, __exprinfo_left=left_result,
+                                         __exprinfo_right=next_result)
+            except Exception:
+                raise Failure(explanation)
+            try:
+                if not self.frame.is_true(result):
+                    break
+            except KeyboardInterrupt:
+                raise
+            except:
+                break
+            left_explanation, left_result = next_explanation, next_result
+
+        if util._reprcompare is not None:
+            res = util._reprcompare(op_symbol, left_result, next_result)
+            if res:
+                explanation = res
+        return explanation, result
+
+    def visit_BoolOp(self, boolop):
+        is_or = isinstance(boolop.op, ast.Or)
+        explanations = []
+        for operand in boolop.values:
+            explanation, result = self.visit(operand)
+            explanations.append(explanation)
+            if result == is_or:
+                break
+        name = is_or and " or " or " and "
+        explanation = "(" + name.join(explanations) + ")"
+        return explanation, result
+
+    def visit_UnaryOp(self, unary):
+        pattern = unary_map[unary.op.__class__]
+        operand_explanation, operand_result = self.visit(unary.operand)
+        explanation = pattern % (operand_explanation,)
+        co = self._compile(pattern % ("__exprinfo_expr",))
+        try:
+            result = self.frame.eval(co, __exprinfo_expr=operand_result)
+        except Exception:
+            raise Failure(explanation)
+        return explanation, result
+
+    def visit_BinOp(self, binop):
+        left_explanation, left_result = self.visit(binop.left)
+        right_explanation, right_result = self.visit(binop.right)
+        symbol = operator_map[binop.op.__class__]
+        explanation = "(%s %s %s)" % (left_explanation, symbol,
+                                      right_explanation)
+        source = "__exprinfo_left %s __exprinfo_right" % (symbol,)
+        co = self._compile(source)
+        try:
+            result = self.frame.eval(co, __exprinfo_left=left_result,
+                                     __exprinfo_right=right_result)
+        except Exception:
+            raise Failure(explanation)
+        return explanation, result
+
+    def visit_Call(self, call):
+        func_explanation, func = self.visit(call.func)
+        arg_explanations = []
+        ns = {"__exprinfo_func" : func}
+        arguments = []
+        for arg in call.args:
+            arg_explanation, arg_result = self.visit(arg)
+            arg_name = "__exprinfo_%s" % (len(ns),)
+            ns[arg_name] = arg_result
+            arguments.append(arg_name)
+            arg_explanations.append(arg_explanation)
+        for keyword in call.keywords:
+            arg_explanation, arg_result = self.visit(keyword.value)
+            arg_name = "__exprinfo_%s" % (len(ns),)
+            ns[arg_name] = arg_result
+            keyword_source = "%s=%%s" % (keyword.arg)
+            arguments.append(keyword_source % (arg_name,))
+            arg_explanations.append(keyword_source % (arg_explanation,))
+        if call.starargs:
+            arg_explanation, arg_result = self.visit(call.starargs)
+            arg_name = "__exprinfo_star"
+            ns[arg_name] = arg_result
+            arguments.append("*%s" % (arg_name,))
+            arg_explanations.append("*%s" % (arg_explanation,))
+        if call.kwargs:
+            arg_explanation, arg_result = self.visit(call.kwargs)
+            arg_name = "__exprinfo_kwds"
+            ns[arg_name] = arg_result
+            arguments.append("**%s" % (arg_name,))
+            arg_explanations.append("**%s" % (arg_explanation,))
+        args_explained = ", ".join(arg_explanations)
+        explanation = "%s(%s)" % (func_explanation, args_explained)
+        args = ", ".join(arguments)
+        source = "__exprinfo_func(%s)" % (args,)
+        co = self._compile(source)
+        try:
+            result = self.frame.eval(co, **ns)
+        except Exception:
+            raise Failure(explanation)
+        pattern = "%s\n{%s = %s\n}"
+        rep = self.frame.repr(result)
+        explanation = pattern % (rep, rep, explanation)
+        return explanation, result
+
+    def _is_builtin_name(self, name):
+        pattern = "%r not in globals() and %r not in locals()"
+        source = pattern % (name.id, name.id)
+        co = self._compile(source)
+        try:
+            return self.frame.eval(co)
+        except Exception:
+            return False
+
+    def visit_Attribute(self, attr):
+        if not isinstance(attr.ctx, ast.Load):
+            return self.generic_visit(attr)
+        source_explanation, source_result = self.visit(attr.value)
+        explanation = "%s.%s" % (source_explanation, attr.attr)
+        source = "__exprinfo_expr.%s" % (attr.attr,)
+        co = self._compile(source)
+        try:
+            result = self.frame.eval(co, __exprinfo_expr=source_result)
+        except Exception:
+            raise Failure(explanation)
+        explanation = "%s\n{%s = %s.%s\n}" % (self.frame.repr(result),
+                                              self.frame.repr(result),
+                                              source_explanation, attr.attr)
+        # Check if the attr is from an instance.
+        source = "%r in getattr(__exprinfo_expr, '__dict__', {})"
+        source = source % (attr.attr,)
+        co = self._compile(source)
+        try:
+            from_instance = self.frame.eval(co, __exprinfo_expr=source_result)
+        except Exception:
+            from_instance = None
+        if from_instance is None or self.frame.is_true(from_instance):
+            rep = self.frame.repr(result)
+            pattern = "%s\n{%s = %s\n}"
+            explanation = pattern % (rep, rep, explanation)
+        return explanation, result
+
+    def visit_Assert(self, assrt):
+        test_explanation, test_result = self.visit(assrt.test)
+        explanation = "assert %s" % (test_explanation,)
+        if not self.frame.is_true(test_result):
+            try:
+                raise BuiltinAssertionError
+            except Exception:
+                raise Failure(explanation)
+        return explanation, test_result
+
+    def visit_Assign(self, assign):
+        value_explanation, value_result = self.visit(assign.value)
+        explanation = "... = %s" % (value_explanation,)
+        name = ast.Name("__exprinfo_expr", ast.Load(),
+                        lineno=assign.value.lineno,
+                        col_offset=assign.value.col_offset)
+        new_assign = ast.Assign(assign.targets, name, lineno=assign.lineno,
+                                col_offset=assign.col_offset)
+        mod = ast.Module([new_assign])
+        co = self._compile(mod, "exec")
+        try:
+            self.frame.exec_(co, __exprinfo_expr=value_result)
+        except Exception:
+            raise Failure(explanation)
+        return explanation, value_result
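
A hedged usage sketch of the new interpreter (explain_failure and its local variables are made up for illustration): interpret() re-evaluates a failing assert expression piece by piece against a py.code.Frame and returns a textual breakdown of the intermediate values.

    # sketch only, not from this changeset
    import sys
    import py
    from _pytest.assertion.newinterpret import interpret

    def explain_failure():
        x, y = 1, 2
        frame = py.code.Frame(sys._getframe())   # frame whose locals/globals the expression is evaluated in
        # should_fail=True tells interpret() the assert is already known to have failed
        print(interpret("assert x + 1 == y * 3", frame, should_fail=True))

    explain_failure()   # prints something like: assert (1 + 1) == (2 * 3)
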
diff --git a/_pytest/assertion/oldinterpret.py b/_pytest/assertion/oldinterpret.py
new file mode 100644
--- /dev/null
+++ b/_pytest/assertion/oldinterpret.py
@@ -0,0 +1,552 @@
+import py
+import sys, inspect
+from compiler import parse, ast, pycodegen
+from _pytest.assertion.util import format_explanation
+from _pytest.assertion.reinterpret import BuiltinAssertionError
+
+passthroughex = py.builtin._sysex
+
+class Failure:
+    def __init__(self, node):
+        self.exc, self.value, self.tb = sys.exc_info()
+        self.node = node
+
+class View(object):
+    """View base class.
+
+    If C is a subclass of View, then C(x) creates a proxy object around
+    the object x.  The actual class of the proxy is not C in general,
+    but a *subclass* of C determined by the rules below.  To avoid confusion
+    we call view class the class of the proxy (a subclass of C, so of View)
+    and object class the class of x.
+
+    Attributes and methods not found in the proxy are automatically read on x.
+    Other operations like setting attributes are performed on the proxy, as
+    determined by its view class.  The object x is available from the proxy
+    as its __obj__ attribute.
+
+    The view class selection is determined by the __view__ tuples and the
+    optional __viewkey__ method.  By default, the selected view class is the
+    most specific subclass of C whose __view__ mentions the class of x.
+    If no such subclass is found, the search proceeds with the parent
+    object classes.  For example, C(True) will first look for a subclass
+    of C with __view__ = (..., bool, ...) and only if it doesn't find any
+    look for one with __view__ = (..., int, ...), and then ..., object,...
+    If everything fails the class C itself is considered to be the default.
+
+    Alternatively, the view class selection can be driven by another aspect
+    of the object x, instead of the class of x, by overriding __viewkey__.
+    See last example at the end of this module.
+    """
+
+    _viewcache = {}
+    __view__ = ()
+
+    def __new__(rootclass, obj, *args, **kwds):
+        self = object.__new__(rootclass)
+        self.__obj__ = obj
+        self.__rootclass__ = rootclass
+        key = self.__viewkey__()
+        try:
+            self.__class__ = self._viewcache[key]
+        except KeyError:
+            self.__class__ = self._selectsubclass(key)
+        return self
+
+    def __getattr__(self, attr):
+        # attributes not found in the normal hierarchy rooted on View
+        # are looked up in the object's real class
+        return getattr(self.__obj__, attr)
+
+    def __viewkey__(self):
+        return self.__obj__.__class__
+
+    def __matchkey__(self, key, subclasses):
+        if inspect.isclass(key):
+            keys = inspect.getmro(key)
+        else:
+            keys = [key]
+        for key in keys:
+            result = [C for C in subclasses if key in C.__view__]
+            if result:
+                return result
+        return []
+
+    def _selectsubclass(self, key):
+        subclasses = list(enumsubclasses(self.__rootclass__))
+        for C in subclasses:
+            if not isinstance(C.__view__, tuple):
+                C.__view__ = (C.__view__,)
+        choices = self.__matchkey__(key, subclasses)
+        if not choices:
+            return self.__rootclass__
+        elif len(choices) == 1:
+            return choices[0]
+        else:
+            # combine the multiple choices
+            return type('?', tuple(choices), {})
+
+    def __repr__(self):
+        return '%s(%r)' % (self.__rootclass__.__name__, self.__obj__)
+
+
+def enumsubclasses(cls):
+    for subcls in cls.__subclasses__():
+        for subsubclass in enumsubclasses(subcls):
+            yield subsubclass
+    yield cls
+
+
+class Interpretable(View):
+    """A parse tree node with a few extra methods."""
+    explanation = None
+
+    def is_builtin(self, frame):
+        return False
+
+    def eval(self, frame):
+        # fall-back for unknown expression nodes
+        try:
+            expr = ast.Expression(self.__obj__)
+            expr.filename = '<eval>'
+            self.__obj__.filename = '<eval>'
+            co = pycodegen.ExpressionCodeGenerator(expr).getCode()
+            result = frame.eval(co)
+        except passthroughex:
+            raise
+        except:
+            raise Failure(self)
+        self.result = result
+        self.explanation = self.explanation or frame.repr(self.result)
+
+    def run(self, frame):
+        # fall-back for unknown statement nodes
+        try:
+            expr = ast.Module(None, ast.Stmt([self.__obj__]))
+            expr.filename = '<run>'
+            co = pycodegen.ModuleCodeGenerator(expr).getCode()
+            frame.exec_(co)
+        except passthroughex:
+            raise
+        except:
+            raise Failure(self)
+
+    def nice_explanation(self):
+        return format_explanation(self.explanation)
+
+
+class Name(Interpretable):
+    __view__ = ast.Name
+
+    def is_local(self, frame):
+        source = '%r in locals() is not globals()' % self.name
+        try:
+            return frame.is_true(frame.eval(source))
+        except passthroughex:
+            raise
+        except:
+            return False
+
+    def is_global(self, frame):
+        source = '%r in globals()' % self.name
+        try:
+            return frame.is_true(frame.eval(source))
+        except passthroughex:
+            raise
+        except:
+            return False
+
+    def is_builtin(self, frame):
+        source = '%r not in locals() and %r not in globals()' % (
+            self.name, self.name)
+        try:
+            return frame.is_true(frame.eval(source))
+        except passthroughex:
+            raise
+        except:
+            return False
+
+    def eval(self, frame):
+        super(Name, self).eval(frame)
+        if not self.is_local(frame):
+            self.explanation = self.name
+
+class Compare(Interpretable):
+    __view__ = ast.Compare
+
+    def eval(self, frame):
+        expr = Interpretable(self.expr)
+        expr.eval(frame)
+        for operation, expr2 in self.ops:
+            if hasattr(self, 'result'):
+                # shortcutting in chained expressions
+                if not frame.is_true(self.result):
+                    break
+            expr2 = Interpretable(expr2)
+            expr2.eval(frame)
+            self.explanation = "%s %s %s" % (
+                expr.explanation, operation, expr2.explanation)
+            source = "__exprinfo_left %s __exprinfo_right" % operation
+            try:
+                self.result = frame.eval(source,
+                                         __exprinfo_left=expr.result,
+                                         __exprinfo_right=expr2.result)
+            except passthroughex:
+                raise
+            except:
+                raise Failure(self)
+            expr = expr2
+
+class And(Interpretable):
+    __view__ = ast.And
+
+    def eval(self, frame):
+        explanations = []
+        for expr in self.nodes:
+            expr = Interpretable(expr)
+            expr.eval(frame)
+            explanations.append(expr.explanation)
+            self.result = expr.result
+            if not frame.is_true(expr.result):
+                break
+        self.explanation = '(' + ' and '.join(explanations) + ')'
+
+class Or(Interpretable):
+    __view__ = ast.Or
+
+    def eval(self, frame):
+        explanations = []
+        for expr in self.nodes:
+            expr = Interpretable(expr)
+            expr.eval(frame)
+            explanations.append(expr.explanation)
+            self.result = expr.result
+            if frame.is_true(expr.result):
+                break
+        self.explanation = '(' + ' or '.join(explanations) + ')'
+
+
+# == Unary operations ==
+keepalive = []
+for astclass, astpattern in {
+    ast.Not    : 'not __exprinfo_expr',
+    ast.Invert : '(~__exprinfo_expr)',
+    }.items():
+
+    class UnaryArith(Interpretable):
+        __view__ = astclass
+
+        def eval(self, frame, astpattern=astpattern):
+            expr = Interpretable(self.expr)
+            expr.eval(frame)
+            self.explanation = astpattern.replace('__exprinfo_expr',
+                                                  expr.explanation)
+            try:
+                self.result = frame.eval(astpattern,
+                                         __exprinfo_expr=expr.result)
+            except passthroughex:
+                raise
+            except:
+                raise Failure(self)
+
+    keepalive.append(UnaryArith)
+
+# == Binary operations ==
+for astclass, astpattern in {
+    ast.Add    : '(__exprinfo_left + __exprinfo_right)',
+    ast.Sub    : '(__exprinfo_left - __exprinfo_right)',
+    ast.Mul    : '(__exprinfo_left * __exprinfo_right)',
+    ast.Div    : '(__exprinfo_left / __exprinfo_right)',
+    ast.Mod    : '(__exprinfo_left % __exprinfo_right)',
+    ast.Power  : '(__exprinfo_left ** __exprinfo_right)',
+    }.items():
+
+    class BinaryArith(Interpretable):
+        __view__ = astclass
+
+        def eval(self, frame, astpattern=astpattern):
+            left = Interpretable(self.left)
+            left.eval(frame)
+            right = Interpretable(self.right)
+            right.eval(frame)
+            self.explanation = (astpattern
+                                .replace('__exprinfo_left',  left .explanation)
+                                .replace('__exprinfo_right', right.explanation))
+            try:
+                self.result = frame.eval(astpattern,
+                                         __exprinfo_left=left.result,
+                                         __exprinfo_right=right.result)
+            except passthroughex:
+                raise
+            except:
+                raise Failure(self)
+
+    keepalive.append(BinaryArith)
+
+
+class CallFunc(Interpretable):
+    __view__ = ast.CallFunc
+
+    def is_bool(self, frame):
+        source = 'isinstance(__exprinfo_value, bool)'
+        try:
+            return frame.is_true(frame.eval(source,
+                                            __exprinfo_value=self.result))
+        except passthroughex:
+            raise
+        except:
+            return False
+
+    def eval(self, frame):
+        node = Interpretable(self.node)
+        node.eval(frame)
+        explanations = []
+        vars = {'__exprinfo_fn': node.result}
+        source = '__exprinfo_fn('
+        for a in self.args:
+            if isinstance(a, ast.Keyword):
+                keyword = a.name
+                a = a.expr
+            else:
+                keyword = None
+            a = Interpretable(a)
+            a.eval(frame)
+            argname = '__exprinfo_%d' % len(vars)
+            vars[argname] = a.result
+            if keyword is None:
+                source += argname + ','
+                explanations.append(a.explanation)
+            else:
+                source += '%s=%s,' % (keyword, argname)
+                explanations.append('%s=%s' % (keyword, a.explanation))
+        if self.star_args:
+            star_args = Interpretable(self.star_args)
+            star_args.eval(frame)
+            argname = '__exprinfo_star'
+            vars[argname] = star_args.result
+            source += '*' + argname + ','
+            explanations.append('*' + star_args.explanation)
+        if self.dstar_args:
+            dstar_args = Interpretable(self.dstar_args)
+            dstar_args.eval(frame)
+            argname = '__exprinfo_kwds'
+            vars[argname] = dstar_args.result
+            source += '**' + argname + ','
+            explanations.append('**' + dstar_args.explanation)
+        self.explanation = "%s(%s)" % (
+            node.explanation, ', '.join(explanations))
+        if source.endswith(','):
+            source = source[:-1]
+        source += ')'
+        try:
+            self.result = frame.eval(source, **vars)
+        except passthroughex:
+            raise
+        except:
+            raise Failure(self)
+        if not node.is_builtin(frame) or not self.is_bool(frame):
+            r = frame.repr(self.result)
+            self.explanation = '%s\n{%s = %s\n}' % (r, r, self.explanation)
+
+class Getattr(Interpretable):
+    __view__ = ast.Getattr
+
+    def eval(self, frame):
+        expr = Interpretable(self.expr)
+        expr.eval(frame)
+        source = '__exprinfo_expr.%s' % self.attrname
+        try:
+            self.result = frame.eval(source, __exprinfo_expr=expr.result)
+        except passthroughex:
+            raise
+        except:
+            raise Failure(self)
+        self.explanation = '%s.%s' % (expr.explanation, self.attrname)
+        # if the attribute comes from the instance, its value is interesting
+        source = ('hasattr(__exprinfo_expr, "__dict__") and '
+                  '%r in __exprinfo_expr.__dict__' % self.attrname)
+        try:
+            from_instance = frame.is_true(
+                frame.eval(source, __exprinfo_expr=expr.result))
+        except passthroughex:
+            raise
+        except:
+            from_instance = True
+        if from_instance:
+            r = frame.repr(self.result)
+            self.explanation = '%s\n{%s = %s\n}' % (r, r, self.explanation)
+
+# == Re-interpretation of full statements ==
+
+class Assert(Interpretable):
+    __view__ = ast.Assert
+
+    def run(self, frame):
+        test = Interpretable(self.test)
+        test.eval(frame)
+        # print the result as  'assert <explanation>'
+        self.result = test.result
+        self.explanation = 'assert ' + test.explanation
+        if not frame.is_true(test.result):
+            try:
+                raise BuiltinAssertionError
+            except passthroughex:
+                raise
+            except:
+                raise Failure(self)
+
+class Assign(Interpretable):
+    __view__ = ast.Assign
+
+    def run(self, frame):
+        expr = Interpretable(self.expr)
+        expr.eval(frame)
+        self.result = expr.result
+        self.explanation = '... = ' + expr.explanation
+        # fall-back-run the rest of the assignment
+        ass = ast.Assign(self.nodes, ast.Name('__exprinfo_expr'))
+        mod = ast.Module(None, ast.Stmt([ass]))
+        mod.filename = '<run>'
+        co = pycodegen.ModuleCodeGenerator(mod).getCode()
+        try:
+            frame.exec_(co, __exprinfo_expr=expr.result)
+        except passthroughex:
+            raise
+        except:
+            raise Failure(self)
+
+class Discard(Interpretable):
+    __view__ = ast.Discard
+
+    def run(self, frame):
+        expr = Interpretable(self.expr)
+        expr.eval(frame)
+        self.result = expr.result
+        self.explanation = expr.explanation
+
+class Stmt(Interpretable):
+    __view__ = ast.Stmt
+
+    def run(self, frame):
+        for stmt in self.nodes:
+            stmt = Interpretable(stmt)
+            stmt.run(frame)
+
+
+def report_failure(e):
+    explanation = e.node.nice_explanation()
+    if explanation:
+        explanation = ", in: " + explanation
+    else:
+        explanation = ""
+    sys.stdout.write("%s: %s%s\n" % (e.exc.__name__, e.value, explanation))
+
+def check(s, frame=None):
+    if frame is None:
+        frame = sys._getframe(1)
+        frame = py.code.Frame(frame)
+    expr = parse(s, 'eval')
+    assert isinstance(expr, ast.Expression)
+    node = Interpretable(expr.node)
+    try:
+        node.eval(frame)
+    except passthroughex:
+        raise
+    except Failure:
+        e = sys.exc_info()[1]
+        report_failure(e)
+    else:
+        if not frame.is_true(node.result):
+            sys.stderr.write("assertion failed: %s\n" % node.nice_explanation())
+
+
+###########################################################
+# API / Entry points
+# #########################################################
+
+def interpret(source, frame, should_fail=False):
+    module = Interpretable(parse(source, 'exec').node)
+    #print "got module", module
+    if isinstance(frame, py.std.types.FrameType):
+        frame = py.code.Frame(frame)
+    try:
+        module.run(frame)
+    except Failure:
+        e = sys.exc_info()[1]
+        return getfailure(e)
+    except passthroughex:
+        raise
+    except:
+        import traceback
+        traceback.print_exc()
+    if should_fail:
+        return ("(assertion failed, but when it was re-run for "
+                "printing intermediate values, it did not fail.  Suggestions: "
+                "compute assert expression before the assert or use --nomagic)")
+    else:
+        return None
+
+def getmsg(excinfo):
+    if isinstance(excinfo, tuple):
+        excinfo = py.code.ExceptionInfo(excinfo)
+    #frame, line = gettbline(tb)
+    #frame = py.code.Frame(frame)
+    #return interpret(line, frame)
+
+    tb = excinfo.traceback[-1]
+    source = str(tb.statement).strip()
+    x = interpret(source, tb.frame, should_fail=True)
+    if not isinstance(x, str):
+        raise TypeError("interpret returned non-string %r" % (x,))
+    return x
+
+def getfailure(e):
+    explanation = e.node.nice_explanation()
+    if str(e.value):
+        lines = explanation.split('\n')
+        lines[0] += "  << %s" % (e.value,)
+        explanation = '\n'.join(lines)
+    text = "%s: %s" % (e.exc.__name__, explanation)
+    if text.startswith('AssertionError: assert '):
+        text = text[16:]
+    return text
+
+def run(s, frame=None):
+    if frame is None:
+        frame = sys._getframe(1)
+        frame = py.code.Frame(frame)
+    module = Interpretable(parse(s, 'exec').node)
+    try:
+        module.run(frame)
+    except Failure:
+        e = sys.exc_info()[1]
+        report_failure(e)
+
+
+if __name__ == '__main__':
+    # example:
+    def f():
+        return 5
+    def g():
+        return 3
+    def h(x):
+        return 'never'
+    check("f() * g() == 5")
+    check("not f()")
+    check("not (f() and g() or 0)")
+    check("f() == g()")
+    i = 4
+    check("i == f()")
+    check("len(f()) == 0")
+    check("isinstance(2+3+4, float)")
+
+    run("x = i")
+    check("x == 5")
+
+    run("assert not f(), 'oops'")
+    run("a, b, c = 1, 2")
+    run("a, b, c = f()")
+
+    check("max([f(),g()]) == 4")
+    check("'hello'[g()] == 'h'")
+    run("'guk%d' % h(f())")
diff --git a/_pytest/assertion/reinterpret.py b/_pytest/assertion/reinterpret.py
new file mode 100644
--- /dev/null
+++ b/_pytest/assertion/reinterpret.py
@@ -0,0 +1,48 @@
+import sys
+import py
+
+BuiltinAssertionError = py.builtin.builtins.AssertionError
+
+class AssertionError(BuiltinAssertionError):
+    def __init__(self, *args):
+        BuiltinAssertionError.__init__(self, *args)
+        if args:
+            try:
+                self.msg = str(args[0])
+            except py.builtin._sysex:
+                raise
+            except:
+                self.msg = "<[broken __repr__] %s at %0xd>" %(
+                    args[0].__class__, id(args[0]))
+        else:
+            f = py.code.Frame(sys._getframe(1))
+            try:
+                source = f.code.fullsource
+                if source is not None:
+                    try:
+                        source = source.getstatement(f.lineno, assertion=True)
+                    except IndexError:
+                        source = None
+                    else:
+                        source = str(source.deindent()).strip()
+            except py.error.ENOENT:
+                source = None
+                # this can also occur during reinterpretation, when the
+                # co_filename is set to "<run>".
+            if source:
+                self.msg = reinterpret(source, f, should_fail=True)
+            else:
+                self.msg = "<could not determine information>"
+            if not self.args:
+                self.args = (self.msg,)
+
+if sys.version_info > (3, 0):
+    AssertionError.__module__ = "builtins"
+    reinterpret_old = "old reinterpretation not available for py3"
+else:
+    from _pytest.assertion.oldinterpret import interpret as reinterpret_old
+if sys.version_info >= (2, 6) or (sys.platform.startswith("java")):
+    from _pytest.assertion.newinterpret import interpret as reinterpret
+else:
+    reinterpret = reinterpret_old
+
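
A hedged sketch of what the reinterpreting AssertionError is for (assuming the snippet is run from a module on disk, since the subclass re-reads the failing source line): once pytest_configure() has monkeypatched the builtin, a plain assert that fails rebuilds its own message by re-running the expression.

    # sketch only, not from this changeset; mirrors the monkeypatch done in pytest_configure()
    import py
    from _pytest.assertion import reinterpret

    py.builtin.builtins.AssertionError = reinterpret.AssertionError
    try:
        x = [1, 2]
        assert len(x) == 3
    except AssertionError as e:
        print(e.msg)    # roughly: "assert 2 == 3" plus a "where 2 = len(x)" line
    finally:
        py.builtin.builtins.AssertionError = reinterpret.BuiltinAssertionError
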
diff --git a/_pytest/assertion/rewrite.py b/_pytest/assertion/rewrite.py
new file mode 100644
--- /dev/null
+++ b/_pytest/assertion/rewrite.py
@@ -0,0 +1,340 @@
+"""Rewrite assertion AST to produce nice error messages"""
+
+import ast
+import collections
+import itertools
+import sys
+
+import py
+from _pytest.assertion import util
+
+
+def rewrite_asserts(mod):
+    """Rewrite the assert statements in mod."""
+    AssertionRewriter().run(mod)
+
+
+_saferepr = py.io.saferepr
+from _pytest.assertion.util import format_explanation as _format_explanation
+
+def _format_boolop(operands, explanations, is_or):
+    show_explanations = []
+    for operand, expl in zip(operands, explanations):
+        show_explanations.append(expl)
+        if operand == is_or:
+            break
+    return "(" + (is_or and " or " or " and ").join(show_explanations) + ")"
+
+def _call_reprcompare(ops, results, expls, each_obj):
+    for i, res, expl in zip(range(len(ops)), results, expls):
+        try:
+            done = not res
+        except Exception:
+            done = True
+        if done:
+            break
+    if util._reprcompare is not None:
+        custom = util._reprcompare(ops[i], each_obj[i], each_obj[i + 1])
+        if custom is not None:
+            return custom
+    return expl
+
+
+unary_map = {
+    ast.Not : "not %s",
+    ast.Invert : "~%s",
+    ast.USub : "-%s",
+    ast.UAdd : "+%s"
+}
+
+binop_map = {
+    ast.BitOr : "|",
+    ast.BitXor : "^",
+    ast.BitAnd : "&",
+    ast.LShift : "<<",
+    ast.RShift : ">>",
+    ast.Add : "+",
+    ast.Sub : "-",
+    ast.Mult : "*",
+    ast.Div : "/",
+    ast.FloorDiv : "//",
+    ast.Mod : "%",
+    ast.Eq : "==",
+    ast.NotEq : "!=",
+    ast.Lt : "<",
+    ast.LtE : "<=",
+    ast.Gt : ">",
+    ast.GtE : ">=",
+    ast.Pow : "**",
+    ast.Is : "is",
+    ast.IsNot : "is not",
+    ast.In : "in",
+    ast.NotIn : "not in"
+}
+
+
+def set_location(node, lineno, col_offset):
+    """Set node location information recursively."""
+    def _fix(node, lineno, col_offset):
+        if "lineno" in node._attributes:
+            node.lineno = lineno
+        if "col_offset" in node._attributes:
+            node.col_offset = col_offset
+        for child in ast.iter_child_nodes(node):
+            _fix(child, lineno, col_offset)
+    _fix(node, lineno, col_offset)
+    return node
+
+
+class AssertionRewriter(ast.NodeVisitor):
+
+    def run(self, mod):
+        """Find all assert statements in *mod* and rewrite them."""
+        if not mod.body:
+            # Nothing to do.
+            return
+        # Insert some special imports at the top of the module but after any
+        # docstrings and __future__ imports.
+        aliases = [ast.alias(py.builtin.builtins.__name__, "@py_builtins"),
+                   ast.alias("_pytest.assertion.rewrite", "@pytest_ar")]
+        expect_docstring = True
+        pos = 0
+        lineno = 0
+        for item in mod.body:
+            if (expect_docstring and isinstance(item, ast.Expr) and
+                isinstance(item.value, ast.Str)):
+                doc = item.value.s
+                if "PYTEST_DONT_REWRITE" in doc:
+                    # The module has disabled assertion rewriting.
+                    return
+                lineno += len(doc) - 1
+                expect_docstring = False
+            elif (not isinstance(item, ast.ImportFrom) or item.level > 0 and
+                  item.identifier != "__future__"):
+                lineno = item.lineno
+                break
+            pos += 1
+        imports = [ast.Import([alias], lineno=lineno, col_offset=0)
+                   for alias in aliases]
+        mod.body[pos:pos] = imports
+        # Collect asserts.
+        nodes = collections.deque([mod])
+        while nodes:
+            node = nodes.popleft()
+            for name, field in ast.iter_fields(node):
+                if isinstance(field, list):
+                    new = []
+                    for i, child in enumerate(field):
+                        if isinstance(child, ast.Assert):
+                            # Transform assert.
+                            new.extend(self.visit(child))
+                        else:
+                            new.append(child)
+                            if isinstance(child, ast.AST):
+                                nodes.append(child)
+                    setattr(node, name, new)
+                elif (isinstance(field, ast.AST) and
+                      # Don't recurse into expressions as they can't contain
+                      # asserts.
+                      not isinstance(field, ast.expr)):
+                    nodes.append(field)
+
+    def variable(self):
+        """Get a new variable."""
+        # Use a character invalid in python identifiers to avoid clashing.
+        name = "@py_assert" + str(next(self.variable_counter))
+        self.variables.add(name)
+        return name
+
+    def assign(self, expr):
+        """Give *expr* a name."""
+        name = self.variable()
+        self.statements.append(ast.Assign([ast.Name(name, ast.Store())], expr))
+        return ast.Name(name, ast.Load())
+
+    def display(self, expr):
+        """Call py.io.saferepr on the expression."""
+        return self.helper("saferepr", expr)
+
+    def helper(self, name, *args):
+        """Call a helper in this module."""
+        py_name = ast.Name("@pytest_ar", ast.Load())
+        attr = ast.Attribute(py_name, "_" + name, ast.Load())
+        return ast.Call(attr, list(args), [], None, None)
+
+    def builtin(self, name):
+        """Return the builtin called *name*."""
+        builtin_name = ast.Name("@py_builtins", ast.Load())
+        return ast.Attribute(builtin_name, name, ast.Load())
+
+    def explanation_param(self, expr):
+        specifier = "py" + str(next(self.variable_counter))
+        self.explanation_specifiers[specifier] = expr
+        return "%(" + specifier + ")s"
+
+    def push_format_context(self):
+        self.explanation_specifiers = {}
+        self.stack.append(self.explanation_specifiers)
+
+    def pop_format_context(self, expl_expr):
+        current = self.stack.pop()
+        if self.stack:
+            self.explanation_specifiers = self.stack[-1]
+        keys = [ast.Str(key) for key in current.keys()]
+        format_dict = ast.Dict(keys, list(current.values()))
+        form = ast.BinOp(expl_expr, ast.Mod(), format_dict)
+        name = "@py_format" + str(next(self.variable_counter))
+        self.on_failure.append(ast.Assign([ast.Name(name, ast.Store())], form))
+        return ast.Name(name, ast.Load())
+
+    def generic_visit(self, node):
+        """Handle expressions we don't have custom code for."""
+        assert isinstance(node, ast.expr)
+        res = self.assign(node)
+        return res, self.explanation_param(self.display(res))
+
+    def visit_Assert(self, assert_):
+        if assert_.msg:
+            # There's already a message. Don't mess with it.
+            return [assert_]
+        self.statements = []
+        self.variables = set()
+        self.variable_counter = itertools.count()
+        self.stack = []
+        self.on_failure = []
+        self.push_format_context()
+        # Rewrite assert into a bunch of statements.
+        top_condition, explanation = self.visit(assert_.test)
+        # Create failure message.
+        body = self.on_failure
+        negation = ast.UnaryOp(ast.Not(), top_condition)
+        self.statements.append(ast.If(negation, body, []))
+        explanation = "assert " + explanation
+        template = ast.Str(explanation)
+        msg = self.pop_format_context(template)
+        fmt = self.helper("format_explanation", msg)
+        err_name = ast.Name("AssertionError", ast.Load())
+        exc = ast.Call(err_name, [fmt], [], None, None)
+        if sys.version_info[0] >= 3:
+            raise_ = ast.Raise(exc, None)
+        else:
+            raise_ = ast.Raise(exc, None, None)
+        body.append(raise_)
+        # Delete temporary variables.
+        names = [ast.Name(name, ast.Del()) for name in self.variables]
+        if names:
+            delete = ast.Delete(names)
+            self.statements.append(delete)
+        # Fix line numbers.
+        for stmt in self.statements:
+            set_location(stmt, assert_.lineno, assert_.col_offset)
+        return self.statements
+
+    def visit_Name(self, name):
+        # Check if the name is local or not.
+        locs = ast.Call(self.builtin("locals"), [], [], None, None)
+        globs = ast.Call(self.builtin("globals"), [], [], None, None)
+        ops = [ast.In(), ast.IsNot()]
+        test = ast.Compare(ast.Str(name.id), ops, [locs, globs])
+        expr = ast.IfExp(test, self.display(name), ast.Str(name.id))
+        return name, self.explanation_param(expr)
+
+    def visit_BoolOp(self, boolop):
+        operands = []
+        explanations = []
+        self.push_format_context()
+        for operand in boolop.values:
+            res, explanation = self.visit(operand)
+            operands.append(res)
+            explanations.append(explanation)
+        expls = ast.Tuple([ast.Str(expl) for expl in explanations], ast.Load())
+        is_or = ast.Num(isinstance(boolop.op, ast.Or))
+        expl_template = self.helper("format_boolop",
+                                    ast.Tuple(operands, ast.Load()), expls,
+                                    is_or)
+        expl = self.pop_format_context(expl_template)
+        res = self.assign(ast.BoolOp(boolop.op, operands))
+        return res, self.explanation_param(expl)
+
+    def visit_UnaryOp(self, unary):
+        pattern = unary_map[unary.op.__class__]
+        operand_res, operand_expl = self.visit(unary.operand)
+        res = self.assign(ast.UnaryOp(unary.op, operand_res))
+        return res, pattern % (operand_expl,)
+
+    def visit_BinOp(self, binop):
+        symbol = binop_map[binop.op.__class__]
+        left_expr, left_expl = self.visit(binop.left)
+        right_expr, right_expl = self.visit(binop.right)
+        explanation = "(%s %s %s)" % (left_expl, symbol, right_expl)
+        res = self.assign(ast.BinOp(left_expr, binop.op, right_expr))
+        return res, explanation
+
+    def visit_Call(self, call):
+        new_func, func_expl = self.visit(call.func)
+        arg_expls = []
+        new_args = []
+        new_kwargs = []
+        new_star = new_kwarg = None
+        for arg in call.args:
+            res, expl = self.visit(arg)
+            new_args.append(res)
+            arg_expls.append(expl)
+        for keyword in call.keywords:
+            res, expl = self.visit(keyword.value)
+            new_kwargs.append(ast.keyword(keyword.arg, res))
+            arg_expls.append(keyword.arg + "=" + expl)
+        if call.starargs:
+            new_star, expl = self.visit(call.starargs)
+            arg_expls.append("*" + expl)
+        if call.kwargs:
+            new_kwarg, expl = self.visit(call.kwargs)
+            arg_expls.append("**" + expl)
+        expl = "%s(%s)" % (func_expl, ', '.join(arg_expls))
+        new_call = ast.Call(new_func, new_args, new_kwargs, new_star, new_kwarg)
+        res = self.assign(new_call)
+        res_expl = self.explanation_param(self.display(res))
+        outer_expl = "%s\n{%s = %s\n}" % (res_expl, res_expl, expl)
+        return res, outer_expl
+
+    def visit_Attribute(self, attr):
+        if not isinstance(attr.ctx, ast.Load):
+            return self.generic_visit(attr)
+        value, value_expl = self.visit(attr.value)
+        res = self.assign(ast.Attribute(value, attr.attr, ast.Load()))
+        res_expl = self.explanation_param(self.display(res))
+        pat = "%s\n{%s = %s.%s\n}"
+        expl = pat % (res_expl, res_expl, value_expl, attr.attr)
+        return res, expl
+
+    def visit_Compare(self, comp):
+        self.push_format_context()
+        left_res, left_expl = self.visit(comp.left)
+        res_variables = [self.variable() for i in range(len(comp.ops))]
+        load_names = [ast.Name(v, ast.Load()) for v in res_variables]
+        store_names = [ast.Name(v, ast.Store()) for v in res_variables]
+        it = zip(range(len(comp.ops)), comp.ops, comp.comparators)
+        expls = []
+        syms = []
+        results = [left_res]
+        for i, op, next_operand in it:
+            next_res, next_expl = self.visit(next_operand)
+            results.append(next_res)
+            sym = binop_map[op.__class__]
+            syms.append(ast.Str(sym))
+            expl = "%s %s %s" % (left_expl, sym, next_expl)
+            expls.append(ast.Str(expl))
+            res_expr = ast.Compare(left_res, [op], [next_res])
+            self.statements.append(ast.Assign([store_names[i]], res_expr))
+            left_res, left_expl = next_res, next_expl
+        # Use py.code._reprcompare if that's available.
+        expl_call = self.helper("call_reprcompare",
+                                ast.Tuple(syms, ast.Load()),
+                                ast.Tuple(load_names, ast.Load()),
+                                ast.Tuple(expls, ast.Load()),
+                                ast.Tuple(results, ast.Load()))
+        if len(comp.ops) > 1:
+            res = ast.BoolOp(ast.And(), load_names)
+        else:
+            res = load_names[0]
+        return res, self.explanation_param(self.pop_format_context(expl_call))
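
For orientation, a hand-written sketch of what the visitors above conceptually turn 'assert x == y' into (illustrative only, not the rewriter's literal output; the real transformation routes the message through the "format_explanation" and "call_reprcompare" helpers used above):

    def rewritten_assert_sketch(x, y):
        # operands are evaluated once and kept in temporaries
        left = x
        right = y
        result = left == right
        if not result:
            # the rewriter builds this text via its helper calls instead
            explanation = "assert %r == %r" % (left, right)
            raise AssertionError(explanation)
        # the temporaries are then deleted, matching the ast.Delete
        # emitted for self.variables above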
diff --git a/_pytest/assertion/util.py b/_pytest/assertion/util.py
new file mode 100644
--- /dev/null
+++ b/_pytest/assertion/util.py
@@ -0,0 +1,213 @@
+"""Utilities for assertion debugging"""
+
+import py
+
+
+# The _reprcompare attribute on the util module is used by the new assertion
+# interpretation code and assertion rewriter to detect this plugin was
+# loaded and in turn call the hooks defined here as part of the
+# DebugInterpreter.
+_reprcompare = None
+
+def format_explanation(explanation):
+    """This formats an explanation
+
+    Normally all embedded newlines are escaped, however there are
+    three exceptions: \n{, \n} and \n~.  The first two are intended to
+    cover nested explanations; see the function and attribute explanations
+    for examples (.visit_Call(), .visit_Attribute()).  The last one is
+    for when one explanation needs to span multiple lines, e.g. when
+    displaying diffs.
+    """
+    # simplify 'assert False where False = ...'
+    where = 0
+    while True:
+        start = where = explanation.find("False\n{False = ", where)
+        if where == -1:
+            break
+        level = 0
+        for i, c in enumerate(explanation[start:]):
+            if c == "{":
+                level += 1
+            elif c == "}":
+                level -= 1
+                if not level:
+                    break
+        else:
+            raise AssertionError("unbalanced braces: %r" % (explanation,))
+        end = start + i
+        where = end
+        if explanation[end - 1] == '\n':
+            explanation = (explanation[:start] + explanation[start+15:end-1] +
+                           explanation[end+1:])
+            where -= 17
+    raw_lines = (explanation or '').split('\n')
+    # escape newlines not followed by {, } and ~
+    lines = [raw_lines[0]]
+    for l in raw_lines[1:]:
+        if l.startswith('{') or l.startswith('}') or l.startswith('~'):
+            lines.append(l)
+        else:
+            lines[-1] += '\\n' + l
+
+    result = lines[:1]
+    stack = [0]
+    stackcnt = [0]
+    for line in lines[1:]:
+        if line.startswith('{'):
+            if stackcnt[-1]:
+                s = 'and   '
+            else:
+                s = 'where '
+            stack.append(len(result))
+            stackcnt[-1] += 1
+            stackcnt.append(0)
+            result.append(' +' + '  '*(len(stack)-1) + s + line[1:])
+        elif line.startswith('}'):
+            assert line.startswith('}')
+            stack.pop()
+            stackcnt.pop()
+            result[stack[-1]] += line[1:]
+        else:
+            assert line.startswith('~')
+            result.append('  '*len(stack) + line[1:])
+    assert len(stack) == 1
+    return '\n'.join(result)
+
+
+# Provide basestring in python3
+try:
+    basestring = basestring
+except NameError:
+    basestring = str
+
+
+def assertrepr_compare(op, left, right):
+    """return specialised explanations for some operators/operands"""
+    width = 80 - 15 - len(op) - 2 # 15 chars indentation, 1 space around op
+    left_repr = py.io.saferepr(left, maxsize=int(width/2))
+    right_repr = py.io.saferepr(right, maxsize=width-len(left_repr))
+    summary = '%s %s %s' % (left_repr, op, right_repr)
+
+    issequence = lambda x: isinstance(x, (list, tuple))
+    istext = lambda x: isinstance(x, basestring)
+    isdict = lambda x: isinstance(x, dict)
+    isset = lambda x: isinstance(x, set)
+
+    explanation = None
+    try:
+        if op == '==':
+            if istext(left) and istext(right):
+                explanation = _diff_text(left, right)
+            elif issequence(left) and issequence(right):
+                explanation = _compare_eq_sequence(left, right)
+            elif isset(left) and isset(right):
+                explanation = _compare_eq_set(left, right)
+            elif isdict(left) and isdict(right):
+                explanation = _diff_text(py.std.pprint.pformat(left),
+                                         py.std.pprint.pformat(right))
+        elif op == 'not in':
+            if istext(left) and istext(right):
+                explanation = _notin_text(left, right)
+    except py.builtin._sysex:
+        raise
+    except:
+        excinfo = py.code.ExceptionInfo()
+        explanation = ['(pytest_assertion plugin: representation of '
+            'details failed. Probably an object has a faulty __repr__.)',
+            str(excinfo)
+            ]
+
+
+    if not explanation:
+        return None
+
+    # Don't include pageloads of data, should be configurable
+    if len(''.join(explanation)) > 80*8:
+        explanation = ['Detailed information too verbose, truncated']
+
+    return [summary] + explanation
+
+
+def _diff_text(left, right):
+    """Return the explanation for the diff between text
+
+    This will skip leading and trailing characters which are
+    identical to keep the diff minimal.
+    """
+    explanation = []
+    i = 0 # just in case left or right has zero length
+    for i in range(min(len(left), len(right))):
+        if left[i] != right[i]:
+            break
+    if i > 42:
+        i -= 10                 # Provide some context
+        explanation = ['Skipping %s identical '
+                       'leading characters in diff' % i]
+        left = left[i:]
+        right = right[i:]
+    if len(left) == len(right):
+        for i in range(len(left)):
+            if left[-i] != right[-i]:
+                break
+        if i > 42:
+            i -= 10     # Provide some context
+            explanation += ['Skipping %s identical '
+                            'trailing characters in diff' % i]
+            left = left[:-i]
+            right = right[:-i]
+    explanation += [line.strip('\n')
+                    for line in py.std.difflib.ndiff(left.splitlines(),
+                                                     right.splitlines())]
+    return explanation
+
+
+def _compare_eq_sequence(left, right):
+    explanation = []
+    for i in range(min(len(left), len(right))):
+        if left[i] != right[i]:
+            explanation += ['At index %s diff: %r != %r' %
+                            (i, left[i], right[i])]
+            break
+    if len(left) > len(right):
+        explanation += ['Left contains more items, '
+            'first extra item: %s' % py.io.saferepr(left[len(right)],)]
+    elif len(left) < len(right):
+        explanation += ['Right contains more items, '
+            'first extra item: %s' % py.io.saferepr(right[len(left)],)]
+    return explanation # + _diff_text(py.std.pprint.pformat(left),
+                       #             py.std.pprint.pformat(right))
+
+
+def _compare_eq_set(left, right):
+    explanation = []
+    diff_left = left - right
+    diff_right = right - left
+    if diff_left:
+        explanation.append('Extra items in the left set:')
+        for item in diff_left:
+            explanation.append(py.io.saferepr(item))
+    if diff_right:
+        explanation.append('Extra items in the right set:')
+        for item in diff_right:
+            explanation.append(py.io.saferepr(item))
+    return explanation
+
+
+def _notin_text(term, text):
+    index = text.find(term)
+    head = text[:index]
+    tail = text[index+len(term):]
+    correct_text = head + tail
+    diff = _diff_text(correct_text, text)
+    newdiff = ['%s is contained here:' % py.io.saferepr(term, maxsize=42)]
+    for line in diff:
+        if line.startswith('Skipping'):
+            continue
+        if line.startswith('- '):
+            continue
+        if line.startswith('+ '):
+            newdiff.append('  ' + line[2:])
+        else:
+            newdiff.append(line)
+    return newdiff
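
As a small usage sketch of format_explanation() above (illustrative values only): the rewriter emits explanations whose nested parts are wrapped in "\n{" / "\n}" markers (see the "%s\n{%s = %s\n}" patterns in visit_Call and visit_Attribute), and the helper folds them into indented "where" lines:

    from _pytest.assertion.util import format_explanation

    explanation = "assert 42 == 0\n{42 = f(1)\n}"
    print(format_explanation(explanation))
    # assert 42 == 0
    #  +  where 42 = f(1)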
diff --git a/_pytest/doctest.py b/_pytest/doctest.py
--- a/_pytest/doctest.py
+++ b/_pytest/doctest.py
@@ -59,7 +59,7 @@
                 inner_excinfo = py.code.ExceptionInfo(excinfo.value.exc_info)
                 lines += ["UNEXPECTED EXCEPTION: %s" %
                             repr(inner_excinfo.value)]
-
+                lines += py.std.traceback.format_exception(*excinfo.value.exc_info)
             return ReprFailDoctest(reprlocation, lines)
         else:
             return super(DoctestItem, self).repr_failure(excinfo)
diff --git a/_pytest/helpconfig.py b/_pytest/helpconfig.py
--- a/_pytest/helpconfig.py
+++ b/_pytest/helpconfig.py
@@ -16,9 +16,6 @@
     group.addoption('--traceconfig',
                action="store_true", dest="traceconfig", default=False,
                help="trace considerations of conftest.py files."),
-    group._addoption('--nomagic',
-               action="store_true", dest="nomagic", default=False,
-               help="don't reinterpret asserts, no traceback cutting. ")
     group.addoption('--debug',
                action="store_true", dest="debug", default=False,
                help="generate and show internal debugging information.")
diff --git a/_pytest/junitxml.py b/_pytest/junitxml.py
--- a/_pytest/junitxml.py
+++ b/_pytest/junitxml.py
@@ -65,7 +65,8 @@
 
 class LogXML(object):
     def __init__(self, logfile, prefix):
-        self.logfile = logfile
+        logfile = os.path.expanduser(os.path.expandvars(logfile))
+        self.logfile = os.path.normpath(logfile)
         self.prefix = prefix
         self.test_logs = []
         self.passed = self.skipped = 0
@@ -76,7 +77,7 @@
         names = report.nodeid.split("::")
         names[0] = names[0].replace("/", '.')
         names = tuple(names)
-        d = {'time': self._durations.pop(names, "0")}
+        d = {'time': self._durations.pop(report.nodeid, "0")}
         names = [x.replace(".py", "") for x in names if x != "()"]
         classnames = names[:-1]
         if self.prefix:
@@ -170,12 +171,11 @@
             self.append_skipped(report)
 
     def pytest_runtest_call(self, item, __multicall__):
-        names = tuple(item.listnames())
         start = time.time()
         try:
             return __multicall__.execute()
         finally:
-            self._durations[names] = time.time() - start
+            self._durations[item.nodeid] = time.time() - start
 
     def pytest_collectreport(self, report):
         if not report.passed:
diff --git a/_pytest/main.py b/_pytest/main.py
--- a/_pytest/main.py
+++ b/_pytest/main.py
@@ -46,23 +46,25 @@
 
 
 def pytest_namespace():
-    return dict(collect=dict(Item=Item, Collector=Collector, File=File))
+    collect = dict(Item=Item, Collector=Collector, File=File, Session=Session)
+    return dict(collect=collect)
         
 def pytest_configure(config):
     py.test.config = config # compatibility
     if config.option.exitfirst:
         config.option.maxfail = 1
 
-def pytest_cmdline_main(config):
-    """ default command line protocol for initialization, session,
-    running tests and reporting. """
+def wrap_session(config, doit):
+    """Skeleton command line program"""
     session = Session(config)
     session.exitstatus = EXIT_OK
+    initstate = 0
     try:
         config.pluginmanager.do_configure(config)
+        initstate = 1
         config.hook.pytest_sessionstart(session=session)
-        config.hook.pytest_collection(session=session)
-        config.hook.pytest_runtestloop(session=session)
+        initstate = 2
+        doit(config, session)
     except pytest.UsageError:
         raise
     except KeyboardInterrupt:
@@ -77,18 +79,24 @@
             sys.stderr.write("mainloop: caught Spurious SystemExit!\n")
     if not session.exitstatus and session._testsfailed:
         session.exitstatus = EXIT_TESTSFAILED
-    config.hook.pytest_sessionfinish(session=session,
-        exitstatus=session.exitstatus)
-    config.pluginmanager.do_unconfigure(config)
+    if initstate >= 2:
+        config.hook.pytest_sessionfinish(session=session,
+            exitstatus=session.exitstatus)
+    if initstate >= 1:
+        config.pluginmanager.do_unconfigure(config)
     return session.exitstatus
 
+def pytest_cmdline_main(config):
+    return wrap_session(config, _main)
+
+def _main(config, session):
+    """ default command line protocol for initialization, session,
+    running tests and reporting. """
+    config.hook.pytest_collection(session=session)
+    config.hook.pytest_runtestloop(session=session)
+
 def pytest_collection(session):
-    session.perform_collect()
-    hook = session.config.hook
-    hook.pytest_collection_modifyitems(session=session,
-        config=session.config, items=session.items)
-    hook.pytest_collection_finish(session=session)
-    return True
+    return session.perform_collect()
 
 def pytest_runtestloop(session):
     if session.config.option.collectonly:
@@ -374,6 +382,16 @@
         return HookProxy(fspath, self.config)
 
     def perform_collect(self, args=None, genitems=True):
+        hook = self.config.hook
+        try:
+            items = self._perform_collect(args, genitems)
+            hook.pytest_collection_modifyitems(session=self,
+                config=self.config, items=items)
+        finally:
+            hook.pytest_collection_finish(session=self)
+        return items
+
+    def _perform_collect(self, args, genitems):
         if args is None:
             args = self.config.args
         self.trace("perform_collect", self, args)
diff --git a/_pytest/mark.py b/_pytest/mark.py
--- a/_pytest/mark.py
+++ b/_pytest/mark.py
@@ -153,7 +153,7 @@
 
     def __repr__(self):
         return "<MarkInfo %r args=%r kwargs=%r>" % (
-                self._name, self.args, self.kwargs)
+                self.name, self.args, self.kwargs)
 
 def pytest_itemcollected(item):
     if not isinstance(item, pytest.Function):
diff --git a/_pytest/pytester.py b/_pytest/pytester.py
--- a/_pytest/pytester.py
+++ b/_pytest/pytester.py
@@ -6,7 +6,7 @@
 import inspect
 import time
 from fnmatch import fnmatch
-from _pytest.main import Session
+from _pytest.main import Session, EXIT_OK
 from py.builtin import print_
 from _pytest.core import HookRelay
 
@@ -292,13 +292,19 @@
         assert '::' not in str(arg)
         p = py.path.local(arg)
         x = session.fspath.bestrelpath(p)
-        return session.perform_collect([x], genitems=False)[0]
+        config.hook.pytest_sessionstart(session=session)
+        res = session.perform_collect([x], genitems=False)[0]
+        config.hook.pytest_sessionfinish(session=session, exitstatus=EXIT_OK)
+        return res
 
     def getpathnode(self, path):
-        config = self.parseconfig(path)
+        config = self.parseconfigure(path)
         session = Session(config)
         x = session.fspath.bestrelpath(path)
-        return session.perform_collect([x], genitems=False)[0]
+        config.hook.pytest_sessionstart(session=session)
+        res = session.perform_collect([x], genitems=False)[0]
+        config.hook.pytest_sessionfinish(session=session, exitstatus=EXIT_OK)
+        return res
 
     def genitems(self, colitems):
         session = colitems[0].session
@@ -312,7 +318,9 @@
         config = self.parseconfigure(*args)
         rec = self.getreportrecorder(config)
         session = Session(config)
+        config.hook.pytest_sessionstart(session=session)
         session.perform_collect()
+        config.hook.pytest_sessionfinish(session=session, exitstatus=EXIT_OK)
         return session.items, rec
 
     def runitem(self, source):
@@ -382,6 +390,8 @@
             c.basetemp = py.path.local.make_numbered_dir(prefix="reparse",
                 keep=0, rootdir=self.tmpdir, lock_timeout=None)
             c.parse(args)
+            c.pluginmanager.do_configure(c)
+            self.request.addfinalizer(lambda: c.pluginmanager.do_unconfigure(c))
             return c
         finally:
             py.test.config = oldconfig
diff --git a/_pytest/python.py b/_pytest/python.py
--- a/_pytest/python.py
+++ b/_pytest/python.py
@@ -226,8 +226,13 @@
 
     def _importtestmodule(self):
         # we assume we are only called once per module
+        from _pytest import assertion
+        assertion.before_module_import(self)
         try:
-            mod = self.fspath.pyimport(ensuresyspath=True)
+            try:
+                mod = self.fspath.pyimport(ensuresyspath=True)
+            finally:
+                assertion.after_module_import(self)
         except SyntaxError:
             excinfo = py.code.ExceptionInfo()
             raise self.CollectError(excinfo.getrepr(style="short"))
@@ -374,7 +379,7 @@
         # test generators are seen as collectors but they also
         # invoke setup/teardown on popular request
         # (induced by the common "test_*" naming shared with normal tests)
-        self.config._setupstate.prepare(self)
+        self.session._setupstate.prepare(self)
         # see FunctionMixin.setup and test_setupstate_is_preserved_134
         self._preservedparent = self.parent.obj
         l = []
@@ -721,7 +726,7 @@
 
     def _addfinalizer(self, finalizer, scope):
         colitem = self._getscopeitem(scope)
-        self.config._setupstate.addfinalizer(
+        self._pyfuncitem.session._setupstate.addfinalizer(
             finalizer=finalizer, colitem=colitem)
 
     def __repr__(self):
@@ -742,8 +747,10 @@
         raise self.LookupError(msg)
 
 def showfuncargs(config):
-    from _pytest.main import Session
-    session = Session(config)
+    from _pytest.main import wrap_session
+    return wrap_session(config, _showfuncargs_main)
+
+def _showfuncargs_main(config, session):
     session.perform_collect()
     if session.items:
         plugins = session.items[0].getplugins()
diff --git a/_pytest/runner.py b/_pytest/runner.py
--- a/_pytest/runner.py
+++ b/_pytest/runner.py
@@ -14,17 +14,15 @@
 #
 # pytest plugin hooks
 
-# XXX move to pytest_sessionstart and fix py.test owns tests
-def pytest_configure(config):
-    config._setupstate = SetupState()
+def pytest_sessionstart(session):
+    session._setupstate = SetupState()
 
 def pytest_sessionfinish(session, exitstatus):
-    if hasattr(session.config, '_setupstate'):
-        hook = session.config.hook
-        rep = hook.pytest__teardown_final(session=session)
-        if rep:
-            hook.pytest__teardown_final_logerror(session=session, report=rep)
-            session.exitstatus = 1
+    hook = session.config.hook
+    rep = hook.pytest__teardown_final(session=session)
+    if rep:
+        hook.pytest__teardown_final_logerror(session=session, report=rep)
+        session.exitstatus = 1
 
 class NodeInfo:
     def __init__(self, location):
@@ -46,16 +44,16 @@
     return reports
 
 def pytest_runtest_setup(item):
-    item.config._setupstate.prepare(item)
+    item.session._setupstate.prepare(item)
 
 def pytest_runtest_call(item):
     item.runtest()
 
 def pytest_runtest_teardown(item):
-    item.config._setupstate.teardown_exact(item)
+    item.session._setupstate.teardown_exact(item)
 
 def pytest__teardown_final(session):
-    call = CallInfo(session.config._setupstate.teardown_all, when="teardown")
+    call = CallInfo(session._setupstate.teardown_all, when="teardown")
     if call.excinfo:
         ntraceback = call.excinfo.traceback .cut(excludepath=py._pydir)
         call.excinfo.traceback = ntraceback.filter()
diff --git a/lib-python/conftest.py b/lib-python/conftest.py
--- a/lib-python/conftest.py
+++ b/lib-python/conftest.py
@@ -569,7 +569,6 @@
 #
 import os
 import time
-import socket
 import getpass
 
 class ReallyRunFileExternal(py.test.collect.Item): 
diff --git a/lib-python/modified-2.7/ctypes/__init__.py b/lib-python/modified-2.7/ctypes/__init__.py
--- a/lib-python/modified-2.7/ctypes/__init__.py
+++ b/lib-python/modified-2.7/ctypes/__init__.py
@@ -7,6 +7,7 @@
 
 __version__ = "1.1.0"
 
+import _ffi
 from _ctypes import Union, Structure, Array
 from _ctypes import _Pointer
 from _ctypes import CFuncPtr as _CFuncPtr
@@ -350,7 +351,7 @@
         self._FuncPtr = _FuncPtr
 
         if handle is None:
-            self._handle = _dlopen(self._name, mode)
+            self._handle = _ffi.CDLL(name)
         else:
             self._handle = handle
 
diff --git a/lib-python/modified-2.7/ctypes/test/test_cfuncs.py b/lib-python/modified-2.7/ctypes/test/test_cfuncs.py
--- a/lib-python/modified-2.7/ctypes/test/test_cfuncs.py
+++ b/lib-python/modified-2.7/ctypes/test/test_cfuncs.py
@@ -3,8 +3,8 @@
 
 import unittest
 from ctypes import *
-
 import _ctypes_test
+from test.test_support import impl_detail
 
 class CFunctions(unittest.TestCase):
     _dll = CDLL(_ctypes_test.__file__)
@@ -158,12 +158,14 @@
         self.assertEqual(self._dll.tf_bd(0, 42.), 14.)
         self.assertEqual(self.S(), 42)
 
+    @impl_detail('long double not supported by PyPy', pypy=False)
     def test_longdouble(self):
         self._dll.tf_D.restype = c_longdouble
         self._dll.tf_D.argtypes = (c_longdouble,)
         self.assertEqual(self._dll.tf_D(42.), 14.)
         self.assertEqual(self.S(), 42)
-
+        
+    @impl_detail('long double not supported by PyPy', pypy=False)
     def test_longdouble_plus(self):
         self._dll.tf_bD.restype = c_longdouble
         self._dll.tf_bD.argtypes = (c_byte, c_longdouble)
diff --git a/lib-python/modified-2.7/ctypes/test/test_functions.py b/lib-python/modified-2.7/ctypes/test/test_functions.py
--- a/lib-python/modified-2.7/ctypes/test/test_functions.py
+++ b/lib-python/modified-2.7/ctypes/test/test_functions.py
@@ -8,6 +8,7 @@
 from ctypes import *
 import sys, unittest
 from ctypes.test import xfail
+from test.test_support import impl_detail
 
 try:
     WINFUNCTYPE
@@ -144,6 +145,7 @@
         self.assertEqual(result, -21)
         self.assertEqual(type(result), float)
 
+    @impl_detail('long double not supported by PyPy', pypy=False)
     def test_longdoubleresult(self):
         f = dll._testfunc_D_bhilfD
         f.argtypes = [c_byte, c_short, c_int, c_long, c_float, c_longdouble]
diff --git a/lib-python/modified-2.7/ctypes/test/test_libc.py b/lib-python/modified-2.7/ctypes/test/test_libc.py
--- a/lib-python/modified-2.7/ctypes/test/test_libc.py
+++ b/lib-python/modified-2.7/ctypes/test/test_libc.py
@@ -26,6 +26,7 @@
         self.assertEqual(chars.raw, "   ,,aaaadmmmnpppsss\x00")
 
     def test_no_more_xfail(self):
+        import socket
         import ctypes.test
         self.assertTrue(not hasattr(ctypes.test, 'xfail'),
                         "You should incrementally grep for '@xfail' and remove them, they are real failures")
diff --git a/lib-python/modified-2.7/distutils/cygwinccompiler.py b/lib-python/modified-2.7/distutils/cygwinccompiler.py
--- a/lib-python/modified-2.7/distutils/cygwinccompiler.py
+++ b/lib-python/modified-2.7/distutils/cygwinccompiler.py
@@ -75,6 +75,9 @@
         elif msc_ver == '1500':
             # VS2008 / MSVC 9.0
             return ['msvcr90']
+        elif msc_ver == '1600':
+            # VS2010 / MSVC 10.0
+            return ['msvcr100']
         else:
             raise ValueError("Unknown MS Compiler version %s " % msc_ver)
 
diff --git a/lib-python/modified-2.7/opcode.py b/lib-python/modified-2.7/opcode.py
--- a/lib-python/modified-2.7/opcode.py
+++ b/lib-python/modified-2.7/opcode.py
@@ -189,7 +189,6 @@
 def_op('MAP_ADD', 147)
 
 # pypy modification, experimental bytecode
-def_op('CALL_LIKELY_BUILTIN', 200)    # #args + (#kwargs << 8)
 def_op('LOOKUP_METHOD', 201)          # Index in name list
 hasname.append(201)
 def_op('CALL_METHOD', 202)            # #args not including 'self'
diff --git a/lib-python/modified-2.7/pickle.py b/lib-python/modified-2.7/pickle.py
--- a/lib-python/modified-2.7/pickle.py
+++ b/lib-python/modified-2.7/pickle.py
@@ -168,7 +168,7 @@
 
 # Pickling machinery
 
-class Pickler:
+class Pickler(object):
 
     def __init__(self, file, protocol=None):
         """This takes a file-like object for writing a pickle data stream.
@@ -873,7 +873,7 @@
 
 # Unpickling machinery
 
-class Unpickler:
+class Unpickler(object):
 
     def __init__(self, file):
         """This takes a file-like object for reading a pickle data stream.
diff --git a/lib-python/modified-2.7/test/test_descr.py b/lib-python/modified-2.7/test/test_descr.py
--- a/lib-python/modified-2.7/test/test_descr.py
+++ b/lib-python/modified-2.7/test/test_descr.py
@@ -4399,13 +4399,10 @@
         self.assertTrue(l.__add__ != [5].__add__)
         self.assertTrue(l.__add__ != l.__mul__)
         self.assertTrue(l.__add__.__name__ == '__add__')
-        if hasattr(l.__add__, '__self__'):
-            # CPython
-            self.assertTrue(l.__add__.__self__ is l)
+        self.assertTrue(l.__add__.__self__ is l)
+        if hasattr(l.__add__, '__objclass__'):   # CPython
             self.assertTrue(l.__add__.__objclass__ is list)
-        else:
-            # Python implementations where [].__add__ is a normal bound method
-            self.assertTrue(l.__add__.im_self is l)
+        else:                                    # PyPy
             self.assertTrue(l.__add__.im_class is list)
         self.assertEqual(l.__add__.__doc__, list.__add__.__doc__)
         try:
diff --git a/lib-python/modified-2.7/test/test_dis.py b/lib-python/modified-2.7/test/test_dis.py
deleted file mode 100644
--- a/lib-python/modified-2.7/test/test_dis.py
+++ /dev/null
@@ -1,152 +0,0 @@
-# Minimal tests for dis module
-
-from test.test_support import run_unittest
-import unittest
-import sys
-import dis
-import StringIO
-
-
-def _f(a):
-    print a
-    return 1
-
-dis_f = """\
- %-4d         0 LOAD_FAST                0 (a)
-              3 PRINT_ITEM
-              4 PRINT_NEWLINE
-
- %-4d         5 LOAD_CONST               1 (1)
-              8 RETURN_VALUE
-"""%(_f.func_code.co_firstlineno + 1,
-     _f.func_code.co_firstlineno + 2)
-
-
-# we "call" rangexxx() instead of range() to disable the
-# pypy optimization that turns it into CALL_LIKELY_BUILTIN.
-def bug708901():
-    for res in rangexxx(1,
-                        10):
-        pass
-
-dis_bug708901 = """\
- %-4d         0 SETUP_LOOP              23 (to 26)
-              3 LOAD_GLOBAL              0 (rangexxx)
-              6 LOAD_CONST               1 (1)
-
- %-4d         9 LOAD_CONST               2 (10)
-             12 CALL_FUNCTION            2
-             15 GET_ITER
-        >>   16 FOR_ITER                 6 (to 25)
-             19 STORE_FAST               0 (res)
-
- %-4d        22 JUMP_ABSOLUTE           16
-        >>   25 POP_BLOCK
-        >>   26 LOAD_CONST               0 (None)
-             29 RETURN_VALUE
-"""%(bug708901.func_code.co_firstlineno + 1,
-     bug708901.func_code.co_firstlineno + 2,
-     bug708901.func_code.co_firstlineno + 3)
-
-
-def bug1333982(x=[]):
-    assert 0, ([s for s in x] +
-              1)
-    pass
-
-dis_bug1333982 = """\
- %-4d         0 LOAD_CONST               1 (0)
-              3 POP_JUMP_IF_TRUE        38
-              6 LOAD_GLOBAL              0 (AssertionError)
-              9 BUILD_LIST               0
-             12 LOAD_FAST                0 (x)
-             15 GET_ITER
-        >>   16 FOR_ITER                12 (to 31)
-             19 STORE_FAST               1 (s)
-             22 LOAD_FAST                1 (s)
-             25 LIST_APPEND              2
-             28 JUMP_ABSOLUTE           16
-
- %-4d   >>   31 LOAD_CONST               2 (1)
-             34 BINARY_ADD
-             35 RAISE_VARARGS            2
-
- %-4d   >>   38 LOAD_CONST               0 (None)
-             41 RETURN_VALUE
-"""%(bug1333982.func_code.co_firstlineno + 1,
-     bug1333982.func_code.co_firstlineno + 2,
-     bug1333982.func_code.co_firstlineno + 3)
-
-_BIG_LINENO_FORMAT = """\
-%3d           0 LOAD_GLOBAL              0 (spam)
-              3 POP_TOP
-              4 LOAD_CONST               0 (None)
-              7 RETURN_VALUE
-"""
-
-class DisTests(unittest.TestCase):
-    def do_disassembly_test(self, func, expected):
-        s = StringIO.StringIO()
-        save_stdout = sys.stdout
-        sys.stdout = s
-        dis.dis(func)
-        sys.stdout = save_stdout
-        got = s.getvalue()
-        # Trim trailing blanks (if any).
-        lines = got.split('\n')
-        lines = [line.rstrip() for line in lines]
-        expected = expected.split("\n")
-        import difflib
-        if expected != lines:
-            self.fail(
-                "events did not match expectation:\n" +
-                "\n".join(difflib.ndiff(expected,
-                                        lines)))
-
-    def test_opmap(self):
-        self.assertEqual(dis.opmap["STOP_CODE"], 0)
-        self.assertIn(dis.opmap["LOAD_CONST"], dis.hasconst)
-        self.assertIn(dis.opmap["STORE_NAME"], dis.hasname)
-
-    def test_opname(self):
-        self.assertEqual(dis.opname[dis.opmap["LOAD_FAST"]], "LOAD_FAST")
-
-    def test_boundaries(self):
-        self.assertEqual(dis.opmap["EXTENDED_ARG"], dis.EXTENDED_ARG)
-        self.assertEqual(dis.opmap["STORE_NAME"], dis.HAVE_ARGUMENT)
-
-    def test_dis(self):
-        self.do_disassembly_test(_f, dis_f)
-
-    def test_bug_708901(self):
-        self.do_disassembly_test(bug708901, dis_bug708901)
-
-    def test_bug_1333982(self):
-        # This one is checking bytecodes generated for an `assert` statement,
-        # so fails if the tests are run with -O.  Skip this test then.
-        if __debug__:
-            self.do_disassembly_test(bug1333982, dis_bug1333982)
-
-    def test_big_linenos(self):
-        def func(count):
-            namespace = {}
-            func = "def foo():\n " + "".join(["\n "] * count + ["spam\n"])
-            exec func in namespace
-            return namespace['foo']
-
-        # Test all small ranges
-        for i in xrange(1, 300):
-            expected = _BIG_LINENO_FORMAT % (i + 2)
-            self.do_disassembly_test(func(i), expected)
-
-        # Test some larger ranges too
-        for i in xrange(300, 5000, 10):
-            expected = _BIG_LINENO_FORMAT % (i + 2)
-            self.do_disassembly_test(func(i), expected)
-
-def test_main():
-    run_unittest(DisTests)
-
-
-if __name__ == "__main__":
-    test_main()
diff --git a/lib-python/modified-2.7/test/test_extcall.py b/lib-python/modified-2.7/test/test_extcall.py
--- a/lib-python/modified-2.7/test/test_extcall.py
+++ b/lib-python/modified-2.7/test/test_extcall.py
@@ -299,7 +299,7 @@
         def f(a):
             return a
         self.assertEqual(f(**{u'a': 4}), 4)
-        self.assertRaises(TypeError, lambda: f(**{u'stören': 4}))
+        self.assertRaises(TypeError, f, **{u'stören': 4})
         self.assertRaises(TypeError, f, **{u'someLongString':2})
         try:
             f(a=4, **{u'a': 4})
diff --git a/lib-python/2.7/test/test_multibytecodec.py b/lib-python/modified-2.7/test/test_multibytecodec.py
copy from lib-python/2.7/test/test_multibytecodec.py
copy to lib-python/modified-2.7/test/test_multibytecodec.py
--- a/lib-python/2.7/test/test_multibytecodec.py
+++ b/lib-python/modified-2.7/test/test_multibytecodec.py
@@ -42,7 +42,7 @@
         dec = codecs.getdecoder('euc-kr')
         myreplace  = lambda exc: (u'', sys.maxint+1)
         codecs.register_error('test.cjktest', myreplace)
-        self.assertRaises(IndexError, dec,
+        self.assertRaises((IndexError, OverflowError), dec,
                           'apple\x92ham\x93spam', 'test.cjktest')
 
     def test_codingspec(self):
diff --git a/lib-python/2.7/test/test_multibytecodec_support.py b/lib-python/modified-2.7/test/test_multibytecodec_support.py
copy from lib-python/2.7/test/test_multibytecodec_support.py
copy to lib-python/modified-2.7/test/test_multibytecodec_support.py
--- a/lib-python/2.7/test/test_multibytecodec_support.py
+++ b/lib-python/modified-2.7/test/test_multibytecodec_support.py
@@ -107,8 +107,8 @@
         def myreplace(exc):
             return (u'x', sys.maxint + 1)
         codecs.register_error("test.cjktest", myreplace)
-        self.assertRaises(IndexError, self.encode, self.unmappedunicode,
-                          'test.cjktest')
+        self.assertRaises((IndexError, OverflowError), self.encode,
+                          self.unmappedunicode, 'test.cjktest')
 
     def test_callback_None_index(self):
         def myreplace(exc):
diff --git a/lib-python/2.7/test/test_sets.py b/lib-python/modified-2.7/test/test_sets.py
copy from lib-python/2.7/test/test_sets.py
copy to lib-python/modified-2.7/test/test_sets.py
--- a/lib-python/2.7/test/test_sets.py
+++ b/lib-python/modified-2.7/test/test_sets.py
@@ -686,7 +686,9 @@
         set_list = sorted(self.set)
         self.assertEqual(len(dup_list), len(set_list))
         for i, el in enumerate(dup_list):
-            self.assertIs(el, set_list[i])
+            # Object identity is not guaranteed for immutable objects, so we
+            # can't use assertIs here.
+            self.assertEqual(el, set_list[i])
 
     def test_deep_copy(self):
         dup = copy.deepcopy(self.set)
diff --git a/lib-python/modified-2.7/test/test_support.py b/lib-python/modified-2.7/test/test_support.py
--- a/lib-python/modified-2.7/test/test_support.py
+++ b/lib-python/modified-2.7/test/test_support.py
@@ -1066,7 +1066,7 @@
         if '--pdb' in sys.argv:
             import pdb, traceback
             traceback.print_tb(exc_info[2])
-            pdb.post_mortem(exc_info[2], pdb.Pdb)
+            pdb.post_mortem(exc_info[2])
 
 # ----------------------------------
 
diff --git a/lib-python/modified-2.7/test/test_weakref.py b/lib-python/modified-2.7/test/test_weakref.py
--- a/lib-python/modified-2.7/test/test_weakref.py
+++ b/lib-python/modified-2.7/test/test_weakref.py
@@ -993,13 +993,13 @@
         self.assertTrue(len(weakdict) == 2)
         k, v = weakdict.popitem()
         self.assertTrue(len(weakdict) == 1)
-        if k is key1:
+        if k == key1:
             self.assertTrue(v is value1)
         else:
             self.assertTrue(v is value2)
         k, v = weakdict.popitem()
         self.assertTrue(len(weakdict) == 0)
-        if k is key1:
+        if k == key1:
             self.assertTrue(v is value1)
         else:
             self.assertTrue(v is value2)
diff --git a/lib_pypy/_ctypes/__init__.py b/lib_pypy/_ctypes/__init__.py
--- a/lib_pypy/_ctypes/__init__.py
+++ b/lib_pypy/_ctypes/__init__.py
@@ -18,7 +18,16 @@
 if _os.name in ("nt", "ce"):
     from _rawffi import FormatError
     from _rawffi import check_HRESULT as _check_HRESULT
-    CopyComPointer = None # XXX
+
+    def CopyComPointer(src, dst):
+        from ctypes import c_void_p, cast
+        if src:
+            hr = src[0][0].AddRef(src)
+            if hr & 0x80000000:
+                return hr
+        dst[0] = cast(src, c_void_p).value
+        return 0
+
     LoadLibrary = dlopen
 
 from _rawffi import FUNCFLAG_STDCALL, FUNCFLAG_CDECL, FUNCFLAG_PYTHONAPI
diff --git a/lib_pypy/_ctypes/array.py b/lib_pypy/_ctypes/array.py
--- a/lib_pypy/_ctypes/array.py
+++ b/lib_pypy/_ctypes/array.py
@@ -208,6 +208,9 @@
     def _get_buffer_value(self):
         return self._buffer.buffer
 
+    def _to_ffi_param(self):
+        return self._get_buffer_value()
+
 ARRAY_CACHE = {}
 
 def create_array_type(base, length):
diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py
--- a/lib_pypy/_ctypes/basics.py
+++ b/lib_pypy/_ctypes/basics.py
@@ -1,5 +1,6 @@
 
 import _rawffi
+import _ffi
 import sys
 
 keepalive_key = str # XXX fix this when provided with test
@@ -46,6 +47,15 @@
         else:
             return self.from_param(as_parameter)
 
+    def get_ffi_param(self, value):
+        cdata = self.from_param(value)
+        return cdata, cdata._to_ffi_param()
+
+    def get_ffi_argtype(self):
+        if self._ffiargtype:
+            return self._ffiargtype
+        return _shape_to_ffi_type(self._ffiargshape)
+
     def _CData_output(self, resbuffer, base=None, index=-1):
         #assert isinstance(resbuffer, _rawffi.ArrayInstance)
         """Used when data exits ctypes and goes into user code.
@@ -99,6 +109,7 @@
     """
     __metaclass__ = _CDataMeta
     _objects = None
+    _ffiargtype = None
 
     def __init__(self, *args, **kwds):
         raise TypeError("%s has no type" % (type(self),))
@@ -119,11 +130,20 @@
     def _get_buffer_value(self):
         return self._buffer[0]
 
+    def _to_ffi_param(self):
+        if self.__class__._is_pointer_like():
+            return self._get_buffer_value()
+        else:
+            return self.value
+
     def __buffer__(self):
         return buffer(self._buffer)
 
     def _get_b_base(self):
-        return self._base
+        try:
+            return self._base
+        except AttributeError:
+            return None
     _b_base_ = property(_get_b_base)
     _b_needsfree_ = False
 
@@ -150,7 +170,7 @@
     return pointer(cdata)
 
 def cdata_from_address(self, address):
-    # fix the address, in case it's unsigned
+    # fix the address: turn it into an unsigned value, in case it's a negative number
     address = address & (sys.maxint * 2 + 1)
     instance = self.__new__(self)
     lgt = getattr(self, '_length_', 1)
@@ -159,3 +179,50 @@
 
 def addressof(tp):
     return tp._buffer.buffer
+
+
+# ----------------------------------------------------------------------
+
+def is_struct_shape(shape):
+    # see the corresponding code to set the shape in
+    # _ctypes.structure._set_shape
+    return (isinstance(shape, tuple) and
+            len(shape) == 2 and
+            isinstance(shape[0], _rawffi.Structure) and
+            shape[1] == 1)
+
+def _shape_to_ffi_type(shape):
+    try:
+        return _shape_to_ffi_type.typemap[shape]
+    except KeyError:
+        pass
+    if is_struct_shape(shape):
+        return shape[0].get_ffi_type()
+    #
+    assert False, 'unknown shape %s' % (shape,)
+
+
+_shape_to_ffi_type.typemap =  {
+    'c' : _ffi.types.char,
+    'b' : _ffi.types.sbyte,
+    'B' : _ffi.types.ubyte,
+    'h' : _ffi.types.sshort,
+    'u' : _ffi.types.unichar,
+    'H' : _ffi.types.ushort,
+    'i' : _ffi.types.sint,
+    'I' : _ffi.types.uint,
+    'l' : _ffi.types.slong,
+    'L' : _ffi.types.ulong,
+    'q' : _ffi.types.slonglong,
+    'Q' : _ffi.types.ulonglong,
+    'f' : _ffi.types.float,
+    'd' : _ffi.types.double,
+    's' : _ffi.types.void_p,
+    'P' : _ffi.types.void_p,
+    'z' : _ffi.types.void_p,
+    'O' : _ffi.types.void_p,
+    'Z' : _ffi.types.void_p,
+    'X' : _ffi.types.void_p,
+    'v' : _ffi.types.sshort,
+    }
+
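
A quick sketch of how the new typemap is meant to be consumed (PyPy-only; assumes c_int's _ffiargshape is the 'i' shape code, which is set up elsewhere in _ctypes): get_ffi_argtype() falls back to _shape_to_ffi_type(), which looks the shape code up in the table above.

    import _ffi                 # PyPy's low-level ffi module
    from ctypes import c_int

    # 'i' resolves to _ffi.types.sint via _shape_to_ffi_type.typemap
    assert c_int.get_ffi_argtype() is _ffi.types.sint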
diff --git a/lib_pypy/_ctypes/function.py b/lib_pypy/_ctypes/function.py
--- a/lib_pypy/_ctypes/function.py
+++ b/lib_pypy/_ctypes/function.py
@@ -1,12 +1,15 @@
+
+from _ctypes.basics import _CData, _CDataMeta, cdata_from_address
+from _ctypes.primitive import SimpleType, _SimpleCData
+from _ctypes.basics import ArgumentError, keepalive_key
+from _ctypes.basics import is_struct_shape
+from _ctypes.builtin import set_errno, set_last_error
 import _rawffi
+import _ffi
 import sys
 import traceback
 import warnings
 
-from _ctypes.basics import ArgumentError, keepalive_key
-from _ctypes.basics import _CData, _CDataMeta, cdata_from_address
-from _ctypes.builtin import set_errno, set_last_error
-from _ctypes.primitive import SimpleType
 
 # XXX this file needs huge refactoring I fear
 
@@ -24,6 +27,7 @@
 
 WIN64 = sys.platform == 'win32' and sys.maxint == 2**63 - 1
 
+
 def get_com_error(errcode, riid, pIunk):
     "Win32 specific: build a COM Error exception"
     # XXX need C support code
@@ -36,6 +40,7 @@
     funcptr.restype = int
     return funcptr(*args)
 
+
 class CFuncPtrType(_CDataMeta):
     # XXX write down here defaults and such things
 
@@ -50,6 +55,7 @@
 
     from_address = cdata_from_address
 
+
 class CFuncPtr(_CData):
     __metaclass__ = CFuncPtrType
 
@@ -65,10 +71,12 @@
     callable = None
     _ptr = None
     _buffer = None
+    _address = None
     # win32 COM properties
     _paramflags = None
     _com_index = None
     _com_iid = None
+    _is_fastpath = False
 
     __restype_set = False
 
@@ -85,8 +93,11 @@
                     raise TypeError(
                         "item %d in _argtypes_ has no from_param method" % (
                             i + 1,))
-            self._argtypes_ = argtypes
-
+            #
+            if all([hasattr(argtype, '_ffiargshape') for argtype in argtypes]):
+                fastpath_cls = make_fastpath_subclass(self.__class__)
+                fastpath_cls.enable_fastpath_maybe(self)
+            self._argtypes_ = list(argtypes)
     argtypes = property(_getargtypes, _setargtypes)
 
     def _getparamflags(self):
@@ -133,6 +144,7 @@
 
     paramflags = property(_getparamflags, _setparamflags)
 
+
     def _getrestype(self):
         return self._restype_
 
@@ -146,27 +158,24 @@
                 callable(restype)):
             raise TypeError("restype must be a type, a callable, or None")
         self._restype_ = restype
-
+        
     def _delrestype(self):
         self._ptr = None
         del self._restype_
-
+        
     restype = property(_getrestype, _setrestype, _delrestype)
 
     def _geterrcheck(self):
         return getattr(self, '_errcheck_', None)
-
     def _seterrcheck(self, errcheck):
         if not callable(errcheck):
             raise TypeError("The errcheck attribute must be callable")
         self._errcheck_ = errcheck
-
     def _delerrcheck(self):
         try:
             del self._errcheck_
         except AttributeError:
             pass
-
     errcheck = property(_geterrcheck, _seterrcheck, _delerrcheck)
 
     def _ffishapes(self, args, restype):
@@ -181,6 +190,14 @@
             restype = 'O' # void
         return argtypes, restype
 
+    def _set_address(self, address):
+        if not self._buffer:
+            self._buffer = _rawffi.Array('P')(1)
+        self._buffer[0] = address
+
+    def _get_address(self):
+        return self._buffer[0]
+
     def __init__(self, *args):
         self.name = None
         self._objects = {keepalive_key(0):self}
@@ -188,7 +205,7 @@
 
         # Empty function object -- this is needed for casts
         if not args:
-            self._buffer = _rawffi.Array('P')(1)
+            self._set_address(0)
             return
 
         argsl = list(args)
@@ -196,20 +213,24 @@
 
         # Direct construction from raw address
         if isinstance(argument, (int, long)) and not argsl:
-            ffiargs, ffires = self._ffishapes(self._argtypes_, self._restype_)
-            self._ptr = _rawffi.FuncPtr(argument, ffiargs, ffires, self._flags_)
-            self._buffer = self._ptr.byptr()
+            self._set_address(argument)
+            restype = self._restype_
+            if restype is None:
+                import ctypes
+                restype = ctypes.c_int
+            self._ptr = self._getfuncptr_fromaddress(self._argtypes_, restype)
             return
 
-        # A callback into Python
+        
+        # A callback into python
         if callable(argument) and not argsl:
             self.callable = argument
             ffiargs, ffires = self._ffishapes(self._argtypes_, self._restype_)
             if self._restype_ is None:
                 ffires = None
-            self._ptr = _rawffi.CallbackPtr(self._wrap_callable(
-                argument, self.argtypes
-                ), ffiargs, ffires, self._flags_)
+            self._ptr = _rawffi.CallbackPtr(self._wrap_callable(argument,
+                                                                self.argtypes),
+                                            ffiargs, ffires, self._flags_)
             self._buffer = self._ptr.byptr()
             return
 
@@ -218,7 +239,7 @@
             import ctypes
             self.name, dll = argument
             if isinstance(dll, str):
-                self.dll = ctypes.CDLL(dll)
+                self.dll = ctypes.CDLL(self.dll)
             else:
                 self.dll = dll
             if argsl:
@@ -227,7 +248,7 @@
                     raise TypeError("Unknown constructor %s" % (args,))
             # We need to check dll anyway
             ptr = self._getfuncptr([], ctypes.c_int)
-            self._buffer = ptr.byptr()
+            self._set_address(ptr.getaddr())
             return
 
         # A COM function call, by index
@@ -270,15 +291,15 @@
                     # than the length of the argtypes tuple.
                     args = args[:len(self._argtypes_)]
             else:
-                plural = len(argtypes) > 1 and "s" or ""
+                plural = len(self._argtypes_) > 1 and "s" or ""
                 raise TypeError(
                     "This function takes %d argument%s (%s given)"
-                    % (len(argtypes), plural, len(args)))
+                    % (len(self._argtypes_), plural, len(args)))
 
             # check that arguments are convertible
             ## XXX Not as long as ctypes.cast is a callback function with
             ## py_object arguments...
-            ## self._convert_args(argtypes, args, {})
+            ## self._convert_args(self._argtypes_, args, {})
 
             try:
                 res = self.callable(*args)
@@ -306,83 +327,75 @@
                 raise ValueError(
                     "native COM method call without 'this' parameter"
                     )
-            thisarg = cast(args[0], POINTER(POINTER(c_void_p))).contents
-            argtypes = [c_void_p] + list(argtypes)
-            args = list(args)
-            args[0] = args[0].value
+            thisarg = cast(args[0], POINTER(POINTER(c_void_p)))
+            keepalives, newargs, argtypes, outargs = self._convert_args(argtypes,
+                                                                        args[1:], kwargs)
+            newargs.insert(0, args[0].value)
+            argtypes.insert(0, c_void_p)
         else:
             thisarg = None
+            keepalives, newargs, argtypes, outargs = self._convert_args(argtypes,
+                                                                        args, kwargs)
 
-        args, outargs = self._convert_args(argtypes, args, kwargs)
-        argtypes = [type(arg) for arg in args]
+        funcptr = self._getfuncptr(argtypes, self._restype_, thisarg)
+        result = self._call_funcptr(funcptr, *newargs)
+        result = self._do_errcheck(result, args)
 
-        restype = self._restype_
-        funcptr = self._getfuncptr(argtypes, restype, thisarg)
+        if not outargs:
+            return result
+
+        simple_cdata = type(c_void_p()).__bases__[0]
+        outargs = [x.value if type(x).__bases__[0] is simple_cdata else x
+                   for x in outargs]
+
+        if len(outargs) == 1:
+            return outargs[0]
+        return tuple(outargs)
+
+    def _call_funcptr(self, funcptr, *newargs):
+
         if self._flags_ & _rawffi.FUNCFLAG_USE_ERRNO:
             set_errno(_rawffi.get_errno())
         if self._flags_ & _rawffi.FUNCFLAG_USE_LASTERROR:
             set_last_error(_rawffi.get_last_error())
         try:
-            resbuffer = funcptr(*[arg._get_buffer_for_param()._buffer
-                                  for arg in args])
+            result = funcptr(*newargs)
         finally:
             if self._flags_ & _rawffi.FUNCFLAG_USE_ERRNO:
                 set_errno(_rawffi.get_errno())
             if self._flags_ & _rawffi.FUNCFLAG_USE_LASTERROR:
                 set_last_error(_rawffi.get_last_error())
+        #
+        return self._build_result(self._restype_, result, newargs)
 
-        result = None
-        if self._com_index:
-            if resbuffer[0] & 0x80000000:
-                raise get_com_error(resbuffer[0],
-                                    self._com_iid, args[0])
-            else:
-                result = int(resbuffer[0])
-        elif restype is not None:
-            checker = getattr(self.restype, '_check_retval_', None)
-            if checker:
-                val = restype(resbuffer[0])
-                # the original ctypes seems to make the distinction between
-                # classes defining a new type, and their subclasses
-                if '_type_' in restype.__dict__:
-                    val = val.value
-                result = checker(val)
-            elif not isinstance(restype, _CDataMeta):
-                result = restype(resbuffer[0])
-            else:
-                result = restype._CData_retval(resbuffer)
-
+    def _do_errcheck(self, result, args):
         # The 'errcheck' protocol
         if self._errcheck_:
             v = self._errcheck_(result, self, args)
             # If the errcheck function failed, let it throw
-            # If the errcheck function returned callargs unchanged,
+            # If the errcheck function returned newargs unchanged,
             # continue normal processing.
             # If the errcheck function returned something else,
             # use that as result.
             if v is not args:
-                result = v
+                return v
+        return result
 
-        if not outargs:
-            return result
-
-        if len(outargs) == 1:
-            return outargs[0]
-
-        return tuple(outargs)
+    def _getfuncptr_fromaddress(self, argtypes, restype):
+        address = self._get_address()
+        ffiargs = [argtype.get_ffi_argtype() for argtype in argtypes]
+        ffires = restype.get_ffi_argtype()
+        return _ffi.FuncPtr.fromaddr(address, '', ffiargs, ffires)
 
     def _getfuncptr(self, argtypes, restype, thisarg=None):
-        if self._ptr is not None and argtypes is self._argtypes_:
+        if self._ptr is not None and (argtypes is self._argtypes_ or argtypes == self._argtypes_):
             return self._ptr
         if restype is None or not isinstance(restype, _CDataMeta):
             import ctypes
             restype = ctypes.c_int
-        argshapes = [arg._ffiargshape for arg in argtypes]
-        resshape = restype._ffiargshape
         if self._buffer is not None:
-            ptr = _rawffi.FuncPtr(self._buffer[0], argshapes, resshape,
-                                  self._flags_)
-            if argtypes is self._argtypes_:
+            ptr = self._getfuncptr_fromaddress(argtypes, restype)
+            if argtypes == self._argtypes_:
                 self._ptr = ptr
             return ptr
 
@@ -390,15 +403,21 @@
             # extract the address from the object's virtual table
             if not thisarg:
                 raise ValueError("COM method call without VTable")
-            ptr = thisarg[self._com_index - 0x1000]
-            return _rawffi.FuncPtr(ptr, argshapes, resshape, self._flags_)
-
+            ptr = thisarg[0][self._com_index - 0x1000]
+            ffiargs = [argtype.get_ffi_argtype() for argtype in argtypes]
+            ffires = restype.get_ffi_argtype()
+            return _ffi.FuncPtr.fromaddr(ptr, '', ffiargs, ffires)
+        
         cdll = self.dll._handle
         try:
-            return cdll.ptr(self.name, argshapes, resshape, self._flags_)
+            ffi_argtypes = [argtype.get_ffi_argtype() for argtype in argtypes]
+            ffi_restype = restype.get_ffi_argtype()
+            self._ptr = cdll.getfunc(self.name, ffi_argtypes, ffi_restype)
+            return self._ptr
         except AttributeError:
             if self._flags_ & _rawffi.FUNCFLAG_CDECL:
                 raise
+
             # Win64 has no stdcall calling conv, so it should also not have the
             # name mangling of it.
             if WIN64:
@@ -409,23 +428,32 @@
             for i in range(33):
                 mangled_name = "_%s@%d" % (self.name, i*4)
                 try:
-                    return cdll.ptr(mangled_name, argshapes, resshape,
-                                    self._flags_)
+                    return cdll.getfunc(mangled_name,
+                                        ffi_argtypes, ffi_restype,
+                                        # XXX self._flags_
+                                        )
                 except AttributeError:
                     pass
             raise
 
-    @staticmethod
-    def _conv_param(argtype, arg):
-        from ctypes import c_char_p, c_wchar_p, c_void_p, c_int
+    @classmethod
+    def _conv_param(cls, argtype, arg):
+        if isinstance(argtype, _CDataMeta):
+            cobj, ffiparam = argtype.get_ffi_param(arg)
+            return cobj, ffiparam, argtype
+        
         if argtype is not None:
             arg = argtype.from_param(arg)
         if hasattr(arg, '_as_parameter_'):
             arg = arg._as_parameter_
         if isinstance(arg, _CData):
-            # The usual case when argtype is defined
-            cobj = arg
-        elif isinstance(arg, str):
+            return arg, arg._to_ffi_param(), type(arg)
+        #
+        # non-usual case: we do the import here to save a lot of code in the
+        # jit trace of the normal case
+        from ctypes import c_char_p, c_wchar_p, c_void_p, c_int
+        #
+        if isinstance(arg, str):
             cobj = c_char_p(arg)
         elif isinstance(arg, unicode):
             cobj = c_wchar_p(arg)
@@ -435,18 +463,17 @@
             cobj = c_int(arg)
         else:
             raise TypeError("Don't know how to handle %s" % (arg,))
-        return cobj
+
+        return cobj, cobj._to_ffi_param(), type(cobj)
 
     def _convert_args(self, argtypes, args, kwargs, marker=object()):
-        callargs = []
+        newargs = []
         outargs = []
+        keepalives = []
+        newargtypes = []
         total = len(args)
         paramflags = self._paramflags
-
-        if self._com_index:
-            inargs_idx = 1
-        else:
-            inargs_idx = 0
+        inargs_idx = 0
 
         if not paramflags and total < len(argtypes):
             raise TypeError("not enough arguments")
@@ -470,8 +497,10 @@
                     val = defval
                     if val is marker:
                         val = 0
-                    wrapped = self._conv_param(argtype, val)
-                    callargs.append(wrapped)
+                    keepalive, newarg, newargtype = self._conv_param(argtype, val)
+                    keepalives.append(keepalive)
+                    newargs.append(newarg)
+                    newargtypes.append(newargtype)
                 elif flag in (0, PARAMFLAG_FIN):
                     if inargs_idx < total:
                         val = args[inargs_idx]
@@ -485,38 +514,107 @@
                         raise TypeError("required argument '%s' missing" % name)
                     else:
                         raise TypeError("not enough arguments")
-                    wrapped = self._conv_param(argtype, val)
-                    callargs.append(wrapped)
+                    keepalive, newarg, newargtype = self._conv_param(argtype, val)
+                    keepalives.append(keepalive)
+                    newargs.append(newarg)
+                    newargtypes.append(newargtype)
                 elif flag == PARAMFLAG_FOUT:
                     if defval is not marker:
                         outargs.append(defval)
-                        wrapped = self._conv_param(argtype, defval)
+                        keepalive, newarg, newargtype = self._conv_param(argtype, defval)
                     else:
                         import ctypes
                         val = argtype._type_()
                         outargs.append(val)
-                        wrapped = ctypes.byref(val)
-                    callargs.append(wrapped)
+                        keepalive = None
+                        newarg = ctypes.byref(val)
+                        newargtype = type(newarg)
+                    keepalives.append(keepalive)
+                    newargs.append(newarg)
+                    newargtypes.append(newargtype)
                 else:
                     raise ValueError("paramflag %d not yet implemented" % flag)
             else:
                 try:
-                    wrapped = self._conv_param(argtype, args[i])
+                    keepalive, newarg, newargtype = self._conv_param(argtype, args[i])
                 except (UnicodeError, TypeError, ValueError), e:
                     raise ArgumentError(str(e))
-                callargs.append(wrapped)
+                keepalives.append(keepalive)
+                newargs.append(newarg)
+                newargtypes.append(newargtype)
                 inargs_idx += 1
 
-        if len(callargs) < total:
-            extra = args[len(callargs):]
+        if len(newargs) < len(args):
+            extra = args[len(newargs):]
             for i, arg in enumerate(extra):
                 try:
-                    wrapped = self._conv_param(None, arg)
+                    keepalive, newarg, newargtype = self._conv_param(None, arg)
                 except (UnicodeError, TypeError, ValueError), e:
                     raise ArgumentError(str(e))
-                callargs.append(wrapped)
+                keepalives.append(keepalive)
+                newargs.append(newarg)
+                newargtypes.append(newargtype)
+        return keepalives, newargs, newargtypes, outargs
 
-        return callargs, outargs
+    
+    def _wrap_result(self, restype, result):
+        """
+        Convert from low-level repr of the result to the high-level python
+        one.
+        """
+        # hack for performance: if restype is a "simple" primitive type, don't
+        # allocate the buffer because it's going to be thrown away immediately
+        if restype.__bases__[0] is _SimpleCData and not restype._is_pointer_like():
+            return result
+        #
+        shape = restype._ffishape
+        if is_struct_shape(shape):
+            buf = result
+        else:
+            buf = _rawffi.Array(shape)(1, autofree=True)
+            buf[0] = result
+        retval = restype._CData_retval(buf)
+        return retval
+
+    def _build_result(self, restype, result, argsandobjs):
+        """Build the function result:
+           If there is no OUT parameter, return the actual function result
+           If there is one OUT parameter, return it
+           If there are many OUT parameters, return a tuple"""
+
+        # XXX: note for the future: the function used to take a "resbuffer",
+        # i.e. an array of ints. Now it takes a result, which is already a
+        # python object. All places that do "resbuffer[0]" should check that
+        # result is actually an int and just use it.
+        #
+        # Also, argsandobjs used to be "args" in __call__, now it's "newargs"
+        # (i.e., the already unwrapped objects). It's used only when we have a
+        # PARAMFLAG_FOUT and it's probably wrong, I'll fix it when I find a
+        # failing test
+
+        retval = None
+
+        if restype is not None:
+            checker = getattr(self.restype, '_check_retval_', None)
+            if checker:
+                val = restype(result)
+                # the original ctypes seems to make the distinction between
+                # classes defining a new type, and their subclasses
+                if '_type_' in restype.__dict__:
+                    val = val.value
+                # XXX Raise a COMError when restype is HRESULT and
+                # checker(val) fails.  How to check for restype == HRESULT?
+                if self._com_index:
+                    if result & 0x80000000:
+                        raise get_com_error(result, None, None)
+                else:
+                    retval = checker(val)
+            elif not isinstance(restype, _CDataMeta):
+                retval = restype(result)
+            else:
+                retval = self._wrap_result(restype, result)
+
+        return retval
 
     def __nonzero__(self):
         return self._com_index is not None or bool(self._buffer[0])
@@ -532,3 +630,61 @@
                 self._ptr.free()
                 self._ptr = None
             self._needs_free = False
+
+
+def make_fastpath_subclass(CFuncPtr):
+    if CFuncPtr._is_fastpath:
+        return CFuncPtr
+    #
+    try:
+        return make_fastpath_subclass.memo[CFuncPtr]
+    except KeyError:
+        pass
+
+    class CFuncPtrFast(CFuncPtr):
+
+        _is_fastpath = True
+        _slowpath_allowed = True # set to False by tests
+
+        @classmethod
+        def enable_fastpath_maybe(cls, obj):
+            if (obj.callable is None and
+                obj._com_index is None):
+                obj.__class__ = cls
+
+        def __rollback(self):
+            assert self._slowpath_allowed
+            self.__class__ = CFuncPtr
+
+        # disable the fast path if we reset argtypes
+        def _setargtypes(self, argtypes):
+            self.__rollback()
+            self._setargtypes(argtypes)
+        argtypes = property(CFuncPtr._getargtypes, _setargtypes)
+
+        def _setcallable(self, func):
+            self.__rollback()
+            self.callable = func
+        callable = property(lambda x: None, _setcallable)
+
+        def _setcom_index(self, idx):
+            self.__rollback()
+            self._com_index = idx
+        _com_index = property(lambda x: None, _setcom_index)
+
+        def __call__(self, *args):
+            thisarg = None
+            argtypes = self._argtypes_
+            restype = self._restype_
+            funcptr = self._getfuncptr(argtypes, restype, thisarg)
+            try:
+                result = self._call_funcptr(funcptr, *args)
+                result = self._do_errcheck(result, args)
+            except (TypeError, ArgumentError): # XXX, should be FFITypeError
+                assert self._slowpath_allowed
+                return CFuncPtr.__call__(self, *args)
+            return result
+
+    make_fastpath_subclass.memo[CFuncPtr] = CFuncPtrFast
+    return CFuncPtrFast
+make_fastpath_subclass.memo = {}
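
For illustration, the pattern used by make_fastpath_subclass above is a memoized
subclass factory: an instance's __class__ is flipped to a fast variant, and
flipped back to the base class whenever the fast path cannot handle a call.  A
minimal, generic sketch of the idea (Base and fast_call are placeholder names,
not PyPy APIs):

    def make_fast_subclass(Base):
        # return a cached fast subclass of Base, creating it on first use
        if getattr(Base, '_is_fast', False):
            return Base
        try:
            return make_fast_subclass.memo[Base]
        except KeyError:
            pass

        class Fast(Base):
            _is_fast = True

            def __call__(self, *args):
                try:
                    return self.fast_call(*args)   # placeholder fast entry point
                except NotImplementedError:
                    self.__class__ = Base          # roll back to the slow path
                    return Base.__call__(self, *args)

        make_fast_subclass.memo[Base] = Fast
        return Fast
    make_fast_subclass.memo = {}
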
diff --git a/lib_pypy/_ctypes/pointer.py b/lib_pypy/_ctypes/pointer.py
--- a/lib_pypy/_ctypes/pointer.py
+++ b/lib_pypy/_ctypes/pointer.py
@@ -1,6 +1,7 @@
 
 import _rawffi
-from _ctypes.basics import _CData, _CDataMeta, cdata_from_address
+import _ffi
+from _ctypes.basics import _CData, _CDataMeta, cdata_from_address, ArgumentError
 from _ctypes.basics import keepalive_key, store_reference, ensure_objects
 from _ctypes.basics import sizeof, byref
 from _ctypes.array import Array, array_get_slice_params, array_slice_getitem,\
@@ -19,7 +20,7 @@
             length     = 1,
             _ffiargshape = 'P',
             _ffishape  = 'P',
-            _fficompositesize = None
+            _fficompositesize = None,
         )
         # XXX check if typedict['_type_'] is any sane
         # XXX remember about paramfunc
@@ -66,6 +67,7 @@
         self._ffiarray = ffiarray
         self.__init__ = __init__
         self._type_ = TP
+        self._ffiargtype = _ffi.types.Pointer(TP.get_ffi_argtype())
 
     from_address = cdata_from_address
 
@@ -114,6 +116,17 @@
 
     contents = property(getcontents, setcontents)
 
+    def _as_ffi_pointer_(self, ffitype):
+        return as_ffi_pointer(self, ffitype)
+
+def as_ffi_pointer(value, ffitype):
+    my_ffitype = type(value).get_ffi_argtype()
+    # for now, we always allow types.pointer, else a lot of tests
+    # break. We need to rethink how pointers are represented, though
+    if my_ffitype is not ffitype and ffitype is not _ffi.types.void_p:
+        raise ArgumentError, "expected %s instance, got %s" % (type(value), ffitype)
+    return value._get_buffer_value()
+
 def _cast_addr(obj, _, tp):
     if not (isinstance(tp, _CDataMeta) and tp._is_pointer_like()):
         raise TypeError("cast() argument 2 must be a pointer type, not %s"
diff --git a/lib_pypy/_ctypes/primitive.py b/lib_pypy/_ctypes/primitive.py
--- a/lib_pypy/_ctypes/primitive.py
+++ b/lib_pypy/_ctypes/primitive.py
@@ -1,3 +1,4 @@
+import _ffi
 import _rawffi
 import weakref
 import sys
@@ -8,7 +9,7 @@
      CArgObject
 from _ctypes.builtin import ConvMode
 from _ctypes.array import Array
-from _ctypes.pointer import _Pointer
+from _ctypes.pointer import _Pointer, as_ffi_pointer
 
 class NULL(object):
     pass
@@ -140,6 +141,8 @@
                     value = 0
                 self._buffer[0] = value
             result.value = property(_getvalue, _setvalue)
+            result._ffiargtype = _ffi.types.Pointer(_ffi.types.char)
+
         elif tp == 'Z':
             # c_wchar_p
             def _getvalue(self):
@@ -162,6 +165,7 @@
                     value = 0
                 self._buffer[0] = value
             result.value = property(_getvalue, _setvalue)
+            result._ffiargtype = _ffi.types.Pointer(_ffi.types.unichar)
 
         elif tp == 'P':
             # c_void_p
@@ -212,10 +216,15 @@
             result.value = property(_getvalue, _setvalue)
 
         elif tp == 'X':
-            from ctypes import windll
-            SysAllocStringLen = windll.oleaut32.SysAllocStringLen
-            SysStringLen = windll.oleaut32.SysStringLen
-            SysFreeString = windll.oleaut32.SysFreeString
+            from ctypes import WinDLL
+            # Use WinDLL("oleaut32") instead of windll.oleaut32
+            # because the latter is a shared (cached) object; and
+            # other code may set their own restypes. We need our own
+            # restype here.
+            oleaut32 = WinDLL("oleaut32")
+            SysAllocStringLen = oleaut32.SysAllocStringLen
+            SysStringLen = oleaut32.SysStringLen
+            SysFreeString = oleaut32.SysFreeString
             def _getvalue(self):
                 addr = self._buffer[0]
                 if addr == 0:
@@ -248,6 +257,12 @@
                     self._buffer[0] = 0  # VARIANT_FALSE
             result.value = property(_getvalue, _setvalue)
 
+        # make pointer-types compatible with the _ffi fast path
+        if result._is_pointer_like():
+            def _as_ffi_pointer_(self, ffitype):
+                return as_ffi_pointer(self, ffitype)
+            result._as_ffi_pointer_ = _as_ffi_pointer_
+            
         return result
 
     from_address = cdata_from_address
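
For illustration, why the hunk above uses a private WinDLL("oleaut32") instead
of windll.oleaut32: the windll loader caches one shared library object per DLL,
so a restype set through it leaks into every other user of that function, while
a fresh WinDLL instance keeps the annotation local.  A Windows-only sketch:

    import sys
    if sys.platform == 'win32':
        import ctypes
        shared_a = ctypes.windll.oleaut32
        shared_b = ctypes.windll.oleaut32
        assert shared_a is shared_b                    # one cached, shared object
        private = ctypes.WinDLL("oleaut32")
        assert private is not shared_a                 # independent instance
        private.SysStringLen.restype = ctypes.c_uint   # does not leak to windll users
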
diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py
--- a/lib_pypy/_ctypes/structure.py
+++ b/lib_pypy/_ctypes/structure.py
@@ -240,6 +240,9 @@
     def _get_buffer_value(self):
         return self._buffer.buffer
 
+    def _to_ffi_param(self):
+        return self._buffer
+
 
 class StructureMeta(StructOrUnionMeta):
     _is_union = False
diff --git a/lib_pypy/_sqlite3.py b/lib_pypy/_sqlite3.py
--- a/lib_pypy/_sqlite3.py
+++ b/lib_pypy/_sqlite3.py
@@ -275,7 +275,8 @@
     return unicode(x, 'utf-8')
 
 class Connection(object):
-    def __init__(self, database, isolation_level="", detect_types=0, timeout=None, cached_statements=None, factory=None):
+    def __init__(self, database, timeout=5.0, detect_types=0, isolation_level="",
+                 check_same_thread=True, factory=None, cached_statements=100):
         self.db = c_void_p()
         if sqlite.sqlite3_open(database, byref(self.db)) != SQLITE_OK:
             raise OperationalError("Could not open database")
@@ -308,7 +309,8 @@
         self._aggregates = {}
         self.aggregate_instances = {}
         self._collations = {}
-        self.thread_ident = thread_get_ident()
+        if check_same_thread:
+            self.thread_ident = thread_get_ident()
 
     def _get_exception(self, error_code = None):
         if error_code is None:
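
For illustration, the new Connection signature matches CPython's sqlite3 module,
so the usual keyword arguments can be forwarded through connect(); passing
check_same_thread=False now simply skips recording the creating thread.  A
minimal usage sketch:

    import sqlite3
    conn = sqlite3.connect(":memory:", timeout=5.0, detect_types=0,
                           isolation_level="", check_same_thread=True,
                           cached_statements=100)
    conn.close()
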
diff --git a/lib_pypy/binascii.py b/lib_pypy/binascii.py
--- a/lib_pypy/binascii.py
+++ b/lib_pypy/binascii.py
@@ -659,7 +659,7 @@
         crc = crc_32_tab[(crc ^ long(ord(c))) & 0xffL] ^ (crc >> 8)
         #/* Note:  (crc >> 8) MUST zero fill on left
 
-        result = crc ^ 0xffffffffL
+    result = crc ^ 0xffffffffL
     
     if result > 2**31:
         result = ((result + 2**31) % 2**32) - 2**31
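
For illustration, the assignment moved out of the loop above feeds the signed
32-bit wrap that follows it (CPython 2's crc32() returns a signed int).  The
same arithmetic as a standalone helper:

    def to_signed32(value):
        # mirror the wrap above: map an unsigned 32-bit CRC onto a signed int
        if value > 2**31:
            value = ((value + 2**31) % 2**32) - 2**31
        return value

    assert to_signed32(0xFFFFFFFF) == -1
    assert to_signed32(0x7FFFFFFF) == 2**31 - 1
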
diff --git a/lib_pypy/cPickle.py b/lib_pypy/cPickle.py
--- a/lib_pypy/cPickle.py
+++ b/lib_pypy/cPickle.py
@@ -27,9 +27,9 @@
             PythonPickler.__init__(self, self.__f, args[0], **kw)
         else:
             PythonPickler.__init__(self, *args, **kw)
-            
+
     def memoize(self, obj):
-        self.memo[None] = None   # cPickle starts counting at one
+        self.memo[id(None)] = None   # cPickle starts counting at one
         return PythonPickler.memoize(self, obj)
 
     def getvalue(self):
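
For illustration, pickle.Pickler.memoize() keys its memo dict by id(obj) and
uses len(self.memo) as the next memo index, which is why the dummy entry above
has to be stored under id(None): it bumps the first real index to 1 (cPickle's
convention) without colliding with a later memoize(None).  A small Python 2
sketch:

    import pickle
    from StringIO import StringIO
    p = pickle.Pickler(StringIO(), 2)
    p.memo[id(None)] = None        # same trick as above
    assert len(p.memo) == 1        # the next memoized object gets index 1
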
diff --git a/lib_pypy/ctypes_support.py b/lib_pypy/ctypes_support.py
--- a/lib_pypy/ctypes_support.py
+++ b/lib_pypy/ctypes_support.py
@@ -10,8 +10,8 @@
 # __________ the standard C library __________
 
 if sys.platform == 'win32':
-    import _rawffi
-    standard_c_lib = ctypes.CDLL('msvcrt', handle=_rawffi.get_libc())
+    import _ffi
+    standard_c_lib = ctypes.CDLL('msvcrt', handle=_ffi.get_libc())
 else:
     standard_c_lib = ctypes.CDLL(ctypes.util.find_library('c'))
 
diff --git a/lib_pypy/datetime.py b/lib_pypy/datetime.py
--- a/lib_pypy/datetime.py
+++ b/lib_pypy/datetime.py
@@ -1422,12 +1422,17 @@
             converter = _time.localtime
         else:
             converter = _time.gmtime
-        if 1 - (t % 1.0) < 0.000001:
-            t = float(int(t)) + 1
-        if t < 0:
-            t -= 1
+        if t < 0.0:
+            us = int(round(((-t) % 1.0) * 1000000))
+            if us > 0:
+                us = 1000000 - us
+                t -= 1.0
+        else:
+            us = int(round((t % 1.0) * 1000000))
+            if us == 1000000:
+                us = 0
+                t += 1.0
         y, m, d, hh, mm, ss, weekday, jday, dst = converter(t)
-        us = int((t % 1.0) * 1000000)
         ss = min(ss, 59)    # clamp out leap seconds if the platform has them
         result = cls(y, m, d, hh, mm, ss, us, tz)
         if tz is not None:
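
For illustration, the rounding introduced above, extracted into a standalone
helper: the fractional part is rounded to whole microseconds, measured from the
next lower second for negative timestamps, and a carry of exactly 1000000
microseconds is pushed into the seconds:

    def split_timestamp(t):
        # same logic as the patch above, returning (whole seconds, microseconds)
        if t < 0.0:
            us = int(round(((-t) % 1.0) * 1000000))
            if us > 0:
                us = 1000000 - us
                t -= 1.0
        else:
            us = int(round((t % 1.0) * 1000000))
            if us == 1000000:
                us = 0
                t += 1.0
        return int(t), us

    assert split_timestamp(0.9999996) == (1, 0)     # rounds up into the next second
    assert split_timestamp(-0.4) == (-1, 600000)    # 0.6s past the previous second
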
diff --git a/lib_pypy/msvcrt.py b/lib_pypy/msvcrt.py
--- a/lib_pypy/msvcrt.py
+++ b/lib_pypy/msvcrt.py
@@ -46,4 +46,42 @@
         e = get_errno()
         raise IOError(e, errno.errorcode[e])
 
+# Console I/O routines
+
+kbhit = _c._kbhit
+kbhit.argtypes = []
+kbhit.restype = ctypes.c_int
+
+getch = _c._getch
+getch.argtypes = []
+getch.restype = ctypes.c_char
+
+getwch = _c._getwch
+getwch.argtypes = []
+getwch.restype = ctypes.c_wchar
+
+getche = _c._getche
+getche.argtypes = []
+getche.restype = ctypes.c_char
+
+getwche = _c._getwche
+getwche.argtypes = []
+getwche.restype = ctypes.c_wchar
+
+putch = _c._putch
+putch.argtypes = [ctypes.c_char]
+putch.restype = None
+
+putwch = _c._putwch
+putwch.argtypes = [ctypes.c_wchar]
+putwch.restype = None
+
+ungetch = _c._ungetch
+ungetch.argtypes = [ctypes.c_char]
+ungetch.restype = None
+
+ungetwch = _c._ungetwch
+ungetwch.argtypes = [ctypes.c_wchar]
+ungetwch.restype = None
+
 del ctypes
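
For illustration, a minimal Windows-only usage sketch of the console routines
declared above; with argtypes/restype in place they take and return single
characters:

    import sys
    if sys.platform == 'win32':
        import msvcrt
        msvcrt.putch('x')          # write one character to the console
        if msvcrt.kbhit():         # is a keypress pending?
            c = msvcrt.getch()     # read it without echoing
            msvcrt.ungetch(c)      # and push it back for the next reader
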
diff --git a/lib_pypy/pwd.py b/lib_pypy/pwd.py
--- a/lib_pypy/pwd.py
+++ b/lib_pypy/pwd.py
@@ -16,6 +16,7 @@
 
 from ctypes_support import standard_c_lib as libc
 from ctypes import Structure, POINTER, c_int, c_char_p, c_long
+from _structseq import structseqtype, structseqfield
 
 try: from __pypy__ import builtinify
 except ImportError: builtinify = lambda f: f
@@ -68,7 +69,7 @@
             yield self.pw_dir
             yield self.pw_shell
 
-class struct_passwd(tuple):
+class struct_passwd:
     """
     pwd.struct_passwd: Results from getpw*() routines.
 
@@ -76,15 +77,15 @@
       (pw_name,pw_passwd,pw_uid,pw_gid,pw_gecos,pw_dir,pw_shell)
     or via the object attributes as named in the above tuple.
     """
-    def __init__(self, passwd):
-        self.pw_name = passwd.pw_name
-        self.pw_passwd = passwd.pw_passwd
-        self.pw_uid = passwd.pw_uid
-        self.pw_gid = passwd.pw_gid
-        self.pw_gecos = passwd.pw_gecos
-        self.pw_dir = passwd.pw_dir
-        self.pw_shell = passwd.pw_shell
-        tuple.__init__(self, passwd)
+    __metaclass__ = structseqtype
+    name = "pwd.struct_passwd"
+    pw_name = structseqfield(0)
+    pw_passwd = structseqfield(1)
+    pw_uid = structseqfield(2)
+    pw_gid = structseqfield(3)
+    pw_gecos = structseqfield(4)
+    pw_dir = structseqfield(5)
+    pw_shell = structseqfield(6)
 
 passwd_p = POINTER(passwd)
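
For illustration, switching struct_passwd to the structseqtype metaclass gives
it the usual struct-sequence behaviour: every field is reachable both by index
and by name, and the object still unpacks like a 7-tuple.  A POSIX-only sketch
against the stdlib pwd module:

    import sys
    if sys.platform != 'win32':
        import os, pwd
        entry = pwd.getpwuid(os.getuid())
        assert entry[0] == entry.pw_name                 # index 0 is pw_name
        name, pw, uid, gid, gecos, home, shell = entry   # unpacks like a 7-tuple
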
 
diff --git a/lib_pypy/pypy_test/test_datetime.py b/lib_pypy/pypy_test/test_datetime.py
--- a/lib_pypy/pypy_test/test_datetime.py
+++ b/lib_pypy/pypy_test/test_datetime.py
@@ -32,4 +32,28 @@
     assert datetime.datetime.utcfromtimestamp(a).microsecond == 0
     assert datetime.datetime.utcfromtimestamp(a).second == 1
 
-    
+def test_more_datetime_rounding():
+    # this test was verified on top of CPython 2.7 (using a plain
+    # "import datetime" above)
+    expected_results = {
+        -1000.0: 'datetime.datetime(1970, 1, 1, 0, 43, 20)',
+        -999.9999996: 'datetime.datetime(1970, 1, 1, 0, 43, 20)',
+        -999.4: 'datetime.datetime(1970, 1, 1, 0, 43, 20, 600000)',
+        -999.0000004: 'datetime.datetime(1970, 1, 1, 0, 43, 21)',
+        -1.0: 'datetime.datetime(1970, 1, 1, 0, 59, 59)',
+        -0.9999996: 'datetime.datetime(1970, 1, 1, 0, 59, 59)',
+        -0.4: 'datetime.datetime(1970, 1, 1, 0, 59, 59, 600000)',
+        -0.0000004: 'datetime.datetime(1970, 1, 1, 1, 0)',
+        0.0: 'datetime.datetime(1970, 1, 1, 1, 0)',
+        0.0000004: 'datetime.datetime(1970, 1, 1, 1, 0)',
+        0.4: 'datetime.datetime(1970, 1, 1, 1, 0, 0, 400000)',
+        0.9999996: 'datetime.datetime(1970, 1, 1, 1, 0, 1)',
+        1000.0: 'datetime.datetime(1970, 1, 1, 1, 16, 40)',
+        1000.0000004: 'datetime.datetime(1970, 1, 1, 1, 16, 40)',
+        1000.4: 'datetime.datetime(1970, 1, 1, 1, 16, 40, 400000)',
+        1000.9999996: 'datetime.datetime(1970, 1, 1, 1, 16, 41)',
+        1293843661.191: 'datetime.datetime(2011, 1, 1, 2, 1, 1, 191000)',
+        }
+    for t in sorted(expected_results):
+        dt = datetime.datetime.fromtimestamp(t)
+        assert repr(dt) == expected_results[t]
diff --git a/lib_pypy/resource.py b/lib_pypy/resource.py
--- a/lib_pypy/resource.py
+++ b/lib_pypy/resource.py
@@ -7,7 +7,7 @@
 
 from ctypes_support import standard_c_lib as libc
 from ctypes_support import get_errno
-from ctypes import Structure, c_int, c_long, byref, sizeof
+from ctypes import Structure, c_int, c_long, byref, sizeof, POINTER
 from errno import EINVAL, EPERM
 import _structseq
 
@@ -25,6 +25,8 @@
 _setrlimit = libc.setrlimit
 try:
     _getpagesize = libc.getpagesize
+    _getpagesize.argtypes = ()
+    _getpagesize.restype = c_int
 except AttributeError:
     from os import sysconf
     _getpagesize = None
@@ -61,6 +63,10 @@
         ("ru_nivcsw", c_long),
     )
 
+_getrusage.argtypes = (c_int, POINTER(_struct_rusage))
+_getrusage.restype = c_int
+
+
 class struct_rusage:
     __metaclass__ = _structseq.structseqtype
 
@@ -94,6 +100,12 @@
         ("rlim_max", rlim_t),
     )
 
+_getrlimit.argtypes = (c_int, POINTER(rlimit))
+_getrlimit.restype = c_int
+_setrlimit.argtypes = (c_int, POINTER(rlimit))
+_setrlimit.restype = c_int
+
+
 @builtinify
 def getrusage(who):
     ru = _struct_rusage()
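
For illustration, the point of declaring argtypes/restype above: without the
declarations ctypes assumes plain C int for every argument and for the return
value, which stops working as soon as pointers or wider types are involved.  A
POSIX sketch for getpagesize:

    import ctypes, ctypes.util
    libc = ctypes.CDLL(ctypes.util.find_library('c') or None)
    try:
        getpagesize = libc.getpagesize
    except AttributeError:
        pass                              # no getpagesize(); fall back as above
    else:
        getpagesize.argtypes = ()
        getpagesize.restype = ctypes.c_int
        assert getpagesize() > 0          # typically 4096
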
diff --git a/py/__init__.py b/py/__init__.py
--- a/py/__init__.py
+++ b/py/__init__.py
@@ -8,7 +8,7 @@
 
 (c) Holger Krekel and others, 2004-2010
 """
-__version__ = '1.4.3'
+__version__ = '1.4.4.dev1'
 
 from py import _apipkg
 
@@ -70,10 +70,6 @@
         'getrawcode'        : '._code.code:getrawcode',
         'patch_builtins'    : '._code.code:patch_builtins',
         'unpatch_builtins'  : '._code.code:unpatch_builtins',
-        '_AssertionError'   : '._code.assertion:AssertionError',
-        '_reinterpret_old'  : '._code.assertion:reinterpret_old',
-        '_reinterpret'      : '._code.assertion:reinterpret',
-        '_reprcompare'      : '._code.assertion:_reprcompare',
     },
 
     # backports and additions of builtins
diff --git a/py/_code/_assertionnew.py b/py/_code/_assertionnew.py
deleted file mode 100644
--- a/py/_code/_assertionnew.py
+++ /dev/null
@@ -1,339 +0,0 @@
-"""
-Find intermediate evalutation results in assert statements through builtin AST.
-This should replace _assertionold.py eventually.
-"""
-
-import sys
-import ast
-
-import py
-from py._code.assertion import _format_explanation, BuiltinAssertionError
-
-
-if sys.platform.startswith("java") and sys.version_info < (2, 5, 2):
-    # See http://bugs.jython.org/issue1497
-    _exprs = ("BoolOp", "BinOp", "UnaryOp", "Lambda", "IfExp", "Dict",
-              "ListComp", "GeneratorExp", "Yield", "Compare", "Call",
-              "Repr", "Num", "Str", "Attribute", "Subscript", "Name",
-              "List", "Tuple")
-    _stmts = ("FunctionDef", "ClassDef", "Return", "Delete", "Assign",
-              "AugAssign", "Print", "For", "While", "If", "With", "Raise",
-              "TryExcept", "TryFinally", "Assert", "Import", "ImportFrom",
-              "Exec", "Global", "Expr", "Pass", "Break", "Continue")
-    _expr_nodes = set(getattr(ast, name) for name in _exprs)
-    _stmt_nodes = set(getattr(ast, name) for name in _stmts)
-    def _is_ast_expr(node):
-        return node.__class__ in _expr_nodes
-    def _is_ast_stmt(node):
-        return node.__class__ in _stmt_nodes
-else:
-    def _is_ast_expr(node):
-        return isinstance(node, ast.expr)
-    def _is_ast_stmt(node):
-        return isinstance(node, ast.stmt)
-
-
-class Failure(Exception):
-    """Error found while interpreting AST."""
-
-    def __init__(self, explanation=""):
-        self.cause = sys.exc_info()
-        self.explanation = explanation
-
-
-def interpret(source, frame, should_fail=False):
-    mod = ast.parse(source)
-    visitor = DebugInterpreter(frame)
-    try:
-        visitor.visit(mod)
-    except Failure:
-        failure = sys.exc_info()[1]
-        return getfailure(failure)
-    if should_fail:
-        return ("(assertion failed, but when it was re-run for "
-                "printing intermediate values, it did not fail.  Suggestions: "
-                "compute assert expression before the assert or use --no-assert)")
-
-def run(offending_line, frame=None):
-    if frame is None:
-        frame = py.code.Frame(sys._getframe(1))
-    return interpret(offending_line, frame)
-
-def getfailure(failure):
-    explanation = _format_explanation(failure.explanation)
-    value = failure.cause[1]
-    if str(value):
-        lines = explanation.splitlines()
-        if not lines:
-            lines.append("")
-        lines[0] += " << %s" % (value,)
-        explanation = "\n".join(lines)
-    text = "%s: %s" % (failure.cause[0].__name__, explanation)
-    if text.startswith("AssertionError: assert "):
-        text = text[16:]
-    return text
-
-
-operator_map = {
-    ast.BitOr : "|",
-    ast.BitXor : "^",
-    ast.BitAnd : "&",
-    ast.LShift : "<<",
-    ast.RShift : ">>",
-    ast.Add : "+",
-    ast.Sub : "-",
-    ast.Mult : "*",
-    ast.Div : "/",
-    ast.FloorDiv : "//",
-    ast.Mod : "%",
-    ast.Eq : "==",
-    ast.NotEq : "!=",
-    ast.Lt : "<",
-    ast.LtE : "<=",
-    ast.Gt : ">",
-    ast.GtE : ">=",
-    ast.Pow : "**",
-    ast.Is : "is",
-    ast.IsNot : "is not",
-    ast.In : "in",
-    ast.NotIn : "not in"
-}
-
-unary_map = {
-    ast.Not : "not %s",
-    ast.Invert : "~%s",
-    ast.USub : "-%s",
-    ast.UAdd : "+%s"
-}
-
-
-class DebugInterpreter(ast.NodeVisitor):
-    """Interpret AST nodes to gleam useful debugging information. """
-
-    def __init__(self, frame):
-        self.frame = frame
-
-    def generic_visit(self, node):
-        # Fallback when we don't have a special implementation.
-        if _is_ast_expr(node):
-            mod = ast.Expression(node)
-            co = self._compile(mod)
-            try:
-                result = self.frame.eval(co)
-            except Exception:
-                raise Failure()
-            explanation = self.frame.repr(result)
-            return explanation, result
-        elif _is_ast_stmt(node):
-            mod = ast.Module([node])
-            co = self._compile(mod, "exec")
-            try:
-                self.frame.exec_(co)
-            except Exception:
-                raise Failure()
-            return None, None
-        else:
-            raise AssertionError("can't handle %s" %(node,))
-
-    def _compile(self, source, mode="eval"):
-        return compile(source, "<assertion interpretation>", mode)
-
-    def visit_Expr(self, expr):
-        return self.visit(expr.value)
-
-    def visit_Module(self, mod):
-        for stmt in mod.body:
-            self.visit(stmt)
-
-    def visit_Name(self, name):
-        explanation, result = self.generic_visit(name)
-        # See if the name is local.
-        source = "%r in locals() is not globals()" % (name.id,)
-        co = self._compile(source)
-        try:
-            local = self.frame.eval(co)
-        except Exception:
-            # have to assume it isn't
-            local = False
-        if not local:
-            return name.id, result
-        return explanation, result
-
-    def visit_Compare(self, comp):
-        left = comp.left
-        left_explanation, left_result = self.visit(left)
-        for op, next_op in zip(comp.ops, comp.comparators):
-            next_explanation, next_result = self.visit(next_op)
-            op_symbol = operator_map[op.__class__]
-            explanation = "%s %s %s" % (left_explanation, op_symbol,
-                                        next_explanation)
-            source = "__exprinfo_left %s __exprinfo_right" % (op_symbol,)
-            co = self._compile(source)
-            try:
-                result = self.frame.eval(co, __exprinfo_left=left_result,
-                                         __exprinfo_right=next_result)
-            except Exception:
-                raise Failure(explanation)
-            try:
-                if not result:
-                    break
-            except KeyboardInterrupt:
-                raise
-            except:
-                break
-            left_explanation, left_result = next_explanation, next_result
-
-        rcomp = py.code._reprcompare
-        if rcomp:
-            res = rcomp(op_symbol, left_result, next_result)
-            if res:
-                explanation = res
-        return explanation, result
-
-    def visit_BoolOp(self, boolop):
-        is_or = isinstance(boolop.op, ast.Or)
-        explanations = []
-        for operand in boolop.values:
-            explanation, result = self.visit(operand)
-            explanations.append(explanation)
-            if result == is_or:
-                break
-        name = is_or and " or " or " and "
-        explanation = "(" + name.join(explanations) + ")"
-        return explanation, result
-
-    def visit_UnaryOp(self, unary):
-        pattern = unary_map[unary.op.__class__]
-        operand_explanation, operand_result = self.visit(unary.operand)
-        explanation = pattern % (operand_explanation,)
-        co = self._compile(pattern % ("__exprinfo_expr",))
-        try:
-            result = self.frame.eval(co, __exprinfo_expr=operand_result)
-        except Exception:
-            raise Failure(explanation)
-        return explanation, result
-
-    def visit_BinOp(self, binop):
-        left_explanation, left_result = self.visit(binop.left)
-        right_explanation, right_result = self.visit(binop.right)
-        symbol = operator_map[binop.op.__class__]
-        explanation = "(%s %s %s)" % (left_explanation, symbol,
-                                      right_explanation)
-        source = "__exprinfo_left %s __exprinfo_right" % (symbol,)
-        co = self._compile(source)
-        try:
-            result = self.frame.eval(co, __exprinfo_left=left_result,
-                                     __exprinfo_right=right_result)
-        except Exception:
-            raise Failure(explanation)
-        return explanation, result
-
-    def visit_Call(self, call):
-        func_explanation, func = self.visit(call.func)
-        arg_explanations = []
-        ns = {"__exprinfo_func" : func}
-        arguments = []
-        for arg in call.args:
-            arg_explanation, arg_result = self.visit(arg)
-            arg_name = "__exprinfo_%s" % (len(ns),)
-            ns[arg_name] = arg_result
-            arguments.append(arg_name)
-            arg_explanations.append(arg_explanation)
-        for keyword in call.keywords:
-            arg_explanation, arg_result = self.visit(keyword.value)
-            arg_name = "__exprinfo_%s" % (len(ns),)
-            ns[arg_name] = arg_result
-            keyword_source = "%s=%%s" % (keyword.arg)
-            arguments.append(keyword_source % (arg_name,))
-            arg_explanations.append(keyword_source % (arg_explanation,))
-        if call.starargs:
-            arg_explanation, arg_result = self.visit(call.starargs)
-            arg_name = "__exprinfo_star"
-            ns[arg_name] = arg_result
-            arguments.append("*%s" % (arg_name,))
-            arg_explanations.append("*%s" % (arg_explanation,))
-        if call.kwargs:
-            arg_explanation, arg_result = self.visit(call.kwargs)
-            arg_name = "__exprinfo_kwds"
-            ns[arg_name] = arg_result
-            arguments.append("**%s" % (arg_name,))
-            arg_explanations.append("**%s" % (arg_explanation,))
-        args_explained = ", ".join(arg_explanations)
-        explanation = "%s(%s)" % (func_explanation, args_explained)
-        args = ", ".join(arguments)
-        source = "__exprinfo_func(%s)" % (args,)
-        co = self._compile(source)
-        try:
-            result = self.frame.eval(co, **ns)
-        except Exception:
-            raise Failure(explanation)
-        pattern = "%s\n{%s = %s\n}"
-        rep = self.frame.repr(result)
-        explanation = pattern % (rep, rep, explanation)
-        return explanation, result
-
-    def _is_builtin_name(self, name):
-        pattern = "%r not in globals() and %r not in locals()"
-        source = pattern % (name.id, name.id)
-        co = self._compile(source)
-        try:
-            return self.frame.eval(co)
-        except Exception:
-            return False
-
-    def visit_Attribute(self, attr):
-        if not isinstance(attr.ctx, ast.Load):
-            return self.generic_visit(attr)
-        source_explanation, source_result = self.visit(attr.value)
-        explanation = "%s.%s" % (source_explanation, attr.attr)
-        source = "__exprinfo_expr.%s" % (attr.attr,)
-        co = self._compile(source)
-        try:
-            result = self.frame.eval(co, __exprinfo_expr=source_result)
-        except Exception:
-            raise Failure(explanation)
-        explanation = "%s\n{%s = %s.%s\n}" % (self.frame.repr(result),
-                                              self.frame.repr(result),
-                                              source_explanation, attr.attr)
-        # Check if the attr is from an instance.
-        source = "%r in getattr(__exprinfo_expr, '__dict__', {})"
-        source = source % (attr.attr,)
-        co = self._compile(source)
-        try:
-            from_instance = self.frame.eval(co, __exprinfo_expr=source_result)
-        except Exception:
-            from_instance = True
-        if from_instance:
-            rep = self.frame.repr(result)
-            pattern = "%s\n{%s = %s\n}"
-            explanation = pattern % (rep, rep, explanation)
-        return explanation, result
-
-    def visit_Assert(self, assrt):
-        test_explanation, test_result = self.visit(assrt.test)
-        if test_explanation.startswith("False\n{False =") and \
-                test_explanation.endswith("\n"):
-            test_explanation = test_explanation[15:-2]
-        explanation = "assert %s" % (test_explanation,)
-        if not test_result:
-            try:
-                raise BuiltinAssertionError
-            except Exception:
-                raise Failure(explanation)
-        return explanation, test_result
-
-    def visit_Assign(self, assign):
-        value_explanation, value_result = self.visit(assign.value)
-        explanation = "... = %s" % (value_explanation,)
-        name = ast.Name("__exprinfo_expr", ast.Load(),
-                        lineno=assign.value.lineno,
-                        col_offset=assign.value.col_offset)
-        new_assign = ast.Assign(assign.targets, name, lineno=assign.lineno,
-                                col_offset=assign.col_offset)
-        mod = ast.Module([new_assign])
-        co = self._compile(mod, "exec")
-        try:
-            self.frame.exec_(co, __exprinfo_expr=value_result)
-        except Exception:
-            raise Failure(explanation)
-        return explanation, value_result
diff --git a/py/_code/_assertionold.py b/py/_code/_assertionold.py
deleted file mode 100644
--- a/py/_code/_assertionold.py
+++ /dev/null
@@ -1,555 +0,0 @@
-import py
-import sys, inspect
-from compiler import parse, ast, pycodegen
-from py._code.assertion import BuiltinAssertionError, _format_explanation
-
-passthroughex = py.builtin._sysex
-
-class Failure:
-    def __init__(self, node):
-        self.exc, self.value, self.tb = sys.exc_info()
-        self.node = node
-
-class View(object):
-    """View base class.
-
-    If C is a subclass of View, then C(x) creates a proxy object around
-    the object x.  The actual class of the proxy is not C in general,
-    but a *subclass* of C determined by the rules below.  To avoid confusion
-    we call view class the class of the proxy (a subclass of C, so of View)
-    and object class the class of x.
-
-    Attributes and methods not found in the proxy are automatically read on x.
-    Other operations like setting attributes are performed on the proxy, as
-    determined by its view class.  The object x is available from the proxy
-    as its __obj__ attribute.
-
-    The view class selection is determined by the __view__ tuples and the
-    optional __viewkey__ method.  By default, the selected view class is the
-    most specific subclass of C whose __view__ mentions the class of x.
-    If no such subclass is found, the search proceeds with the parent
-    object classes.  For example, C(True) will first look for a subclass
-    of C with __view__ = (..., bool, ...) and only if it doesn't find any
-    look for one with __view__ = (..., int, ...), and then ..., object,...
-    If everything fails the class C itself is considered to be the default.
-
-    Alternatively, the view class selection can be driven by another aspect
-    of the object x, instead of the class of x, by overriding __viewkey__.
-    See last example at the end of this module.
-    """
-
-    _viewcache = {}
-    __view__ = ()
-
-    def __new__(rootclass, obj, *args, **kwds):
-        self = object.__new__(rootclass)
-        self.__obj__ = obj
-        self.__rootclass__ = rootclass
-        key = self.__viewkey__()
-        try:
-            self.__class__ = self._viewcache[key]
-        except KeyError:
-            self.__class__ = self._selectsubclass(key)
-        return self
-
-    def __getattr__(self, attr):
-        # attributes not found in the normal hierarchy rooted on View
-        # are looked up in the object's real class
-        return getattr(self.__obj__, attr)
-
-    def __viewkey__(self):
-        return self.__obj__.__class__
-
-    def __matchkey__(self, key, subclasses):
-        if inspect.isclass(key):
-            keys = inspect.getmro(key)
-        else:
-            keys = [key]
-        for key in keys:
-            result = [C for C in subclasses if key in C.__view__]
-            if result:
-                return result
-        return []
-
-    def _selectsubclass(self, key):
-        subclasses = list(enumsubclasses(self.__rootclass__))
-        for C in subclasses:
-            if not isinstance(C.__view__, tuple):
-                C.__view__ = (C.__view__,)
-        choices = self.__matchkey__(key, subclasses)
-        if not choices:
-            return self.__rootclass__
-        elif len(choices) == 1:
-            return choices[0]
-        else:
-            # combine the multiple choices
-            return type('?', tuple(choices), {})
-
-    def __repr__(self):
-        return '%s(%r)' % (self.__rootclass__.__name__, self.__obj__)
-
-
-def enumsubclasses(cls):
-    for subcls in cls.__subclasses__():
-        for subsubclass in enumsubclasses(subcls):
-            yield subsubclass
-    yield cls
-
-
-class Interpretable(View):
-    """A parse tree node with a few extra methods."""
-    explanation = None
-
-    def is_builtin(self, frame):
-        return False
-
-    def eval(self, frame):
-        # fall-back for unknown expression nodes
-        try:
-            expr = ast.Expression(self.__obj__)
-            expr.filename = '<eval>'
-            self.__obj__.filename = '<eval>'
-            co = pycodegen.ExpressionCodeGenerator(expr).getCode()
-            result = frame.eval(co)
-        except passthroughex:
-            raise
-        except:
-            raise Failure(self)
-        self.result = result
-        self.explanation = self.explanation or frame.repr(self.result)
-
-    def run(self, frame):
-        # fall-back for unknown statement nodes
-        try:
-            expr = ast.Module(None, ast.Stmt([self.__obj__]))
-            expr.filename = '<run>'
-            co = pycodegen.ModuleCodeGenerator(expr).getCode()
-            frame.exec_(co)
-        except passthroughex:
-            raise
-        except:
-            raise Failure(self)
-
-    def nice_explanation(self):
-        return _format_explanation(self.explanation)
-
-
-class Name(Interpretable):
-    __view__ = ast.Name
-
-    def is_local(self, frame):
-        source = '%r in locals() is not globals()' % self.name
-        try:
-            return frame.is_true(frame.eval(source))
-        except passthroughex:
-            raise
-        except:
-            return False
-
-    def is_global(self, frame):
-        source = '%r in globals()' % self.name
-        try:
-            return frame.is_true(frame.eval(source))
-        except passthroughex:
-            raise
-        except:
-            return False
-
-    def is_builtin(self, frame):
-        source = '%r not in locals() and %r not in globals()' % (
-            self.name, self.name)
-        try:
-            return frame.is_true(frame.eval(source))
-        except passthroughex:
-            raise
-        except:
-            return False
-
-    def eval(self, frame):
-        super(Name, self).eval(frame)
-        if not self.is_local(frame):
-            self.explanation = self.name
-
-class Compare(Interpretable):
-    __view__ = ast.Compare
-
-    def eval(self, frame):
-        expr = Interpretable(self.expr)
-        expr.eval(frame)
-        for operation, expr2 in self.ops:
-            if hasattr(self, 'result'):
-                # shortcutting in chained expressions
-                if not frame.is_true(self.result):
-                    break
-            expr2 = Interpretable(expr2)
-            expr2.eval(frame)
-            self.explanation = "%s %s %s" % (
-                expr.explanation, operation, expr2.explanation)
-            source = "__exprinfo_left %s __exprinfo_right" % operation
-            try:
-                self.result = frame.eval(source,
-                                         __exprinfo_left=expr.result,
-                                         __exprinfo_right=expr2.result)
-            except passthroughex:
-                raise
-            except:
-                raise Failure(self)
-            expr = expr2
-
-class And(Interpretable):
-    __view__ = ast.And
-
-    def eval(self, frame):
-        explanations = []
-        for expr in self.nodes:
-            expr = Interpretable(expr)
-            expr.eval(frame)
-            explanations.append(expr.explanation)
-            self.result = expr.result
-            if not frame.is_true(expr.result):
-                break
-        self.explanation = '(' + ' and '.join(explanations) + ')'
-
-class Or(Interpretable):
-    __view__ = ast.Or
-
-    def eval(self, frame):
-        explanations = []
-        for expr in self.nodes:
-            expr = Interpretable(expr)
-            expr.eval(frame)
-            explanations.append(expr.explanation)
-            self.result = expr.result
-            if frame.is_true(expr.result):
-                break
-        self.explanation = '(' + ' or '.join(explanations) + ')'
-
-
-# == Unary operations ==
-keepalive = []
-for astclass, astpattern in {
-    ast.Not    : 'not __exprinfo_expr',
-    ast.Invert : '(~__exprinfo_expr)',
-    }.items():
-
-    class UnaryArith(Interpretable):
-        __view__ = astclass
-
-        def eval(self, frame, astpattern=astpattern):
-            expr = Interpretable(self.expr)
-            expr.eval(frame)
-            self.explanation = astpattern.replace('__exprinfo_expr',
-                                                  expr.explanation)
-            try:
-                self.result = frame.eval(astpattern,
-                                         __exprinfo_expr=expr.result)
-            except passthroughex:
-                raise
-            except:
-                raise Failure(self)
-
-    keepalive.append(UnaryArith)
-
-# == Binary operations ==
-for astclass, astpattern in {
-    ast.Add    : '(__exprinfo_left + __exprinfo_right)',
-    ast.Sub    : '(__exprinfo_left - __exprinfo_right)',
-    ast.Mul    : '(__exprinfo_left * __exprinfo_right)',
-    ast.Div    : '(__exprinfo_left / __exprinfo_right)',
-    ast.Mod    : '(__exprinfo_left % __exprinfo_right)',
-    ast.Power  : '(__exprinfo_left ** __exprinfo_right)',
-    }.items():
-
-    class BinaryArith(Interpretable):
-        __view__ = astclass
-
-        def eval(self, frame, astpattern=astpattern):
-            left = Interpretable(self.left)
-            left.eval(frame)
-            right = Interpretable(self.right)
-            right.eval(frame)
-            self.explanation = (astpattern
-                                .replace('__exprinfo_left',  left .explanation)
-                                .replace('__exprinfo_right', right.explanation))
-            try:
-                self.result = frame.eval(astpattern,
-                                         __exprinfo_left=left.result,
-                                         __exprinfo_right=right.result)
-            except passthroughex:
-                raise
-            except:
-                raise Failure(self)
-
-    keepalive.append(BinaryArith)
-
-
-class CallFunc(Interpretable):
-    __view__ = ast.CallFunc
-
-    def is_bool(self, frame):
-        source = 'isinstance(__exprinfo_value, bool)'
-        try:
-            return frame.is_true(frame.eval(source,
-                                            __exprinfo_value=self.result))
-        except passthroughex:
-            raise
-        except:
-            return False
-
-    def eval(self, frame):
-        node = Interpretable(self.node)
-        node.eval(frame)
-        explanations = []
-        vars = {'__exprinfo_fn': node.result}
-        source = '__exprinfo_fn('
-        for a in self.args:
-            if isinstance(a, ast.Keyword):
-                keyword = a.name
-                a = a.expr
-            else:
-                keyword = None
-            a = Interpretable(a)
-            a.eval(frame)
-            argname = '__exprinfo_%d' % len(vars)
-            vars[argname] = a.result
-            if keyword is None:
-                source += argname + ','
-                explanations.append(a.explanation)
-            else:
-                source += '%s=%s,' % (keyword, argname)
-                explanations.append('%s=%s' % (keyword, a.explanation))
-        if self.star_args:
-            star_args = Interpretable(self.star_args)
-            star_args.eval(frame)
-            argname = '__exprinfo_star'
-            vars[argname] = star_args.result
-            source += '*' + argname + ','
-            explanations.append('*' + star_args.explanation)
-        if self.dstar_args:
-            dstar_args = Interpretable(self.dstar_args)
-            dstar_args.eval(frame)
-            argname = '__exprinfo_kwds'
-            vars[argname] = dstar_args.result
-            source += '**' + argname + ','
-            explanations.append('**' + dstar_args.explanation)
-        self.explanation = "%s(%s)" % (
-            node.explanation, ', '.join(explanations))
-        if source.endswith(','):
-            source = source[:-1]
-        source += ')'
-        try:
-            self.result = frame.eval(source, **vars)
-        except passthroughex:
-            raise
-        except:
-            raise Failure(self)
-        if not node.is_builtin(frame) or not self.is_bool(frame):
-            r = frame.repr(self.result)
-            self.explanation = '%s\n{%s = %s\n}' % (r, r, self.explanation)
-
-class Getattr(Interpretable):
-    __view__ = ast.Getattr
-
-    def eval(self, frame):
-        expr = Interpretable(self.expr)
-        expr.eval(frame)
-        source = '__exprinfo_expr.%s' % self.attrname
-        try:
-            self.result = frame.eval(source, __exprinfo_expr=expr.result)
-        except passthroughex:
-            raise
-        except:
-            raise Failure(self)
-        self.explanation = '%s.%s' % (expr.explanation, self.attrname)
-        # if the attribute comes from the instance, its value is interesting
-        source = ('hasattr(__exprinfo_expr, "__dict__") and '
-                  '%r in __exprinfo_expr.__dict__' % self.attrname)
-        try:
-            from_instance = frame.is_true(
-                frame.eval(source, __exprinfo_expr=expr.result))
-        except passthroughex:
-            raise
-        except:
-            from_instance = True
-        if from_instance:
-            r = frame.repr(self.result)
-            self.explanation = '%s\n{%s = %s\n}' % (r, r, self.explanation)
-
-# == Re-interpretation of full statements ==
-
-class Assert(Interpretable):
-    __view__ = ast.Assert
-
-    def run(self, frame):
-        test = Interpretable(self.test)
-        test.eval(frame)
-        # simplify 'assert False where False = ...'
-        if (test.explanation.startswith('False\n{False = ') and
-            test.explanation.endswith('\n}')):
-            test.explanation = test.explanation[15:-2]
-        # print the result as  'assert <explanation>'
-        self.result = test.result
-        self.explanation = 'assert ' + test.explanation
-        if not frame.is_true(test.result):
-            try:
-                raise BuiltinAssertionError
-            except passthroughex:
-                raise
-            except:
-                raise Failure(self)
-
-class Assign(Interpretable):
-    __view__ = ast.Assign
-
-    def run(self, frame):
-        expr = Interpretable(self.expr)
-        expr.eval(frame)
-        self.result = expr.result
-        self.explanation = '... = ' + expr.explanation
-        # fall-back-run the rest of the assignment
-        ass = ast.Assign(self.nodes, ast.Name('__exprinfo_expr'))
-        mod = ast.Module(None, ast.Stmt([ass]))
-        mod.filename = '<run>'
-        co = pycodegen.ModuleCodeGenerator(mod).getCode()
-        try:
-            frame.exec_(co, __exprinfo_expr=expr.result)
-        except passthroughex:
-            raise
-        except:
-            raise Failure(self)
-
-class Discard(Interpretable):
-    __view__ = ast.Discard
-
-    def run(self, frame):
-        expr = Interpretable(self.expr)
-        expr.eval(frame)
-        self.result = expr.result
-        self.explanation = expr.explanation
-
-class Stmt(Interpretable):
-    __view__ = ast.Stmt
-
-    def run(self, frame):
-        for stmt in self.nodes:
-            stmt = Interpretable(stmt)
-            stmt.run(frame)
-
-
-def report_failure(e):
-    explanation = e.node.nice_explanation()
-    if explanation:
-        explanation = ", in: " + explanation
-    else:
-        explanation = ""
-    sys.stdout.write("%s: %s%s\n" % (e.exc.__name__, e.value, explanation))
-
-def check(s, frame=None):
-    if frame is None:
-        frame = sys._getframe(1)
-        frame = py.code.Frame(frame)
-    expr = parse(s, 'eval')
-    assert isinstance(expr, ast.Expression)
-    node = Interpretable(expr.node)
-    try:
-        node.eval(frame)
-    except passthroughex:
-        raise
-    except Failure:
-        e = sys.exc_info()[1]
-        report_failure(e)
-    else:
-        if not frame.is_true(node.result):
-            sys.stderr.write("assertion failed: %s\n" % node.nice_explanation())
-
-
-###########################################################
-# API / Entry points
-# #########################################################
-
-def interpret(source, frame, should_fail=False):
-    module = Interpretable(parse(source, 'exec').node)
-    #print "got module", module
-    if isinstance(frame, py.std.types.FrameType):
-        frame = py.code.Frame(frame)
-    try:
-        module.run(frame)
-    except Failure:
-        e = sys.exc_info()[1]
-        return getfailure(e)
-    except passthroughex:
-        raise
-    except:
-        import traceback
-        traceback.print_exc()
-    if should_fail:
-        return ("(assertion failed, but when it was re-run for "
-                "printing intermediate values, it did not fail.  Suggestions: "
-                "compute assert expression before the assert or use --nomagic)")
-    else:
-        return None
-
-def getmsg(excinfo):
-    if isinstance(excinfo, tuple):
-        excinfo = py.code.ExceptionInfo(excinfo)
-    #frame, line = gettbline(tb)
-    #frame = py.code.Frame(frame)
-    #return interpret(line, frame)
-
-    tb = excinfo.traceback[-1]
-    source = str(tb.statement).strip()
-    x = interpret(source, tb.frame, should_fail=True)
-    if not isinstance(x, str):
-        raise TypeError("interpret returned non-string %r" % (x,))
-    return x
-
-def getfailure(e):
-    explanation = e.node.nice_explanation()
-    if str(e.value):
-        lines = explanation.split('\n')
-        lines[0] += "  << %s" % (e.value,)
-        explanation = '\n'.join(lines)
-    text = "%s: %s" % (e.exc.__name__, explanation)
-    if text.startswith('AssertionError: assert '):
-        text = text[16:]
-    return text
-
-def run(s, frame=None):
-    if frame is None:
-        frame = sys._getframe(1)
-        frame = py.code.Frame(frame)
-    module = Interpretable(parse(s, 'exec').node)
-    try:
-        module.run(frame)
-    except Failure:
-        e = sys.exc_info()[1]
-        report_failure(e)
-
-
-if __name__ == '__main__':
-    # example:
-    def f():
-        return 5
-    def g():
-        return 3
-    def h(x):
-        return 'never'
-    check("f() * g() == 5")
-    check("not f()")
-    check("not (f() and g() or 0)")
-    check("f() == g()")
-    i = 4
-    check("i == f()")
-    check("len(f()) == 0")
-    check("isinstance(2+3+4, float)")
-
-    run("x = i")
-    check("x == 5")
-
-    run("assert not f(), 'oops'")
-    run("a, b, c = 1, 2")
-    run("a, b, c = f()")
-
-    check("max([f(),g()]) == 4")
-    check("'hello'[g()] == 'h'")
-    run("'guk%d' % h(f())")
diff --git a/py/_code/assertion.py b/py/_code/assertion.py
deleted file mode 100644
--- a/py/_code/assertion.py
+++ /dev/null
@@ -1,94 +0,0 @@
-import sys
-import py
-
-BuiltinAssertionError = py.builtin.builtins.AssertionError
-
-_reprcompare = None # if set, will be called by assert reinterp for comparison ops
-
-def _format_explanation(explanation):
-    """This formats an explanation
-
-    Normally all embedded newlines are escaped, however there are
-    three exceptions: \n{, \n} and \n~.  The first two are intended
-    cover nested explanations, see function and attribute explanations
-    for examples (.visit_Call(), visit_Attribute()).  The last one is
-    for when one explanation needs to span multiple lines, e.g. when
-    displaying diffs.
-    """
-    raw_lines = (explanation or '').split('\n')
-    # escape newlines not followed by {, } and ~
-    lines = [raw_lines[0]]
-    for l in raw_lines[1:]:
-        if l.startswith('{') or l.startswith('}') or l.startswith('~'):
-            lines.append(l)
-        else:
-            lines[-1] += '\\n' + l
-
-    result = lines[:1]
-    stack = [0]
-    stackcnt = [0]
-    for line in lines[1:]:
-        if line.startswith('{'):
-            if stackcnt[-1]:
-                s = 'and   '
-            else:
-                s = 'where '
-            stack.append(len(result))
-            stackcnt[-1] += 1
-            stackcnt.append(0)
-            result.append(' +' + '  '*(len(stack)-1) + s + line[1:])
-        elif line.startswith('}'):
-            assert line.startswith('}')
-            stack.pop()
-            stackcnt.pop()
-            result[stack[-1]] += line[1:]
-        else:
-            assert line.startswith('~')
-            result.append('  '*len(stack) + line[1:])
-    assert len(stack) == 1
-    return '\n'.join(result)
-
-
-class AssertionError(BuiltinAssertionError):
-    def __init__(self, *args):
-        BuiltinAssertionError.__init__(self, *args)
-        if args:
-            try:
-                self.msg = str(args[0])
-            except py.builtin._sysex:
-                raise
-            except:
-                self.msg = "<[broken __repr__] %s at %0xd>" %(
-                    args[0].__class__, id(args[0]))
-        else:
-            f = py.code.Frame(sys._getframe(1))
-            try:
-                source = f.code.fullsource
-                if source is not None:
-                    try:
-                        source = source.getstatement(f.lineno, assertion=True)
-                    except IndexError:
-                        source = None
-                    else:
-                        source = str(source.deindent()).strip()
-            except py.error.ENOENT:
-                source = None
-                # this can also occur during reinterpretation, when the
-                # co_filename is set to "<run>".
-            if source:
-                self.msg = reinterpret(source, f, should_fail=True)
-            else:
-                self.msg = "<could not determine information>"
-            if not self.args:
-                self.args = (self.msg,)
-
-if sys.version_info > (3, 0):
-    AssertionError.__module__ = "builtins"
-    reinterpret_old = "old reinterpretation not available for py3"
-else:
-    from py._code._assertionold import interpret as reinterpret_old
-if sys.version_info >= (2, 6) or (sys.platform.startswith("java")):
-    from py._code._assertionnew import interpret as reinterpret
-else:
-    reinterpret = reinterpret_old
-
diff --git a/py/_code/code.py b/py/_code/code.py
--- a/py/_code/code.py
+++ b/py/_code/code.py
@@ -145,17 +145,6 @@
         return self.frame.f_locals
     locals = property(getlocals, None, None, "locals of underlaying frame")
 
-    def reinterpret(self):
-        """Reinterpret the failing statement and returns a detailed information
-           about what operations are performed."""
-        if self.exprinfo is None:
-            source = str(self.statement).strip()
-            x = py.code._reinterpret(source, self.frame, should_fail=True)
-            if not isinstance(x, str):
-                raise TypeError("interpret returned non-string %r" % (x,))
-            self.exprinfo = x
-        return self.exprinfo
-
     def getfirstlinesource(self):
         # on Jython this firstlineno can be -1 apparently
         return max(self.frame.code.firstlineno, 0)
@@ -310,7 +299,7 @@
         #     ExceptionInfo-like classes may have different attributes.
         if tup is None:
             tup = sys.exc_info()
-            if exprinfo is None and isinstance(tup[1], py.code._AssertionError):
+            if exprinfo is None and isinstance(tup[1], AssertionError):
                 exprinfo = getattr(tup[1], 'msg', None)
                 if exprinfo is None:
                     exprinfo = str(tup[1])
@@ -690,22 +679,15 @@
 
 oldbuiltins = {}
 
-def patch_builtins(assertion=True, compile=True):
-    """ put compile and AssertionError builtins to Python's builtins. """
-    if assertion:
-        from py._code import assertion
-        l = oldbuiltins.setdefault('AssertionError', [])
-        l.append(py.builtin.builtins.AssertionError)
-        py.builtin.builtins.AssertionError = assertion.AssertionError
+def patch_builtins(compile=True):
+    """ put compile builtins to Python's builtins. """
     if compile:
         l = oldbuiltins.setdefault('compile', [])
         l.append(py.builtin.builtins.compile)
         py.builtin.builtins.compile = py.code.compile
 
-def unpatch_builtins(assertion=True, compile=True):
+def unpatch_builtins(compile=True):
     """ remove compile and AssertionError builtins from Python builtins. """
-    if assertion:
-        py.builtin.builtins.AssertionError = oldbuiltins['AssertionError'].pop()
     if compile:
         py.builtin.builtins.compile = oldbuiltins['compile'].pop()
 
diff --git a/pypy/annotation/bookkeeper.py b/pypy/annotation/bookkeeper.py
--- a/pypy/annotation/bookkeeper.py
+++ b/pypy/annotation/bookkeeper.py
@@ -279,13 +279,13 @@
         desc = self.getdesc(cls)
         return desc.getuniqueclassdef()
 
-    def getlistdef(self, **flags):
+    def getlistdef(self, **flags_if_new):
         """Get the ListDef associated with the current position."""
         try:
             listdef = self.listdefs[self.position_key]
         except KeyError:
             listdef = self.listdefs[self.position_key] = ListDef(self)
-            listdef.listitem.__dict__.update(flags)
+            listdef.listitem.__dict__.update(flags_if_new)
         return listdef
 
     def newlist(self, *s_values, **flags):
@@ -294,14 +294,18 @@
         listdef = self.getlistdef(**flags)
         for s_value in s_values:
             listdef.generalize(s_value)
+        if flags:
+            assert flags.keys() == ['range_step']
+            listdef.generalize_range_step(flags['range_step'])
         return SomeList(listdef)
 
-    def getdictdef(self, is_r_dict=False):
+    def getdictdef(self, is_r_dict=False, force_non_null=False):
         """Get the DictDef associated with the current position."""
         try:
             dictdef = self.dictdefs[self.position_key]
         except KeyError:
-            dictdef = DictDef(self, is_r_dict=is_r_dict)
+            dictdef = DictDef(self, is_r_dict=is_r_dict,
+                              force_non_null=force_non_null)
             self.dictdefs[self.position_key] = dictdef
         return dictdef
 
diff --git a/pypy/annotation/builtin.py b/pypy/annotation/builtin.py
--- a/pypy/annotation/builtin.py
+++ b/pypy/annotation/builtin.py
@@ -311,8 +311,14 @@
 def robjmodel_we_are_translated():
     return immutablevalue(True)
 
-def robjmodel_r_dict(s_eqfn, s_hashfn):
-    dictdef = getbookkeeper().getdictdef(is_r_dict=True)
+def robjmodel_r_dict(s_eqfn, s_hashfn, s_force_non_null=None):
+    if s_force_non_null is None:
+        force_non_null = False
+    else:
+        assert s_force_non_null.is_constant()
+        force_non_null = s_force_non_null.const
+    dictdef = getbookkeeper().getdictdef(is_r_dict=True,
+                                         force_non_null=force_non_null)
     dictdef.dictkey.update_rdict_annotations(s_eqfn, s_hashfn)
     return SomeDict(dictdef)
 
@@ -351,17 +357,6 @@
 def llmemory_cast_int_to_adr(s):
     return SomeAddress()
 
-
-##def rarith_ovfcheck(s_obj):
-##    if isinstance(s_obj, SomeInteger) and s_obj.unsigned:
-##        getbookkeeper().warning("ovfcheck on unsigned")
-##    return s_obj
-
-##def rarith_ovfcheck_lshift(s_obj1, s_obj2):
-##    if isinstance(s_obj1, SomeInteger) and s_obj1.unsigned:
-##        getbookkeeper().warning("ovfcheck_lshift with unsigned")
-##    return SomeInteger()
-
 def unicodedata_decimal(s_uchr):
     raise TypeError, "unicodedate.decimal() calls should not happen at interp-level"    
 
@@ -379,8 +374,6 @@
         original = getattr(__builtin__, name[8:])
         BUILTIN_ANALYZERS[original] = value
 
-##BUILTIN_ANALYZERS[pypy.rlib.rarithmetic.ovfcheck] = rarith_ovfcheck
-##BUILTIN_ANALYZERS[pypy.rlib.rarithmetic.ovfcheck_lshift] = rarith_ovfcheck_lshift
 BUILTIN_ANALYZERS[pypy.rlib.rarithmetic.intmask] = rarith_intmask
 BUILTIN_ANALYZERS[pypy.rlib.objectmodel.instantiate] = robjmodel_instantiate
 BUILTIN_ANALYZERS[pypy.rlib.objectmodel.we_are_translated] = (
diff --git a/pypy/annotation/dictdef.py b/pypy/annotation/dictdef.py
--- a/pypy/annotation/dictdef.py
+++ b/pypy/annotation/dictdef.py
@@ -85,12 +85,14 @@
 
     def __init__(self, bookkeeper, s_key = s_ImpossibleValue,
                                  s_value = s_ImpossibleValue,
-                               is_r_dict = False):
+                               is_r_dict = False,
+                           force_non_null = False):
         self.dictkey = DictKey(bookkeeper, s_key, is_r_dict)
         self.dictkey.itemof[self] = True
         self.dictvalue = DictValue(bookkeeper, s_value)
         self.dictvalue.itemof[self] = True
         self.bookkeeper = bookkeeper
+        self.force_non_null = force_non_null
 
     def read_key(self, position_key=None):
         if position_key is None:
diff --git a/pypy/annotation/listdef.py b/pypy/annotation/listdef.py
--- a/pypy/annotation/listdef.py
+++ b/pypy/annotation/listdef.py
@@ -184,6 +184,11 @@
     def generalize(self, s_value):
         self.listitem.generalize(s_value)
 
+    def generalize_range_step(self, range_step):
+        newlistitem = ListItem(self.listitem.bookkeeper, s_ImpossibleValue)
+        newlistitem.range_step = range_step
+        self.listitem.merge(newlistitem)
+
     def __repr__(self):
         return '<[%r]%s%s%s%s>' % (self.listitem.s_value,
                                self.listitem.mutated and 'm' or '',
diff --git a/pypy/annotation/test/test_annrpython.py b/pypy/annotation/test/test_annrpython.py
--- a/pypy/annotation/test/test_annrpython.py
+++ b/pypy/annotation/test/test_annrpython.py
@@ -3483,6 +3483,17 @@
         a = self.RPythonAnnotator()
         raises(Exception, a.build_types, f, [int])
 
+    def test_range_variable_step(self):
+        def g(n):
+            return range(0, 10, n)
+        def f(n):
+            r = g(1)    # constant step, at first
+            s = g(n)    # but it becomes a variable step
+            return r
+        a = self.RPythonAnnotator()
+        s = a.build_types(f, [int])
+        assert s.listdef.listitem.range_step == 0
+
 
 def g(n):
     return [0,1,2,n]
diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py
--- a/pypy/config/pypyoption.py
+++ b/pypy/config/pypyoption.py
@@ -33,13 +33,17 @@
      "struct", "_hashlib", "_md5", "_sha", "_minimal_curses", "cStringIO",
      "thread", "itertools", "pyexpat", "_ssl", "cpyext", "array",
      "_bisect", "binascii", "_multiprocessing", '_warnings',
-     "_collections", "_multibytecodec", "micronumpy"]
+     "_collections", "_multibytecodec", "micronumpy", "_ffi"]
 ))
 
 translation_modules = default_modules.copy()
 translation_modules.update(dict.fromkeys(
     ["fcntl", "rctime", "select", "signal", "_rawffi", "zlib",
-     "struct", "_md5", "cStringIO", "array"]))
+     "struct", "_md5", "cStringIO", "array", "_ffi",
+     # the following are needed for pyrepl (and hence for the
+     # interactive prompt/pdb)
+     "termios", "_minimal_curses",
+     ]))
 
 working_oo_modules = default_modules.copy()
 working_oo_modules.update(dict.fromkeys(
@@ -80,6 +84,7 @@
     "_rawffi": [("objspace.usemodules.struct", True)],
     "cpyext": [("translation.secondaryentrypoints", "cpyext"),
                ("translation.shared", sys.platform == "win32")],
+    "_ffi":    [("translation.jit_ffi", True)],
 }
 
 module_import_dependencies = {
@@ -124,9 +129,6 @@
                  cmdline='--objspace -o'),
 
     OptionDescription("opcodes", "opcodes to enable in the interpreter", [
-        BoolOption("CALL_LIKELY_BUILTIN", "emit a special bytecode for likely calls to builtin functions",
-                   default=False,
-                   requires=[("translation.stackless", False)]),
         BoolOption("CALL_METHOD", "emit a special bytecode for expr.name()",
                    default=False),
         ]),
@@ -261,13 +263,7 @@
         BoolOption("withcelldict",
                    "use dictionaries that are optimized for being used as module dicts",
                    default=False,
-                   requires=[("objspace.opcodes.CALL_LIKELY_BUILTIN", False),
-                             ("objspace.honor__builtins__", False)]),
-
-        BoolOption("withdictmeasurement",
-                   "create huge files with masses of information "
-                   "about dictionaries",
-                   default=False),
+                   requires=[("objspace.honor__builtins__", False)]),
 
         BoolOption("withmapdict",
                    "make instances really small but slow without the JIT",
@@ -350,8 +346,6 @@
     backend = config.translation.backend
 
     # all the good optimizations for PyPy should be listed here
-    if level in ['2', '3']:
-        config.objspace.opcodes.suggest(CALL_LIKELY_BUILTIN=True)
     if level in ['2', '3', 'jit']:
         config.objspace.opcodes.suggest(CALL_METHOD=True)
         config.objspace.std.suggest(withrangelist=True)
diff --git a/pypy/config/test/test_pypyoption.py b/pypy/config/test/test_pypyoption.py
--- a/pypy/config/test/test_pypyoption.py
+++ b/pypy/config/test/test_pypyoption.py
@@ -73,3 +73,7 @@
             fn = prefix + "." + path + ".txt"
             yield check_file_exists, fn
 
+def test__ffi_opt():
+    config = get_pypy_config(translating=True)
+    config.objspace.usemodules._ffi = True
+    assert config.translation.jit_ffi
diff --git a/pypy/config/translationoption.py b/pypy/config/translationoption.py
--- a/pypy/config/translationoption.py
+++ b/pypy/config/translationoption.py
@@ -118,6 +118,8 @@
     ChoiceOption("jit_profiler", "integrate profiler support into the JIT",
                  ["off", "oprofile"],
                  default="off"),
+    # jit_ffi is automatically turned on by withmod-_ffi (which is enabled by default)
+    BoolOption("jit_ffi", "optimize libffi calls", default=False, cmdline=None),
 
     # misc
     BoolOption("verbose", "Print extra information", default=False),
diff --git a/pypy/doc/config/objspace.opcodes.CALL_LIKELY_BUILTIN.txt b/pypy/doc/config/objspace.opcodes.CALL_LIKELY_BUILTIN.txt
deleted file mode 100644
--- a/pypy/doc/config/objspace.opcodes.CALL_LIKELY_BUILTIN.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-Introduce a new opcode called ``CALL_LIKELY_BUILTIN``. It is used when something
-is called, that looks like a builtin function (but could in reality be shadowed
-by a name in the module globals). For all module globals dictionaries it is
-then tracked which builtin name is shadowed in this module. If the
-``CALL_LIKELY_BUILTIN`` opcode is executed, it is checked whether the builtin is
-shadowed. If not, the corresponding builtin is called. Otherwise the object that
-is shadowing it is called instead. If no shadowing is happening, this saves two
-dictionary lookups on calls to builtins.
-
-For more information, see the section in `Standard Interpreter Optimizations`_.
-
-.. _`Standard Interpreter Optimizations`: ../interpreter-optimizations.html#call-likely-builtin
diff --git a/pypy/doc/config/objspace.std.withdictmeasurement.txt b/pypy/doc/config/objspace.std.withdictmeasurement.txt
deleted file mode 100644
--- a/pypy/doc/config/objspace.std.withdictmeasurement.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-Internal option.
-
-.. internal
diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst
--- a/pypy/doc/cpython_differences.rst
+++ b/pypy/doc/cpython_differences.rst
@@ -173,6 +173,11 @@
     >>>> A.__del__ = lambda self: None
     __main__:1: RuntimeWarning: a __del__ method added to an existing type will not be called
 
+Even more obscure: the same is true, for old-style classes, if you attach
+a ``__del__`` method to an instance (even in CPython this does not work with
+new-style classes).  You get a RuntimeWarning in PyPy.  To fix these cases,
+just make sure there is a ``__del__`` method in the class to start with.
+
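A minimal sketch of the workaround described above, assuming Python 2 and an
old-style class (illustrative only, not part of this changeset)::

    class Foo:                     # old-style class
        def __del__(self):         # define __del__ in the class from the start
            pass

    foo = Foo()
    # a per-instance override no longer has to add a __del__ slot late:
    foo.__del__ = lambda: None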
 
 Subclasses of built-in types
 ----------------------------
@@ -243,5 +248,7 @@
   never a dictionary as it sometimes is in CPython. Assigning to
   ``__builtins__`` has no effect.
 
+* object identity of immutable keys in dictionaries is not necessarily preserved.
+  Never compare immutable objects with ``is``; see the sketch after this list.
+
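For illustration (not part of this changeset), the difference matters in code
like the following, which happens to work on CPython today but is not
guaranteed on PyPy::

    d = {}
    key = 'some immutable key'
    d[key] = 1
    for k in d:
        assert k == key      # always true
        # 'k is key' may be False on PyPy: the dict may keep its own copy
        # of an immutable key, so identity checks are unreliable.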
 .. include:: _ref.txt
-
diff --git a/pypy/doc/garbage_collection.rst b/pypy/doc/garbage_collection.rst
--- a/pypy/doc/garbage_collection.rst
+++ b/pypy/doc/garbage_collection.rst
@@ -212,90 +212,4 @@
   becomes free garbage, to be collected at the next major collection.
 
 
-Minimark GC
------------
-
-This is a simplification and rewrite of the ideas from the Hybrid GC.
-It uses a nursery for the young objects, and mark-and-sweep for the old
-objects.  This is a moving GC, but objects may only move once (from
-the nursery to the old stage).
-
-The main difference with the Hybrid GC is that the mark-and-sweep
-objects (the "old stage") are directly handled by the GC's custom
-allocator, instead of being handled by malloc() calls.  The gain is that
-it is then possible, during a major collection, to walk through all old
-generation objects without needing to store a list of pointers to them.
-So as a first approximation, when compared to the Hybrid GC, the
-Minimark GC saves one word of memory per old object.
-
-There are a number of environment variables that can be tweaked to
-influence the GC.  (Their default value should be ok for most usages.)
-You can read more about them at the start of
-`pypy/rpython/memory/gc/minimark.py`_.
-
-In more details:
-
-- The small newly malloced objects are allocated in the nursery (case 1).
-  All objects living in the nursery are "young".
-
-- The big objects are always handled directly by the system malloc().
-  But the big newly malloced objects are still "young" when they are
-  allocated (case 2), even though they don't live in the nursery.
-
-- When the nursery is full, we do a minor collection, i.e. we find
-  which "young" objects are still alive (from cases 1 and 2).  The
-  "young" flag is then removed.  The surviving case 1 objects are moved
-  to the old stage. The dying case 2 objects are immediately freed.
-
-- The old stage is an area of memory containing old (small) objects.  It
-  is handled by `pypy/rpython/memory/gc/minimarkpage.py`_.  It is organized
-  as "arenas" of 256KB or 512KB, subdivided into "pages" of 4KB or 8KB.
-  Each page can either be free, or contain small objects of all the same
-  size.  Furthermore at any point in time each object location can be
-  either allocated or freed.  The basic design comes from ``obmalloc.c``
-  from CPython (which itself comes from the same source as the Linux
-  system malloc()).
-
-- New objects are added to the old stage at every minor collection.
-  Immediately after a minor collection, when we reach some threshold, we
-  trigger a major collection.  This is the mark-and-sweep step.  It walks
-  over *all* objects (mark), and then frees some fraction of them (sweep).
-  This means that the only time when we want to free objects is while
-  walking over all of them; we never ask to free an object given just its
-  address.  This allows some simplifications and memory savings when
-  compared to ``obmalloc.c``.
-
-- As with all generational collectors, this GC needs a write barrier to
-  record which old objects have a reference to young objects.
-
-- Additionally, we found out that it is useful to handle the case of
-  big arrays specially: when we allocate a big array (with the system
-  malloc()), we reserve a small number of bytes before.  When the array
-  grows old, we use the extra bytes as a set of bits.  Each bit
-  represents 128 entries in the array.  Whenever the write barrier is
-  called to record a reference from the Nth entry of the array to some
-  young object, we set the bit number ``(N/128)`` to 1.  This can
-  considerably speed up minor collections, because we then only have to
-  scan 128 entries of the array instead of all of them.
-
-- As usual, we need special care about weak references, and objects with
-  finalizers.  Weak references are allocated in the nursery, and if they
-  survive they move to the old stage, as usual for all objects; the
-  difference is that the reference they contain must either follow the
-  object, or be set to NULL if the object dies.  And the objects with
-  finalizers, considered rare enough, are immediately allocated old to
-  simplify the design.  In particular their ``__del__`` method can only
-  be called just after a major collection.
-
-- The objects move once only, so we can use a trick to implement id()
-  and hash().  If the object is not in the nursery, it won't move any
-  more, so its id() and hash() are the object's address, cast to an
-  integer.  If the object is in the nursery, and we ask for its id()
-  or its hash(), then we pre-reserve a location in the old stage, and
-  return the address of that location.  If the object survives the
-  next minor collection, we move it there, and so its id() and hash()
-  are preserved.  If the object dies then the pre-reserved location
-  becomes free garbage, to be collected at the next major collection.
-
-
 .. include:: _ref.txt
diff --git a/pypy/doc/getting-started.rst b/pypy/doc/getting-started.rst
--- a/pypy/doc/getting-started.rst
+++ b/pypy/doc/getting-started.rst
@@ -51,7 +51,7 @@
 ---------------
 
 PyPy is ready to be executed as soon as you unpack the tarball or the zip
-file, with no need install it in any specific location::
+file, with no need to install it in any specific location::
 
     $ tar xf pypy-1.5-linux.tar.bz2
 
diff --git a/pypy/doc/image/jitviewer.png b/pypy/doc/image/jitviewer.png
new file mode 100644
index 0000000000000000000000000000000000000000..ad2abca5c88125061fa519dcf3f9fada577573ee
GIT binary patch

[cut]

diff --git a/pypy/doc/index.rst b/pypy/doc/index.rst
--- a/pypy/doc/index.rst
+++ b/pypy/doc/index.rst
@@ -11,6 +11,10 @@
 Getting into PyPy ... 
 =============================================
 
+* `Getting started`_: how to install and run the PyPy Python interpreter
+
+* `FAQ`_: some frequently asked questions.
+
 * `Release 1.5`_: the latest official release
 
 * `PyPy Blog`_: news and status info about PyPy 
@@ -21,16 +25,11 @@
 
 * `speed.pypy.org`_: Daily benchmarks of how fast PyPy is
 
+* `potential project ideas`_: In case you want to get your feet wet...
+
 Documentation for the PyPy Python Interpreter
 ===============================================
 
-`getting started`_ provides hands-on instructions 
-including a two-liner to run the PyPy Python interpreter 
-on your system, examples on advanced features and 
-entry points for using the `RPython toolchain`_.
-
-`FAQ`_ contains some frequently asked questions.
-
 New features of PyPy's Python Interpreter and 
 Translation Framework: 
 
@@ -59,8 +58,6 @@
   (if they are not already developed in the FAQ_).
   You can find logs of the channel here_.
 
-.. XXX play1? 
-
 Meeting PyPy developers
 =======================
 
@@ -83,7 +80,7 @@
 .. _`Release 1.5`: http://pypy.org/download.html
 .. _`speed.pypy.org`: http://speed.pypy.org
 .. _`RPython toolchain`: translation.html
-
+.. _`potential project ideas`: project-ideas.html
 
 Project Documentation
 =====================================
diff --git a/pypy/doc/interpreter-optimizations.rst b/pypy/doc/interpreter-optimizations.rst
--- a/pypy/doc/interpreter-optimizations.rst
+++ b/pypy/doc/interpreter-optimizations.rst
@@ -157,32 +157,6 @@
 A more advanced version of sharing dicts, called *map dicts,* is available
 with the :config:`objspace.std.withmapdict` option.
 
-Builtin-Shadowing
-+++++++++++++++++
-
-Usually the calling of builtins in Python requires two dictionary lookups: first
-to see whether the current global dictionary contains an object with the same
-name, then a lookup in the ``__builtin__`` dictionary. This is somehow
-circumvented by storing an often used builtin into a local variable to get
-the fast local lookup (which is a rather strange and ugly hack).
-
-The same problem is solved in a different way by "wary" dictionaries. They are
-another dictionary representation used together with multidicts. This
-representation is used only for module dictionaries. The representation checks on
-every setitem whether the key that is used is the name of a builtin. If this is
-the case, the dictionary is marked as shadowing that particular builtin.
-
-To identify calls to builtins easily, a new bytecode (``CALL_LIKELY_BUILTIN``)
-is introduced. Whenever it is executed, the globals dictionary is checked
-to see whether it masks the builtin (which is possible without a dictionary
-lookup).  Then the ``__builtin__`` dict is checked in the same way,
-to see whether somebody replaced the real builtin with something else. In the
-common case, the program didn't do any of these; the proper builtin can then
-be called without using any dictionary lookup at all.
-
-You can enable this feature with the
-:config:`objspace.opcodes.CALL_LIKELY_BUILTIN` option.
-
 
 List Optimizations
 ------------------
@@ -289,34 +263,6 @@
 You can enable this feature with the :config:`objspace.opcodes.CALL_METHOD`
 option.
 
-.. _`call likely builtin`:
-
-CALL_LIKELY_BUILTIN
-+++++++++++++++++++
-
-A often heard "tip" for speeding up Python programs is to give an often used
-builtin a local name, since local lookups are faster than lookups of builtins,
-which involve doing two dictionary lookups: one in the globals dictionary and
-one in the the builtins dictionary. PyPy approaches this problem at the
-implementation level, with the introduction of the new ``CALL_LIKELY_BUILTIN``
-bytecode. This bytecode is produced by the compiler for a call whose target is
-the name of a builtin.  Since such a syntactic construct is very often actually
-invoking the expected builtin at run-time, this information can be used to make
-the call to the builtin directly, without going through any dictionary lookup.
-
-However, it can occur that the name is shadowed by a global name from the
-current module.  To catch this case, a special dictionary implementation for
-multidicts is introduced, which is used for the dictionaries of modules. This
-implementation keeps track which builtin name is shadowed by it.  The
-``CALL_LIKELY_BUILTIN`` bytecode asks the dictionary whether it is shadowing the
-builtin that is about to be called and asks the dictionary of ``__builtin__``
-whether the original builtin was changed.  These two checks are cheaper than
-full lookups.  In the common case, neither of these cases is true, so the
-builtin can be directly invoked.
-
-You can enable this feature with the
-:config:`objspace.opcodes.CALL_LIKELY_BUILTIN` option.
-
 .. more here?
 
 Overall Effects
diff --git a/pypy/doc/project-ideas.rst b/pypy/doc/project-ideas.rst
--- a/pypy/doc/project-ideas.rst
+++ b/pypy/doc/project-ideas.rst
@@ -11,6 +11,12 @@
 `mailing list`_. This is simply for the reason that small possible projects
 tend to change very rapidly.
 
+This list is mostly meant to give an overview of potential projects. It is
+by definition not exhaustive, and we are pleased if people come up with their
+own improvement ideas. In any case, if you feel like working on one of these
+projects, or on anything else in PyPy, drop by on IRC or write to us on the
+`mailing list`_.
+
 Numpy improvements
 ------------------
 
@@ -23,27 +29,121 @@
 
 * interface with fortran/C libraries.
 
-Potential mentors: fijal
+Improving the jitviewer
+------------------------
 
-JIT tooling
------------
+Analyzing the performance of applications is always tricky. We have various
+tools, for example a `jitviewer`_, that help us analyze performance.
 
-xxx
+The jitviewer shows the code generated by the PyPy JIT in a hierarchical way,
+as shown by the screenshot below:
+
+  - at the bottom level, it shows the Python source code of the compiled loops
+
+  - for each source code line, it shows the corresponding Python bytecode
+
+  - for each opcode, it shows the corresponding jit operations, which are the
+    ones actually sent to the backend for compiling (such as ``i15 = i10 <
+    2000`` in the example)
+
+.. image:: image/jitviewer.png
+
+We would like to add one level to this hierarchy, by showing the generated
+machine code for each jit operation.  The necessary information is already in
+the log file produced by the JIT, so it is "only" a matter of teaching the
+jitviewer to display it.  Ideally, the machine code should be hidden by
+default and viewable on request.
+
+The jitviewer is a web application based on flask and jinja2 (and jQuery on
+the client): if you have great web developing skills and want to help PyPy,
+this is an ideal task to get started, because it does not require any deep
+knowledge of the internals.
+
+Translation Toolchain
+---------------------
+
+* Incremental or distributed translation.
+
+* Allow separate compilation of extension modules.
 
 Work on some of other languages
 -------------------------------
 
-xxx
+There are various languages implemented using the RPython translation toolchain.
+One of the most interesting is the `JavaScript implementation`_, but there
+are others, like Scheme or Prolog. An interesting project would be to improve
+the jittability of those, or to experiment with various optimizations.
 
 Various GCs
 -----------
 
-xxx
+PyPy has a pluggable garbage collection policy. This means that various garbage
+collectors can be written for specialized purposes, or that various experiments
+can be done for the general-purpose case. Examples:
+
+* An incremental garbage collector that has specified maximal pause times,
+  crucial for games
+
+* A garbage collector that compacts memory better for mobile devices
+
+* A concurrent garbage collector (a lot of work)
 
 Remove the GIL
 --------------
 
-xxx
+This is a major task that requires lots of thinking. However, a few subprojects
+can already be identified, unless a better plan can be thought out:
 
-.. _`issue tracker`: ...
-.. _`mailing list`: ...
+* A thread-aware garbage collector
+
+* Better RPython primitives for dealing with concurrency
+
+* JIT passes to remove locks on objects
+
+* (maybe) implement locking in Python interpreter
+
+* alternatively, look at Software Transactional Memory
+
+Introduce new benchmarks
+------------------------
+
+We're usually happy to introduce new benchmarks. Please consult us
+first, but in general anything that is real-world Python code
+and is not already represented is welcome. We need at least a standalone
+script that can run without parameters; a minimal skeleton is sketched
+after the list below. Example ideas (benchmarks still need to be derived
+from them!):
+
+* `hg`
+
+* `sympy`
+
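A minimal skeleton of the kind of standalone script meant above (the workload
is a placeholder; a real benchmark would exercise the library in question)::

    import time

    def workload():
        total = 0
        for i in range(1000000):
            total += i * i
        return total

    if __name__ == '__main__':
        start = time.time()
        workload()
        print 'total time: %.3f s' % (time.time() - start)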
+Experiment (again) with LLVM backend for RPython compilation
+------------------------------------------------------------
+
+We already tried working with LLVM, and at the time it was not mature enough
+for our needs. It's possible that this has changed; reviving the LLVM backend
+(or writing a new one from scratch) for static compilation would be a good project.
+
+(On the other hand, just generating C code and using clang might be enough.
+The issue with that is the so-called "asmgcc GC root finder", which has tons
+of issues of its own.  In my opinion (arigo), it would definitely be a
+better project to try to optimize the alternative, the "shadowstack" GC root
+finder, which is nicely portable.  So far it gives a PyPy that is around
+7% slower.)
+
+Embedding PyPy
+----------------------------------------
+
+Being able to embed PyPy, say with its own limited C API, would be
+useful.  But here is the most interesting variant, straight from a live
+discussion at EuroPython :-)  We can have a generic "libpypy.so" that
+can be used as a placeholder dynamic library, and when it gets loaded,
+it runs a .py module that installs (via ctypes) the interface it wants
+exported.  This would give us a one-size-fits-all generic .so file to be
+imported by any application that wants to load .so files :-)
+
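A rough sketch of the idea (hypothetical; nothing here is an existing PyPy
API): the .py module run by such a "libpypy.so" would build C-callable
function pointers with ctypes and hand their addresses back to the host
application through whatever registration hook the library provides::

    import ctypes

    # the C signature we want to export: int add(int, int)
    ADD_FUNC = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_int, ctypes.c_int)

    def _add(a, b):
        return a + b

    exported_add = ADD_FUNC(_add)      # keep a reference alive!

    # a real libpypy.so would pass this raw address to the embedder
    address = ctypes.cast(exported_add, ctypes.c_void_p).value
    print 'add() exported at 0x%x' % address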
+
+.. _`issue tracker`: http://bugs.pypy.org
+.. _`mailing list`: http://mail.python.org/mailman/listinfo/pypy-dev
+.. _`jitviewer`: http://bitbucket.org/pypy/jitviewer
+.. _`JavaScript implementation`: https://bitbucket.org/pypy/lang-js/overview
diff --git a/pypy/interpreter/argument.py b/pypy/interpreter/argument.py
--- a/pypy/interpreter/argument.py
+++ b/pypy/interpreter/argument.py
@@ -17,7 +17,7 @@
         self.varargname = varargname
         self.kwargname = kwargname
 
-    @jit.purefunction
+    @jit.elidable
     def find_argname(self, name):
         try:
             return self.argnames.index(name)
@@ -90,15 +90,18 @@
     ###  Construction  ###
 
     def __init__(self, space, args_w, keywords=None, keywords_w=None,
-                 w_stararg=None, w_starstararg=None):
+                 w_stararg=None, w_starstararg=None, keyword_names_w=None):
         self.space = space
         assert isinstance(args_w, list)
         self.arguments_w = args_w
         self.keywords = keywords
         self.keywords_w = keywords_w
+        self.keyword_names_w = keyword_names_w  # matches the tail of .keywords
         if keywords is not None:
             assert keywords_w is not None
             assert len(keywords_w) == len(keywords)
+            assert (keyword_names_w is None or
+                    len(keyword_names_w) <= len(keywords))
             make_sure_not_resized(self.keywords)
             make_sure_not_resized(self.keywords_w)
 
@@ -132,7 +135,8 @@
 
     def replace_arguments(self, args_w):
         "Return a new Arguments with a args_w as positional arguments."
-        return Arguments(self.space, args_w, self.keywords, self.keywords_w)
+        return Arguments(self.space, args_w, self.keywords, self.keywords_w,
+                         keyword_names_w = self.keyword_names_w)
 
     def prepend(self, w_firstarg):
         "Return a new Arguments with a new argument inserted first."
@@ -201,15 +205,16 @@
                         space.w_TypeError,
                         space.wrap("keywords must be strings"))
                 if e.match(space, space.w_UnicodeEncodeError):
-                    raise OperationError(
-                        space.w_TypeError,
-                        space.wrap("keyword cannot be encoded to ascii"))
-                raise
-            if self.keywords and key in self.keywords:
-                raise operationerrfmt(self.space.w_TypeError,
-                                      "got multiple values "
-                                      "for keyword argument "
-                                      "'%s'", key)
+                    # Allow this to pass through
+                    key = None
+                else:
+                    raise
+            else:
+                if self.keywords and key in self.keywords:
+                    raise operationerrfmt(self.space.w_TypeError,
+                                          "got multiple values "
+                                          "for keyword argument "
+                                          "'%s'", key)
             keywords[i] = key
             keywords_w[i] = space.getitem(w_starstararg, w_key)
             i += 1
@@ -219,6 +224,7 @@
         else:
             self.keywords = self.keywords + keywords
             self.keywords_w = self.keywords_w + keywords_w
+        self.keyword_names_w = keys_w
 
     def fixedunpack(self, argcount):
         """The simplest argument parsing: get the 'argcount' arguments,
@@ -339,6 +345,10 @@
             used_keywords = [False] * num_kwds
             for i in range(num_kwds):
                 name = keywords[i]
+                # If name was not encoded as a string, it could be None. In that
+                # case, it's definitely not going to be in the signature.
+                if name is None:
+                    continue
                 j = signature.find_argname(name)
                 if j < 0:
                     continue
@@ -374,17 +384,26 @@
         if has_kwarg:
             w_kwds = self.space.newdict()
             if num_remainingkwds:
+                #
+                limit = len(keywords)
+                if self.keyword_names_w is not None:
+                    limit -= len(self.keyword_names_w)
                 for i in range(len(keywords)):
                     if not used_keywords[i]:
-                        key = keywords[i]
-                        self.space.setitem(w_kwds, self.space.wrap(key), keywords_w[i])
+                        if i < limit:
+                            w_key = self.space.wrap(keywords[i])
+                        else:
+                            w_key = self.keyword_names_w[i - limit]
+                        self.space.setitem(w_kwds, w_key, keywords_w[i])
+                #
             scope_w[co_argcount + has_vararg] = w_kwds
         elif num_remainingkwds:
             if co_argcount == 0:
                 raise ArgErrCount(avail, num_kwds,
                               co_argcount, has_vararg, has_kwarg,
                               defaults_w, missing)
-            raise ArgErrUnknownKwds(num_remainingkwds, keywords, used_keywords)
+            raise ArgErrUnknownKwds(self.space, num_remainingkwds, keywords,
+                                    used_keywords, self.keyword_names_w)
 
         if missing:
             raise ArgErrCount(avail, num_kwds,
@@ -443,9 +462,15 @@
         w_args = space.newtuple(self.arguments_w)
         w_kwds = space.newdict()
         if self.keywords is not None:
+            limit = len(self.keywords)
+            if self.keyword_names_w is not None:
+                limit -= len(self.keyword_names_w)
             for i in range(len(self.keywords)):
-                space.setitem(w_kwds, space.wrap(self.keywords[i]),
-                                      self.keywords_w[i])
+                if i < limit:
+                    w_key = space.wrap(self.keywords[i])
+                else:
+                    w_key = self.keyword_names_w[i - limit]
+                space.setitem(w_kwds, w_key, self.keywords_w[i])
         return w_args, w_kwds
 
 class ArgumentsForTranslation(Arguments):
@@ -666,14 +691,33 @@
 
 class ArgErrUnknownKwds(ArgErr):
 
-    def __init__(self, num_remainingkwds, keywords, used_keywords):
-        self.kwd_name = ''
+    def __init__(self, space, num_remainingkwds, keywords, used_keywords,
+                 keyword_names_w):
+        name = ''
         self.num_kwds = num_remainingkwds
         if num_remainingkwds == 1:
             for i in range(len(keywords)):
                 if not used_keywords[i]:
-                    self.kwd_name = keywords[i]
+                    name = keywords[i]
+                    if name is None:
+                        # We'll assume it's unicode. Encode it.
+                        # Careful, I *think* it should not be possible to
+                        # get an IndexError here but you never know.
+                        try:
+                            if keyword_names_w is None:
+                                raise IndexError
+                            # note: negative-based indexing from the end
+                            w_name = keyword_names_w[i - len(keywords)]
+                        except IndexError:
+                            name = '?'
+                        else:
+                            w_enc = space.wrap(space.sys.defaultencoding)
+                            w_err = space.wrap("replace")
+                            w_name = space.call_method(w_name, "encode", w_enc,
+                                                       w_err)
+                            name = space.str_w(w_name)
                     break
+        self.kwd_name = name
 
     def getmsg(self, fnname):
         if self.num_kwds == 1:
diff --git a/pypy/interpreter/astcompiler/assemble.py b/pypy/interpreter/astcompiler/assemble.py
--- a/pypy/interpreter/astcompiler/assemble.py
+++ b/pypy/interpreter/astcompiler/assemble.py
@@ -655,9 +655,6 @@
 def _compute_CALL_FUNCTION_VAR_KW(arg):
     return -_num_args(arg) - 2
 
-def _compute_CALL_LIKELY_BUILTIN(arg):
-    return -(arg & 0xFF) + 1
-
 def _compute_CALL_METHOD(arg):
     return -_num_args(arg) - 1
 
diff --git a/pypy/interpreter/astcompiler/codegen.py b/pypy/interpreter/astcompiler/codegen.py
--- a/pypy/interpreter/astcompiler/codegen.py
+++ b/pypy/interpreter/astcompiler/codegen.py
@@ -12,7 +12,6 @@
 from pypy.interpreter.pyparser.error import SyntaxError
 from pypy.tool import stdlib_opcode as ops
 from pypy.interpreter.error import OperationError
-from pypy.module.__builtin__.__init__ import BUILTIN_TO_INDEX
 
 
 def compile_ast(space, module, info):
@@ -134,7 +133,7 @@
 
     def accept_comp_iteration(self, codegen, index):
         self.elt.walkabout(codegen)
-        codegen.emit_op_arg(ops.SET_ADD, index)
+        codegen.emit_op_arg(ops.SET_ADD, index + 1)
 
 
 class __extend__(ast.DictComp):
@@ -148,7 +147,7 @@
     def accept_comp_iteration(self, codegen, index):
         self.value.walkabout(codegen)
         self.key.walkabout(codegen)
-        codegen.emit_op_arg(ops.MAP_ADD, index)
+        codegen.emit_op_arg(ops.MAP_ADD, index + 1)
 
 
 # These are frame blocks.
@@ -942,8 +941,7 @@
 
     def visit_Call(self, call):
         self.update_position(call.lineno)
-        if self._optimize_builtin_call(call) or \
-                self._optimize_method_call(call):
+        if self._optimize_method_call(call):
             return
         call.func.walkabout(self)
         arg = 0
@@ -977,28 +975,6 @@
     def _call_has_simple_args(self, call):
         return self._call_has_no_star_args(call) and not call.keywords
 
-    def _optimize_builtin_call(self, call):
-        if not self.space.config.objspace.opcodes.CALL_LIKELY_BUILTIN or \
-                not self._call_has_simple_args(call) or \
-                not isinstance(call.func, ast.Name):
-            return False
-        func_name = call.func
-        assert isinstance(func_name, ast.Name)
-        name_scope = self.scope.lookup(func_name.id)
-        if name_scope == symtable.SCOPE_GLOBAL_IMPLICIT or \
-                name_scope == symtable.SCOPE_UNKNOWN:
-            builtin_index = BUILTIN_TO_INDEX.get(func_name.id, -1)
-            if builtin_index != -1:
-                if call.args:
-                    args_count = len(call.args)
-                    self.visit_sequence(call.args)
-                else:
-                    args_count = 0
-                arg = builtin_index << 8 | args_count
-                self.emit_op_arg(ops.CALL_LIKELY_BUILTIN, arg)
-                return True
-        return False
-
     def _optimize_method_call(self, call):
         if not self.space.config.objspace.opcodes.CALL_METHOD or \
                 not self._call_has_no_star_args(call) or \
diff --git a/pypy/interpreter/astcompiler/misc.py b/pypy/interpreter/astcompiler/misc.py
--- a/pypy/interpreter/astcompiler/misc.py
+++ b/pypy/interpreter/astcompiler/misc.py
@@ -92,7 +92,10 @@
         return name
     if len(name) + 2 >= MANGLE_LEN:
         return name
-    if name.endswith('__'):
+    # Don't mangle __id__ or names with dots. The only time a name with a dot
+    # can occur is when we are compiling an import statement that has a package
+    # name.
+    if name.endswith('__') or '.' in name:
         return name
     try:
         i = 0
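For reference, a rough sketch (not PyPy's actual helper, which also handles
length limits) of the private-name mangling rule the comment above describes:
dunder names and dotted names, which only appear when compiling imports of
dotted packages, are left untouched::

    def mangle(name, klass):
        if not name.startswith('__'):
            return name
        if name.endswith('__') or '.' in name:
            return name
        return '_' + klass.lstrip('_') + name

    assert mangle('__x', 'Foo') == '_Foo__x'
    assert mangle('__id__', 'Foo') == '__id__'          # dunder: untouched
    assert mangle('__foo__.bar', 'A') == '__foo__.bar'  # dotted name: untouched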
diff --git a/pypy/interpreter/astcompiler/test/test_compiler.py b/pypy/interpreter/astcompiler/test/test_compiler.py
--- a/pypy/interpreter/astcompiler/test/test_compiler.py
+++ b/pypy/interpreter/astcompiler/test/test_compiler.py
@@ -55,7 +55,7 @@
         co_expr = compile(evalexpr, '<evalexpr>', 'eval')
         space = self.space
         pyco_expr = PyCode._from_code(space, co_expr)
-        w_res = pyco_expr.exec_host_bytecode(space, w_dict, w_dict)
+        w_res = pyco_expr.exec_host_bytecode(w_dict, w_dict)
         res = space.str_w(space.repr(w_res))
         if not isinstance(expected, float):
             assert res == repr(expected)
@@ -308,6 +308,15 @@
                "p.__name__", os.path.__name__)
         yield (self.st, 'from os import *',
                "path.__name__, sep", (os.path.__name__, os.sep))
+        yield (self.st, '''
+            class A(object):
+                def m(self):
+                    from __foo__.bar import x
+            try:
+                A().m()
+            except ImportError, e:
+                msg = str(e)
+            ''', "msg", "No module named __foo__")
 
     def test_if_stmts(self):
         yield self.st, "a = 42\nif a > 10: a += 2", "a", 44
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -237,7 +237,7 @@
 
 class ObjSpace(object):
     """Base class for the interpreter-level implementations of object spaces.
-    http://codespeak.net/pypy/dist/pypy/doc/objspace.html"""
+    http://pypy.readthedocs.org/en/latest/objspace.html"""
 
     full_exceptions = True  # full support for exceptions (normalization & more)
 
@@ -311,9 +311,6 @@
             mod = self.interpclass_w(w_mod)
             if isinstance(mod, Module) and mod.startup_called:
                 mod.shutdown(self)
-        if self.config.objspace.std.withdictmeasurement:
-            from pypy.objspace.std.dictmultiobject import report
-            report()
         if self.config.objspace.logbytecodes:
             self.reportbytecodecounts()
         if self.config.objspace.std.logspaceoptypes:
@@ -989,10 +986,7 @@
             compiler = self.createcompiler()
             expression = compiler.compile(expression, '?', 'eval', 0,
                                          hidden_applevel=hidden_applevel)
-        if isinstance(expression, types.CodeType):
-            # XXX only used by appsupport
-            expression = PyCode._from_code(self, expression)
-        if not isinstance(expression, PyCode):
+        else:
             raise TypeError, 'space.eval(): expected a string, code or PyCode object'
         return expression.exec_code(self, w_globals, w_locals)
 
@@ -1007,9 +1001,6 @@
             compiler = self.createcompiler()
             statement = compiler.compile(statement, filename, 'exec', 0,
                                          hidden_applevel=hidden_applevel)
-        if isinstance(statement, types.CodeType):
-            # XXX only used by appsupport
-            statement = PyCode._from_code(self, statement)
         if not isinstance(statement, PyCode):
             raise TypeError, 'space.exec_(): expected a string, code or PyCode object'
         w_key = self.wrap('__builtins__')
diff --git a/pypy/interpreter/eval.py b/pypy/interpreter/eval.py
--- a/pypy/interpreter/eval.py
+++ b/pypy/interpreter/eval.py
@@ -100,12 +100,12 @@
 
     @jit.dont_look_inside
     def fast2locals(self):
-        # Copy values from self.fastlocals_w to self.w_locals
+        # Copy values from the fastlocals to self.w_locals
         if self.w_locals is None:
             self.w_locals = self.space.newdict()
         varnames = self.getcode().getvarnames()
         fastscope_w = self.getfastscope()
-        for i in range(min(len(varnames), len(fastscope_w))):
+        for i in range(min(len(varnames), self.getfastscopelength())):
             name = varnames[i]
             w_value = fastscope_w[i]
             if w_value is not None:
@@ -114,7 +114,7 @@
 
     @jit.dont_look_inside
     def locals2fast(self):
-        # Copy values from self.w_locals to self.fastlocals_w
+        # Copy values from self.w_locals to the fastlocals
         assert self.w_locals is not None
         varnames = self.getcode().getvarnames()
         numlocals = self.getfastscopelength()
diff --git a/pypy/interpreter/function.py b/pypy/interpreter/function.py
--- a/pypy/interpreter/function.py
+++ b/pypy/interpreter/function.py
@@ -16,7 +16,7 @@
 
 funccallunrolling = unrolling_iterable(range(4))
 
- at jit.purefunction_promote()
+ at jit.elidable_promote()
 def _get_immutable_code(func):
     assert not func.can_change_code
     return func.code
@@ -63,7 +63,7 @@
         if jit.we_are_jitted():
             if not self.can_change_code:
                 return _get_immutable_code(self)
-            return jit.hint(self.code, promote=True)
+            return jit.promote(self.code)
         return self.code
 
     def funccall(self, *args_w): # speed hack
@@ -98,7 +98,7 @@
                                                    self.closure)
                 for i in funccallunrolling:
                     if i < nargs:
-                        new_frame.fastlocals_w[i] = args_w[i]
+                        new_frame.locals_stack_w[i] = args_w[i]
                 return new_frame.run()
         elif nargs >= 1 and fast_natural_arity == Code.PASSTHROUGHARGS1:
             assert isinstance(code, gateway.BuiltinCodePassThroughArguments1)
@@ -158,7 +158,7 @@
                                                    self.closure)
         for i in xrange(nargs):
             w_arg = frame.peekvalue(nargs-1-i)
-            new_frame.fastlocals_w[i] = w_arg
+            new_frame.locals_stack_w[i] = w_arg
 
         return new_frame.run()
 
@@ -169,13 +169,13 @@
                                                    self.closure)
         for i in xrange(nargs):
             w_arg = frame.peekvalue(nargs-1-i)
-            new_frame.fastlocals_w[i] = w_arg
+            new_frame.locals_stack_w[i] = w_arg
 
         ndefs = len(self.defs_w)
         start = ndefs - defs_to_load
         i = nargs
         for j in xrange(start, ndefs):
-            new_frame.fastlocals_w[i] = self.defs_w[j]
+            new_frame.locals_stack_w[i] = self.defs_w[j]
             i += 1
         return new_frame.run()
 
@@ -465,19 +465,23 @@
                 space.abstract_isinstance_w(w_firstarg, self.w_class)):
             pass  # ok
         else:
-            myname = self.getname(space,"")
-            clsdescr = self.w_class.getname(space,"")
+            myname = self.getname(space, "")
+            clsdescr = self.w_class.getname(space, "")
             if clsdescr:
-                clsdescr+=" "
+                clsdescr += " instance"
+            else:
+                clsdescr = "instance"
             if w_firstarg is None:
                 instdescr = "nothing"
             else:
-                instname = space.abstract_getclass(w_firstarg).getname(space,"")
+                instname = space.abstract_getclass(w_firstarg).getname(space,
+                                                                       "")
                 if instname:
-                    instname += " "
-                instdescr = "%sinstance" %instname
-            msg = ("unbound method %s() must be called with %s"
-                   "instance as first argument (got %s instead)")
+                    instdescr = instname + " instance"
+                else:
+                    instdescr = "instance"
+            msg = ("unbound method %s() must be called with %s "
+                   "as first argument (got %s instead)")
             raise operationerrfmt(space.w_TypeError, msg,
                                   myname, clsdescr, instdescr)
         return space.call_args(self.w_function, args)
diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py
--- a/pypy/interpreter/gateway.py
+++ b/pypy/interpreter/gateway.py
@@ -396,11 +396,14 @@
             fastfunc = func
         else:
             # try to avoid excessive bloat
-            if func.__module__ == 'pypy.interpreter.astcompiler.ast':
+            mod = func.__module__
+            if mod is None:
+                mod = ""
+            if mod == 'pypy.interpreter.astcompiler.ast':
                 raise FastFuncNotSupported
-            if (not func.__module__.startswith('pypy.module.__builtin__') and
-                not func.__module__.startswith('pypy.module.sys') and
-                not func.__module__.startswith('pypy.module.math')):
+            if (not mod.startswith('pypy.module.__builtin__') and
+                not mod.startswith('pypy.module.sys') and
+                not mod.startswith('pypy.module.math')):
                 if not func.__name__.startswith('descr'):
                     raise FastFuncNotSupported
             d = {}
diff --git a/pypy/interpreter/generator.py b/pypy/interpreter/generator.py
--- a/pypy/interpreter/generator.py
+++ b/pypy/interpreter/generator.py
@@ -62,7 +62,7 @@
             raise operr
         # XXX it's not clear that last_instr should be promoted at all
         # but as long as it is necessary for call_assembler, let's do it early
-        last_instr = jit.hint(frame.last_instr, promote=True)
+        last_instr = jit.promote(frame.last_instr)
         if last_instr == -1:
             if w_arg and not space.is_w(w_arg, space.w_None):
                 msg = "can't send non-None value to a just-started generator"
diff --git a/pypy/interpreter/module.py b/pypy/interpreter/module.py
--- a/pypy/interpreter/module.py
+++ b/pypy/interpreter/module.py
@@ -9,6 +9,8 @@
 class Module(Wrappable):
     """A module."""
 
+    _immutable_fields_ = ["w_dict?"]
+
     _frozen = False
 
     def __init__(self, space, w_name, w_dict=None, add_package=True):
diff --git a/pypy/interpreter/nestedscope.py b/pypy/interpreter/nestedscope.py
--- a/pypy/interpreter/nestedscope.py
+++ b/pypy/interpreter/nestedscope.py
@@ -170,7 +170,7 @@
         for i in range(len(args_to_copy)):
             argnum = args_to_copy[i]
             if argnum >= 0:
-                self.cells[i].set(self.fastlocals_w[argnum])
+                self.cells[i].set(self.locals_stack_w[argnum])
 
     def getfreevarname(self, index):
         freevarnames = self.pycode.co_cellvars + self.pycode.co_freevars
diff --git a/pypy/interpreter/pycode.py b/pypy/interpreter/pycode.py
--- a/pypy/interpreter/pycode.py
+++ b/pypy/interpreter/pycode.py
@@ -63,6 +63,7 @@
         the pypy compiler"""
         self.space = space
         eval.Code.__init__(self, name)
+        assert nlocals >= 0
         self.co_argcount = argcount
         self.co_nlocals = nlocals
         self.co_stacksize = stacksize
@@ -95,7 +96,7 @@
             if self.co_flags & CO_VARKEYWORDS:
                 argcount += 1
             # Cell vars could shadow already-set arguments.
-            # astcompiler.pyassem used to be clever about the order of
+            # The compiler used to be clever about the order of
             # the variables in both co_varnames and co_cellvars, but
             # it no longer is for the sake of simplicity.  Moreover
             # code objects loaded from CPython don't necessarily follow
@@ -202,7 +203,7 @@
         # speed hack
         fresh_frame = jit.hint(frame, access_directly=True,
                                       fresh_virtualizable=True)
-        args_matched = args.parse_into_scope(None, fresh_frame.fastlocals_w,
+        args_matched = args.parse_into_scope(None, fresh_frame.locals_stack_w,
                                              func.name,
                                              sig, func.defs_w)
         fresh_frame.init_cells()
@@ -215,7 +216,7 @@
         # speed hack
         fresh_frame = jit.hint(frame, access_directly=True,
                                       fresh_virtualizable=True)
-        args_matched = args.parse_into_scope(w_obj, fresh_frame.fastlocals_w,
+        args_matched = args.parse_into_scope(w_obj, fresh_frame.locals_stack_w,
                                              func.name,
                                              sig, func.defs_w)
         fresh_frame.init_cells()
@@ -256,7 +257,7 @@
                          tuple(self.co_freevars),
                          tuple(self.co_cellvars) )
 
-    def exec_host_bytecode(self, w_dict, w_globals, w_locals):
+    def exec_host_bytecode(self, w_globals, w_locals):
         from pypy.interpreter.pyframe import CPythonFrame
         frame = CPythonFrame(self.space, self, w_globals, None)
         frame.setdictscope(w_locals)
diff --git a/pypy/interpreter/pyframe.py b/pypy/interpreter/pyframe.py
--- a/pypy/interpreter/pyframe.py
+++ b/pypy/interpreter/pyframe.py
@@ -9,7 +9,7 @@
 from pypy.interpreter import pytraceback
 from pypy.rlib.objectmodel import we_are_translated, instantiate
 from pypy.rlib.jit import hint
-from pypy.rlib.debug import make_sure_not_resized
+from pypy.rlib.debug import make_sure_not_resized, check_nonneg
 from pypy.rlib.rarithmetic import intmask
 from pypy.rlib import jit
 from pypy.tool import stdlib_opcode
@@ -56,16 +56,18 @@
         assert isinstance(code, pycode.PyCode)
         self.pycode = code
         eval.Frame.__init__(self, space, w_globals)
-        self.valuestack_w = [None] * code.co_stacksize
-        self.valuestackdepth = 0
+        self.locals_stack_w = [None] * (code.co_nlocals + code.co_stacksize)
+        self.nlocals = code.co_nlocals
+        self.valuestackdepth = code.co_nlocals
         self.lastblock = None
+        make_sure_not_resized(self.locals_stack_w)
+        check_nonneg(self.nlocals)
+        #
         if space.config.objspace.honor__builtins__:
             self.builtin = space.builtin.pick_builtin(w_globals)
         # regular functions always have CO_OPTIMIZED and CO_NEWLOCALS.
         # class bodies only have CO_NEWLOCALS.
         self.initialize_frame_scopes(closure, code)
-        self.fastlocals_w = [None] * code.co_nlocals
-        make_sure_not_resized(self.fastlocals_w)
         self.f_lineno = code.co_firstlineno
 
     def mark_as_escaped(self):
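For orientation, a tiny model (illustrative only, not PyPy code) of the merged
layout this hunk introduces: a single list holds the fast locals first and the
value stack after them, so the stack depth starts at ``co_nlocals`` and may
never drop below it::

    class MiniFrame(object):
        def __init__(self, co_nlocals, co_stacksize):
            self.locals_stack_w = [None] * (co_nlocals + co_stacksize)
            self.nlocals = co_nlocals
            self.valuestackdepth = co_nlocals   # stack begins after the locals

        def pushvalue(self, w_object):
            self.locals_stack_w[self.valuestackdepth] = w_object
            self.valuestackdepth += 1

        def popvalue(self):
            depth = self.valuestackdepth - 1
            assert depth >= self.nlocals, "pop from empty value stack"
            w_object = self.locals_stack_w[depth]
            self.locals_stack_w[depth] = None
            self.valuestackdepth = depth
            return w_object

    f = MiniFrame(co_nlocals=2, co_stacksize=3)
    f.locals_stack_w[0] = 'w_local0'    # fast locals live in the first slots
    f.pushvalue('w_tmp')
    assert f.popvalue() == 'w_tmp'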
@@ -184,14 +186,14 @@
     # stack manipulation helpers
     def pushvalue(self, w_object):
         depth = self.valuestackdepth
-        self.valuestack_w[depth] = w_object
+        self.locals_stack_w[depth] = w_object
         self.valuestackdepth = depth + 1
 
     def popvalue(self):
         depth = self.valuestackdepth - 1
-        assert depth >= 0, "pop from empty value stack"
-        w_object = self.valuestack_w[depth]
-        self.valuestack_w[depth] = None
+        assert depth >= self.nlocals, "pop from empty value stack"
+        w_object = self.locals_stack_w[depth]
+        self.locals_stack_w[depth] = None
         self.valuestackdepth = depth
         return w_object
 
@@ -217,24 +219,24 @@
     def peekvalues(self, n):
         values_w = [None] * n
         base = self.valuestackdepth - n
-        assert base >= 0
+        assert base >= self.nlocals
         while True:
             n -= 1
             if n < 0:
                 break
-            values_w[n] = self.valuestack_w[base+n]
+            values_w[n] = self.locals_stack_w[base+n]
         return values_w
 
     @jit.unroll_safe
     def dropvalues(self, n):
         n = hint(n, promote=True)
         finaldepth = self.valuestackdepth - n
-        assert finaldepth >= 0, "stack underflow in dropvalues()"        
+        assert finaldepth >= self.nlocals, "stack underflow in dropvalues()"
         while True:
             n -= 1
             if n < 0:
                 break
-            self.valuestack_w[finaldepth+n] = None
+            self.locals_stack_w[finaldepth+n] = None
         self.valuestackdepth = finaldepth
 
     @jit.unroll_safe
@@ -261,30 +263,30 @@
         # Contrast this with CPython where it's PEEK(-1).
         index_from_top = hint(index_from_top, promote=True)
         index = self.valuestackdepth + ~index_from_top
-        assert index >= 0, "peek past the bottom of the stack"
-        return self.valuestack_w[index]
+        assert index >= self.nlocals, "peek past the bottom of the stack"
+        return self.locals_stack_w[index]
 
     def settopvalue(self, w_object, index_from_top=0):
         index_from_top = hint(index_from_top, promote=True)
         index = self.valuestackdepth + ~index_from_top
-        assert index >= 0, "settop past the bottom of the stack"
-        self.valuestack_w[index] = w_object
+        assert index >= self.nlocals, "settop past the bottom of the stack"
+        self.locals_stack_w[index] = w_object
 
     @jit.unroll_safe
     def dropvaluesuntil(self, finaldepth):
         depth = self.valuestackdepth - 1
         finaldepth = hint(finaldepth, promote=True)
         while depth >= finaldepth:
-            self.valuestack_w[depth] = None
+            self.locals_stack_w[depth] = None
             depth -= 1
         self.valuestackdepth = finaldepth
 
-    def savevaluestack(self):
-        return self.valuestack_w[:self.valuestackdepth]
+    def save_locals_stack(self):
+        return self.locals_stack_w[:self.valuestackdepth]
 
-    def restorevaluestack(self, items_w):
-        assert None not in items_w
-        self.valuestack_w[:len(items_w)] = items_w
+    def restore_locals_stack(self, items_w):
+        self.locals_stack_w[:len(items_w)] = items_w
+        self.init_cells()
         self.dropvaluesuntil(len(items_w))
 
     def make_arguments(self, nargs):
@@ -314,11 +316,12 @@
         else:
             f_lineno = self.f_lineno
 
-        values_w = self.valuestack_w[0:self.valuestackdepth]
+        values_w = self.locals_stack_w[self.nlocals:self.valuestackdepth]
         w_valuestack = maker.slp_into_tuple_with_nulls(space, values_w)
         
         w_blockstack = nt([block._get_state_(space) for block in self.get_blocklist()])
-        w_fastlocals = maker.slp_into_tuple_with_nulls(space, self.fastlocals_w)
+        w_fastlocals = maker.slp_into_tuple_with_nulls(
+            space, self.locals_stack_w[:self.nlocals])
         if self.last_exception is None:
             w_exc_value = space.w_None
             w_tb = space.w_None
@@ -399,7 +402,8 @@
         new_frame.last_instr = space.int_w(w_last_instr)
         new_frame.frame_finished_execution = space.is_true(w_finished)
         new_frame.f_lineno = space.int_w(w_f_lineno)
-        new_frame.fastlocals_w = maker.slp_from_tuple_with_nulls(space, w_fastlocals)
+        fastlocals_w = maker.slp_from_tuple_with_nulls(space, w_fastlocals)
+        new_frame.locals_stack_w[:len(fastlocals_w)] = fastlocals_w
 
         if space.is_w(w_f_trace, space.w_None):
             new_frame.w_f_trace = None
@@ -423,28 +427,28 @@
     @jit.dont_look_inside
     def getfastscope(self):
         "Get the fast locals as a list."
-        return self.fastlocals_w
+        return self.locals_stack_w
 
     @jit.dont_look_inside
     def setfastscope(self, scope_w):
         """Initialize the fast locals from a list of values,
         where the order is according to self.pycode.signature()."""
         scope_len = len(scope_w)
-        if scope_len > len(self.fastlocals_w):
+        if scope_len > self.nlocals:
             raise ValueError, "new fastscope is longer than the allocated area"
-        # don't assign directly to 'fastlocals_w[:scope_len]' to be
+        # don't assign directly to 'locals_stack_w[:scope_len]' to be
         # virtualizable-friendly
         for i in range(scope_len):
-            self.fastlocals_w[i] = scope_w[i]
+            self.locals_stack_w[i] = scope_w[i]
         self.init_cells()
 
     def init_cells(self):
-        """Initialize cellvars from self.fastlocals_w
+        """Initialize cellvars from self.locals_stack_w.
         This is overridden in nestedscope.py"""
         pass
 
     def getfastscopelength(self):
-        return self.pycode.co_nlocals
+        return self.nlocals
 
     def getclosure(self):
         return None
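
The pyframe.py change above folds the old fastlocals_w and valuestack_w lists into a single locals_stack_w list: indices [0, nlocals) hold the fast locals, the value stack grows upward from index nlocals, and an empty stack therefore means valuestackdepth == nlocals (which is why the underflow assertions now compare against self.nlocals instead of 0). A minimal pure-Python sketch of that layout; MiniFrame and its helpers are toy stand-ins, not the real PyFrame API:

    class MiniFrame(object):
        # Toy model of the merged locals+stack layout; not the real PyFrame.
        def __init__(self, nlocals, stacksize):
            self.locals_stack_w = [None] * (nlocals + stacksize)
            self.nlocals = nlocals
            self.valuestackdepth = nlocals      # empty stack == depth nlocals

        def setlocal(self, i, w_value):
            self.locals_stack_w[i] = w_value    # slots [0, nlocals) are locals

        def pushvalue(self, w_object):
            depth = self.valuestackdepth
            self.locals_stack_w[depth] = w_object
            self.valuestackdepth = depth + 1

        def popvalue(self):
            depth = self.valuestackdepth - 1
            assert depth >= self.nlocals, "pop from empty value stack"
            w_object = self.locals_stack_w[depth]
            self.locals_stack_w[depth] = None
            self.valuestackdepth = depth
            return w_object

        def peekvalue(self, index_from_top=0):
            index = self.valuestackdepth + ~index_from_top
            assert index >= self.nlocals, "peek past the bottom of the stack"
            return self.locals_stack_w[index]

    f = MiniFrame(nlocals=2, stacksize=3)
    f.setlocal(0, 'local0')
    f.pushvalue('a')
    f.pushvalue('b')
    assert f.peekvalue(0) == 'b' and f.peekvalue(1) == 'a'
    assert f.popvalue() == 'b'
    assert f.locals_stack_w[:f.nlocals] == ['local0', None]   # locals untouched

Note that peekvalue/settopvalue index from the top with depth + ~index_from_top, so index_from_top == 0 is the top of the stack.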
diff --git a/pypy/interpreter/pyopcode.py b/pypy/interpreter/pyopcode.py
--- a/pypy/interpreter/pyopcode.py
+++ b/pypy/interpreter/pyopcode.py
@@ -324,7 +324,7 @@
 
     def LOAD_FAST(self, varindex, next_instr):
         # access a local variable directly
-        w_value = self.fastlocals_w[varindex]
+        w_value = self.locals_stack_w[varindex]
         if w_value is None:
             self._load_fast_failed(varindex)
         self.pushvalue(w_value)
@@ -343,7 +343,7 @@
     def STORE_FAST(self, varindex, next_instr):
         w_newvalue = self.popvalue()
         assert w_newvalue is not None
-        self.fastlocals_w[varindex] = w_newvalue
+        self.locals_stack_w[varindex] = w_newvalue
 
     def POP_TOP(self, oparg, next_instr):
         self.popvalue()
@@ -696,12 +696,12 @@
     LOAD_GLOBAL._always_inline_ = True
 
     def DELETE_FAST(self, varindex, next_instr):
-        if self.fastlocals_w[varindex] is None:
+        if self.locals_stack_w[varindex] is None:
             varname = self.getlocalvarname(varindex)
             message = "local variable '%s' referenced before assignment"
             raise operationerrfmt(self.space.w_UnboundLocalError, message,
                                   varname)
-        self.fastlocals_w[varindex] = None
+        self.locals_stack_w[varindex] = None
 
     def BUILD_TUPLE(self, itemcount, next_instr):
         items = self.popvalues(itemcount)
@@ -1048,30 +1048,18 @@
 
     def SET_ADD(self, oparg, next_instr):
         w_value = self.popvalue()
-        w_set = self.peekvalue(oparg)
+        w_set = self.peekvalue(oparg - 1)
         self.space.call_method(w_set, 'add', w_value)
 
     def MAP_ADD(self, oparg, next_instr):
         w_key = self.popvalue()
         w_value = self.popvalue()
-        w_dict = self.peekvalue(oparg)
+        w_dict = self.peekvalue(oparg - 1)
         self.space.setitem(w_dict, w_key, w_value)
 
     def SET_LINENO(self, lineno, next_instr):
         pass
 
-    def CALL_LIKELY_BUILTIN(self, oparg, next_instr):
-        # overridden by faster version in the standard object space.
-        from pypy.module.__builtin__ import OPTIMIZED_BUILTINS
-        varname = OPTIMIZED_BUILTINS[oparg >> 8]
-        w_function = self._load_global(varname)
-        nargs = oparg&0xFF
-        try:
-            w_result = self.space.call_valuestack(w_function, nargs, self)
-        finally:
-            self.dropvalues(nargs)
-        self.pushvalue(w_result)
-
     # overridden by faster version in the standard object space.
     LOOKUP_METHOD = LOAD_ATTR
     CALL_METHOD = CALL_FUNCTION
@@ -1091,12 +1079,10 @@
 
     @jit.unroll_safe
     def BUILD_SET(self, itemcount, next_instr):
-        w_set = self.space.call_function(self.space.w_set)
-        if itemcount:
-            w_add = self.space.getattr(w_set, self.space.wrap("add"))
-            for i in range(itemcount):
-                w_item = self.popvalue()
-                self.space.call_function(w_add, w_item)
+        w_set = self.space.newset()
+        for i in range(itemcount):
+            w_item = self.popvalue()
+            self.space.call_method(w_set, 'add', w_item)
         self.pushvalue(w_set)
 
     def STORE_MAP(self, oparg, next_instr):
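
The SET_ADD/MAP_ADD fix above changes peekvalue(oparg) to peekvalue(oparg - 1). peekvalue(0) is the top of the stack, so the container that CPython addresses as stack_pointer[-oparg] (after the popped operands are gone) corresponds to peekvalue(oparg - 1) here; the old index was off by one. A self-contained illustration with a plain list standing in for the value stack (MiniStack is a toy, not a PyPy class):

    class MiniStack(object):
        # Toy value stack; peekvalue(0) is the top, as in pyframe.py.
        def __init__(self):
            self.items = []
        def push(self, x):
            self.items.append(x)
        def pop(self):
            return self.items.pop()
        def peekvalue(self, index_from_top=0):
            return self.items[-1 - index_from_top]

    # Rough layout while a set comprehension runs SET_ADD with oparg == 2:
    # [set-under-construction, loop iterator, freshly computed value]
    stack = MiniStack()
    target_set = set()
    stack.push(target_set)
    stack.push('iterator')
    stack.push('value')
    w_value = stack.pop()               # SET_ADD first pops the value...
    oparg = 2
    assert stack.peekvalue(oparg - 1) is target_set   # ...then finds the set here
    # peekvalue(oparg) would reach one slot too deep, hence the off-by-one fix.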
diff --git a/pypy/interpreter/test/test_argument.py b/pypy/interpreter/test/test_argument.py
--- a/pypy/interpreter/test/test_argument.py
+++ b/pypy/interpreter/test/test_argument.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 import py
 from pypy.interpreter.argument import (Arguments, ArgumentsForTranslation,
     ArgErr, ArgErrUnknownKwds, ArgErrMultipleValues, ArgErrCount, rawshape,
@@ -126,6 +127,7 @@
     w_AttributeError = AttributeError
     w_UnicodeEncodeError = UnicodeEncodeError
     w_dict = dict
+    w_str = str
 
 class TestArgumentsNormal(object):
 
@@ -485,26 +487,6 @@
         args._match_signature(None, l, Signature(['abc']))
         assert len(l) == 1
         assert l[0] == space.wrap(5)
-        #
-        def str_w(w):
-            try:
-                return str(w)
-            except UnicodeEncodeError:
-                raise OperationError(space.w_UnicodeEncodeError,
-                                     space.wrap("oups"))
-        space.str_w = str_w
-        w_starstar = space.wrap({u'\u1234': 5})
-        err = py.test.raises(OperationError, Arguments,
-                             space, [], w_starstararg=w_starstar)
-        # Check that we get a TypeError.  On CPython it is because of
-        # "no argument called '?'".  On PyPy we get a TypeError too, but
-        # earlier: "keyword cannot be encoded to ascii".  The
-        # difference, besides the error message, is only apparent if the
-        # receiver also takes a **arg.  Then CPython passes the
-        # non-ascii unicode unmodified, whereas PyPy complains.  We will
-        # not care until someone has a use case for that.
-        assert not err.value.match(space, space.w_UnicodeEncodeError)
-        assert     err.value.match(space, space.w_TypeError)
 
 class TestErrorHandling(object):
     def test_missing_args(self):
@@ -559,13 +541,26 @@
             assert 0, "did not raise"
 
     def test_unknown_keywords(self):
-        err = ArgErrUnknownKwds(1, ['a', 'b'], [True, False])
+        space = DummySpace()
+        err = ArgErrUnknownKwds(space, 1, ['a', 'b'], [True, False], None)
         s = err.getmsg('foo')
         assert s == "foo() got an unexpected keyword argument 'b'"
-        err = ArgErrUnknownKwds(2, ['a', 'b', 'c'], [True, False, False])
+        err = ArgErrUnknownKwds(space, 2, ['a', 'b', 'c'],
+                                [True, False, False], None)
         s = err.getmsg('foo')
         assert s == "foo() got 2 unexpected keyword arguments"
 
+    def test_unknown_unicode_keyword(self):
+        class DummySpaceUnicode(DummySpace):
+            class sys:
+                defaultencoding = 'utf-8'
+        space = DummySpaceUnicode()
+        err = ArgErrUnknownKwds(space, 1, ['a', None, 'b', 'c'],
+                                [True, False, True, True],
+                                [unichr(0x1234), u'b', u'c'])
+        s = err.getmsg('foo')
+        assert s == "foo() got an unexpected keyword argument '\xe1\x88\xb4'"
+
     def test_multiple_values(self):
         err = ArgErrMultipleValues('bla')
         s = err.getmsg('foo')
@@ -592,6 +587,14 @@
         exc = raises(TypeError, (lambda a, b, **kw: 0), a=1)
         assert exc.value.message == "<lambda>() takes exactly 2 non-keyword arguments (0 given)"
 
+    def test_unicode_keywords(self):
+        def f(**kwargs):
+            assert kwargs[u"美"] == 42
+        f(**{u"美" : 42})
+        def f(x): pass
+        e = raises(TypeError, "f(**{u'ü' : 19})")
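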
+        assert "?" in str(e.value)
+
 def make_arguments_for_translation(space, args_w, keywords_w={},
                                    w_stararg=None, w_starstararg=None):
     return ArgumentsForTranslation(space, args_w, keywords_w.keys(),
diff --git a/pypy/interpreter/test/test_eval.py b/pypy/interpreter/test/test_eval.py
--- a/pypy/interpreter/test/test_eval.py
+++ b/pypy/interpreter/test/test_eval.py
@@ -15,16 +15,16 @@
                 self.code = code
                 Frame.__init__(self, space)
                 self.numlocals = numlocals
-                self.fastlocals_w = [None] * self.numlocals
+                self._fastlocals_w = [None] * self.numlocals
 
             def getcode(self):
                 return self.code
 
             def setfastscope(self, scope_w):
-                self.fastlocals_w = scope_w
+                self._fastlocals_w = scope_w
 
             def getfastscope(self):
-                return self.fastlocals_w
+                return self._fastlocals_w
 
             def getfastscopelength(self):
                 return self.numlocals
@@ -38,11 +38,11 @@
         self.f.fast2locals()
         assert space.eq_w(self.f.w_locals, self.space.wrap({}))
         
-        self.f.fastlocals_w[0] = w(5)
+        self.f._fastlocals_w[0] = w(5)
         self.f.fast2locals()
         assert space.eq_w(self.f.w_locals, self.space.wrap({'x': 5}))
 
-        self.f.fastlocals_w[2] = w(7)
+        self.f._fastlocals_w[2] = w(7)
         self.f.fast2locals()
         assert space.eq_w(self.f.w_locals, self.space.wrap({'x': 5, 'args': 7}))
 
@@ -57,13 +57,13 @@
         w = self.space.wrap
         self.f.w_locals = self.space.wrap({})
         self.f.locals2fast()
-        self.sameList(self.f.fastlocals_w, [None]*5)
+        self.sameList(self.f._fastlocals_w, [None]*5)
 
         self.f.w_locals = self.space.wrap({'x': 5})
         self.f.locals2fast()
-        self.sameList(self.f.fastlocals_w, [w(5)] + [None]*4)
+        self.sameList(self.f._fastlocals_w, [w(5)] + [None]*4)
 
         self.f.w_locals = self.space.wrap({'x':5, 'args':7})
         self.f.locals2fast()
-        self.sameList(self.f.fastlocals_w, [w(5), None, w(7),
-                                            None, None])
+        self.sameList(self.f._fastlocals_w, [w(5), None, w(7),
+                                             None, None])
diff --git a/pypy/interpreter/test/test_executioncontext.py b/pypy/interpreter/test/test_executioncontext.py
--- a/pypy/interpreter/test/test_executioncontext.py
+++ b/pypy/interpreter/test/test_executioncontext.py
@@ -106,7 +106,7 @@
             if isinstance(seen[0], Method):
                 found = 'method %s of %s' % (
                     seen[0].w_function.name,
-                    seen[0].w_class.getname(space, '?'))
+                    seen[0].w_class.getname(space))
             else:
                 assert isinstance(seen[0], Function)
                 found = 'builtin %s' % seen[0].name
@@ -232,31 +232,6 @@
         assert [i[0] for i in events] == ['c_call', 'c_return', 'return', 'c_call']
         assert events[0][1] == events[1][1]
 
-    def test_tracing_range_builtinshortcut(self):
-        opts = {"objspace.opcodes.CALL_LIKELY_BUILTIN": True}
-        space = gettestobjspace(**opts)
-        source = """def f(profile):
-        import sys
-        sys.setprofile(profile)
-        range(10)
-        sys.setprofile(None)
-        """
-        w_events = space.appexec([space.wrap(source)], """(source):
-        import sys
-        l = []
-        def profile(frame, event, arg):
-            l.append((event, arg))
-        d = {}
-        exec source in d
-        f = d['f']
-        f(profile)
-        import dis
-        print dis.dis(f)
-        return l
-        """)
-        events = space.unwrap(w_events)
-        assert [i[0] for i in events] == ['c_call', 'c_return', 'c_call']
-
     def test_profile_and_exception(self):
         space = self.space
         w_res = space.appexec([], """():
@@ -280,9 +255,6 @@
         """)
 
 
-class TestExecutionContextWithCallLikelyBuiltin(TestExecutionContext):
-    keywords = {'objspace.opcodes.CALL_LIKELY_BUILTIN': True}
-
 class TestExecutionContextWithCallMethod(TestExecutionContext):
     keywords = {'objspace.opcodes.CALL_METHOD': True}
 
diff --git a/pypy/interpreter/test/test_typedef.py b/pypy/interpreter/test/test_typedef.py
--- a/pypy/interpreter/test/test_typedef.py
+++ b/pypy/interpreter/test/test_typedef.py
@@ -16,7 +16,7 @@
 
         def g():
             f()
-        
+
         try:
             g()
         except:
@@ -203,3 +203,27 @@
         lst = seen[:]
         assert lst == [5, 10, 2]
         raises(OSError, os.lseek, fd, 7, 0)
+
+    def test_method_attrs(self):
+        import sys
+        class A(object):
+            def m(self):
+                "aaa"
+            m.x = 3
+        class B(A):
+            pass
+
+        bm = B().m
+        assert bm.__func__ is bm.im_func
+        assert bm.__self__ is bm.im_self
+        assert bm.im_class is B
+        assert bm.__doc__ == "aaa"
+        assert bm.x == 3
+        raises(AttributeError, setattr, bm, 'x', 15)
+        l = []
+        assert l.append.__self__ is l
+        assert l.__add__.__self__ is l
+        # note: 'l.__add__.__objclass__' is not defined in pypy
+        # because it's a regular method, and .__objclass__
+        # differs from .im_class in case the method is
+        # defined in some parent class of l's actual class
diff --git a/pypy/interpreter/typedef.py b/pypy/interpreter/typedef.py
--- a/pypy/interpreter/typedef.py
+++ b/pypy/interpreter/typedef.py
@@ -9,7 +9,7 @@
 from pypy.interpreter.error import OperationError, operationerrfmt
 from pypy.tool.sourcetools import compile2, func_with_new_name
 from pypy.rlib.objectmodel import instantiate, compute_identity_hash, specialize
-from pypy.rlib.jit import hint
+from pypy.rlib.jit import promote
 
 class TypeDef:
     def __init__(self, __name, __base=None, **rawdict):
@@ -206,7 +206,7 @@
             user_overridden_class = True
 
             def getclass(self, space):
-                return hint(self.w__class__, promote=True)
+                return promote(self.w__class__)
 
             def setclass(self, space, w_subtype):
                 # only used by descr_set___class__
@@ -761,12 +761,15 @@
     )
 Function.typedef.acceptable_as_base_class = False
 
-Method.typedef = TypeDef("method",
+Method.typedef = TypeDef(
+    "method",
     __new__ = interp2app(Method.descr_method__new__.im_func),
     __call__ = interp2app(Method.descr_method_call),
     __get__ = interp2app(Method.descr_method_get),
     im_func  = interp_attrproperty_w('w_function', cls=Method),
+    __func__ = interp_attrproperty_w('w_function', cls=Method),
     im_self  = interp_attrproperty_w('w_instance', cls=Method),
+    __self__ = interp_attrproperty_w('w_instance', cls=Method),
     im_class = interp_attrproperty_w('w_class', cls=Method),
     __getattribute__ = interp2app(Method.descr_method_getattribute),
     __eq__ = interp2app(Method.descr_method_eq),
diff --git a/pypy/jit/backend/arm/test/test_zrpy_gc.py b/pypy/jit/backend/arm/test/test_zrpy_gc.py
--- a/pypy/jit/backend/arm/test/test_zrpy_gc.py
+++ b/pypy/jit/backend/arm/test/test_zrpy_gc.py
@@ -1,8 +1,7 @@
 """
-This is a test that translates a complete JIT to C and runs it.  It is
-not testing much, expect that it basically works.  What it *is* testing,
-however, is the correct handling of GC, i.e. if objects are freed as
-soon as possible (at least in a simple case).
+This is a test that translates a complete JIT together with a GC and runs it.
+It is testing that the GC-dependent aspects basically work, mostly the mallocs
+and the various cases of write barrier.
 """
 
 import weakref
@@ -10,11 +9,10 @@
 from pypy.annotation import policy as annpolicy
 from pypy.rlib import rgc
 from pypy.rpython.lltypesystem import lltype, llmemory, rffi
-from pypy.rpython.lltypesystem.lloperation import llop
 from pypy.rlib.jit import JitDriver, dont_look_inside
-from pypy.rlib.jit import purefunction, unroll_safe
 from pypy.jit.backend.arm.runner import ArmCPU
 from pypy.jit.backend.llsupport.gc import GcRefList, GcRootMap_asmgcc
+from pypy.rlib.jit import elidable, unroll_safe
 from pypy.jit.backend.llsupport.gc import GcLLDescr_framework
 from pypy.tool.udir import udir
 from pypy.config.translationoption import DEFL_GC
@@ -85,7 +83,7 @@
     #
     return {(gc.GcLLDescr_framework, 'can_inline_malloc'): can_inline_malloc2}
 
-def compile(f, gc, **kwds):
+def compile(f, gc, enable_opts='', **kwds):
     from pypy.annotation.listdef import s_list_of_strings
     from pypy.translator.translator import TranslationContext
     from pypy.jit.metainterp.warmspot import apply_jit
@@ -109,14 +107,14 @@
                 old_value[obj, attr] = getattr(obj, attr)
                 setattr(obj, attr, value)
             #
-            apply_jit(t, enable_opts='')
+            apply_jit(t, enable_opts=enable_opts)
             #
         finally:
             for (obj, attr), oldvalue in old_value.items():
                 setattr(obj, attr, oldvalue)
 
     cbuilder = genc.CStandaloneBuilder(t, f, t.config)
-    cbuilder.generate_source()
+    cbuilder.generate_source(defines=cbuilder.DEBUG_DEFINES)
     cbuilder.compile()
     return cbuilder
 
@@ -153,8 +151,10 @@
 
 # ______________________________________________________________________
 
-class CompileFrameworkTests(object):
-    # Test suite using (so far) the minimark GC.
+
+class BaseFrameworkTests(object):
+    compile_kwds = {}
+
     def setup_class(cls):
         funcs = []
         name_to_func = {}
@@ -204,7 +204,8 @@
         try:
             GcLLDescr_framework.DEBUG = True
             cls.cbuilder = compile(get_entry(allfuncs), DEFL_GC,
-                                   gcrootfinder=cls.gcrootfinder, jit=True)
+                                   gcrootfinder=cls.gcrootfinder, jit=True,
+                                   **cls.compile_kwds)
         finally:
             GcLLDescr_framework.DEBUG = OLD_DEBUG
 
@@ -223,32 +224,36 @@
     def run_orig(self, name, n, x):
         self.main_allfuncs(name, n, x)
 
-    def define_libffi_workaround(cls):
-        # XXX: this is a workaround for a bug in database.py.  It seems that
-        # the problem is triggered by optimizeopt/fficall.py, and in
-        # particular by the ``cast_base_ptr_to_instance(Func, llfunc)``: in
-        # these tests, that line is the only place where libffi.Func is
-        # referenced.
-        #
-        # The problem occurs because the gctransformer tries to annotate a
-        # low-level helper to call the __del__ of libffi.Func when it's too
-        # late.
-        #
-        # This workaround works by forcing the annotator (and all the rest of
-        # the toolchain) to see libffi.Func in a "proper" context, not just as
-        # the target of cast_base_ptr_to_instance.  Note that the function
-        # below is *never* called by any actual test, it's just annotated.
-        #
-        from pypy.rlib.libffi import get_libc_name, CDLL, types, ArgChain
-        libc_name = get_libc_name()
-        def f(n, x, *args):
-            libc = CDLL(libc_name)
-            ptr = libc.getpointer('labs', [types.slong], types.slong)
-            chain = ArgChain()
-            chain.arg(n)
-            n = ptr.call(chain, lltype.Signed)
-            return (n, x) + args
-        return None, f, None
+
+class CompileFrameworkTests(BaseFrameworkTests):
+    # Test suite using (so far) the minimark GC.
+
+##    def define_libffi_workaround(cls):
+##        # XXX: this is a workaround for a bug in database.py.  It seems that
+##        # the problem is triggered by optimizeopt/fficall.py, and in
+##        # particular by the ``cast_base_ptr_to_instance(Func, llfunc)``: in
+##        # these tests, that line is the only place where libffi.Func is
+##        # referenced.
+##        #
+##        # The problem occurs because the gctransformer tries to annotate a
+##        # low-level helper to call the __del__ of libffi.Func when it's too
+##        # late.
+##        #
+##        # This workaround works by forcing the annotator (and all the rest of
+##        # the toolchain) to see libffi.Func in a "proper" context, not just as
+##        # the target of cast_base_ptr_to_instance.  Note that the function
+##        # below is *never* called by any actual test, it's just annotated.
+##        #
+##        from pypy.rlib.libffi import get_libc_name, CDLL, types, ArgChain
+##        libc_name = get_libc_name()
+##        def f(n, x, *args):
+##            libc = CDLL(libc_name)
+##            ptr = libc.getpointer('labs', [types.slong], types.slong)
+##            chain = ArgChain()
+##            chain.arg(n)
+##            n = ptr.call(chain, lltype.Signed)
+##            return (n, x) + args
+##        return None, f, None
 
     def define_compile_framework_1(cls):
         # a moving GC.  Supports malloc_varsize_nonmovable.  Simple test, works
@@ -455,6 +460,73 @@
     def test_compile_framework_7(self):
         self.run('compile_framework_7')
 
+    def define_compile_framework_8(cls):
+        # Array of pointers, of unknown length (test write_barrier_from_array)
+        def before(n, x):
+            return n, x, None, None, None, None, None, None, None, None, [X(123)], None
+        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
+            if n < 1900:
+                check(l[0].x == 123)
+                l = [None] * (16 + (n & 7))
+                l[0] = X(123)
+                l[1] = X(n)
+                l[2] = X(n+10)
+                l[3] = X(n+20)
+                l[4] = X(n+30)
+                l[5] = X(n+40)
+                l[6] = X(n+50)
+                l[7] = X(n+60)
+                l[8] = X(n+70)
+                l[9] = X(n+80)
+                l[10] = X(n+90)
+                l[11] = X(n+100)
+                l[12] = X(n+110)
+                l[13] = X(n+120)
+                l[14] = X(n+130)
+                l[15] = X(n+140)
+            if n < 1800:
+                check(len(l) == 16 + (n & 7))
+                check(l[0].x == 123)
+                check(l[1].x == n)
+                check(l[2].x == n+10)
+                check(l[3].x == n+20)
+                check(l[4].x == n+30)
+                check(l[5].x == n+40)
+                check(l[6].x == n+50)
+                check(l[7].x == n+60)
+                check(l[8].x == n+70)
+                check(l[9].x == n+80)
+                check(l[10].x == n+90)
+                check(l[11].x == n+100)
+                check(l[12].x == n+110)
+                check(l[13].x == n+120)
+                check(l[14].x == n+130)
+                check(l[15].x == n+140)
+            n -= x.foo
+            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
+        def after(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
+            check(len(l) >= 16)
+            check(l[0].x == 123)
+            check(l[1].x == 2)
+            check(l[2].x == 12)
+            check(l[3].x == 22)
+            check(l[4].x == 32)
+            check(l[5].x == 42)
+            check(l[6].x == 52)
+            check(l[7].x == 62)
+            check(l[8].x == 72)
+            check(l[9].x == 82)
+            check(l[10].x == 92)
+            check(l[11].x == 102)
+            check(l[12].x == 112)
+            check(l[13].x == 122)
+            check(l[14].x == 132)
+            check(l[15].x == 142)
+        return before, f, after
+
+    def test_compile_framework_8(self):
+        self.run('compile_framework_8')
+
     def define_compile_framework_external_exception_handling(cls):
         def before(n, x):
             x = X(0)
@@ -492,7 +564,7 @@
         self.run('compile_framework_external_exception_handling')
 
     def define_compile_framework_bug1(self):
-        @purefunction
+        @elidable
         def nonmoving():
             x = X(1)
             for i in range(7):
diff --git a/pypy/jit/backend/llgraph/llimpl.py b/pypy/jit/backend/llgraph/llimpl.py
--- a/pypy/jit/backend/llgraph/llimpl.py
+++ b/pypy/jit/backend/llgraph/llimpl.py
@@ -136,6 +136,7 @@
     'call'            : (('ref', 'varargs'), 'intorptr'),
     'call_assembler'  : (('varargs',), 'intorptr'),
     'cond_call_gc_wb' : (('ptr', 'ptr'), None),
+    'cond_call_gc_wb_array': (('ptr', 'int', 'ptr'), None),
     'oosend'          : (('varargs',), 'intorptr'),
     'oosend_pure'     : (('varargs',), 'intorptr'),
     'guard_true'      : (('bool',), None),
@@ -600,15 +601,15 @@
         #
         return _op_default_implementation
 
-    def op_debug_merge_point(self, _, value, recdepth):
+    def op_debug_merge_point(self, _, *args):
         from pypy.jit.metainterp.warmspot import get_stats
-        loc = ConstPtr(value)._get_str()
         try:
             stats = get_stats()
         except AttributeError:
             pass
         else:
-            stats.add_merge_point_location(loc)
+            stats.add_merge_point_location(args[1:])
+        pass
 
     def op_guard_true(self, _, value):
         if not value:
@@ -820,6 +821,12 @@
             raise NotImplementedError
 
     def op_call(self, calldescr, func, *args):
+        return self._do_call(calldescr, func, args, call_with_llptr=False)
+
+    def op_call_release_gil(self, calldescr, func, *args):
+        return self._do_call(calldescr, func, args, call_with_llptr=True)
+
+    def _do_call(self, calldescr, func, args, call_with_llptr):
         global _last_exception
         assert _last_exception is None, "exception left behind"
         assert _call_args_i == _call_args_r == _call_args_f == []
@@ -838,7 +845,8 @@
             else:
                 raise TypeError(x)
         try:
-            return _do_call_common(func, args_in_order, calldescr)
+            return _do_call_common(func, args_in_order, calldescr,
+                                   call_with_llptr)
         except LLException, lle:
             _last_exception = lle
             d = {'v': None,
@@ -850,6 +858,9 @@
     def op_cond_call_gc_wb(self, descr, a, b):
         py.test.skip("cond_call_gc_wb not supported")
 
+    def op_cond_call_gc_wb_array(self, descr, a, b, c):
+        py.test.skip("cond_call_gc_wb_array not supported")
+
     def op_oosend(self, descr, obj, *args):
         raise NotImplementedError("oosend for lltype backend??")
 
@@ -1480,17 +1491,20 @@
     'v': lltype.Void,
     }
 
-def _do_call_common(f, args_in_order=None, calldescr=None):
+def _do_call_common(f, args_in_order=None, calldescr=None,
+                    call_with_llptr=False):
     ptr = llmemory.cast_int_to_adr(f).ptr
     PTR = lltype.typeOf(ptr)
     if PTR == rffi.VOIDP:
         # it's a pointer to a C function, so we don't have a precise
         # signature: create one from the descr
+        assert call_with_llptr is True
         ARGS = map(kind2TYPE.get, calldescr.arg_types)
         RESULT = kind2TYPE[calldescr.typeinfo]
         FUNC = lltype.FuncType(ARGS, RESULT)
         func_to_call = rffi.cast(lltype.Ptr(FUNC), ptr)
     else:
+        assert call_with_llptr is False
         FUNC = PTR.TO
         ARGS = FUNC.ARGS
         func_to_call = ptr._obj._callable
diff --git a/pypy/jit/backend/llgraph/runner.py b/pypy/jit/backend/llgraph/runner.py
--- a/pypy/jit/backend/llgraph/runner.py
+++ b/pypy/jit/backend/llgraph/runner.py
@@ -134,7 +134,7 @@
         old, oldindex = faildescr._compiled_fail
         llimpl.compile_redirect_fail(old, oldindex, c)
 
-    def compile_loop(self, inputargs, operations, looptoken, log=True):
+    def compile_loop(self, inputargs, operations, looptoken, log=True, name=''):
         """In a real assembler backend, this should assemble the given
         list of operations.  Here we just generate a similar CompiledLoop
         instance.  The code here is RPython, whereas the code in llimpl
diff --git a/pypy/jit/backend/llsupport/descr.py b/pypy/jit/backend/llsupport/descr.py
--- a/pypy/jit/backend/llsupport/descr.py
+++ b/pypy/jit/backend/llsupport/descr.py
@@ -1,5 +1,6 @@
 import py
 from pypy.rpython.lltypesystem import lltype, rffi, llmemory, rclass
+from pypy.rpython.lltypesystem.lloperation import llop
 from pypy.jit.backend.llsupport import symbolic, support
 from pypy.jit.metainterp.history import AbstractDescr, getkind, BoxInt, BoxPtr
 from pypy.jit.metainterp.history import BasicFailDescr, LoopToken, BoxFloat
@@ -45,6 +46,8 @@
     size = 0      # help translation
     is_immutable = False
 
+    tid = llop.combine_ushort(lltype.Signed, 0, 0)
+
     def __init__(self, size, count_fields_if_immut=-1):
         self.size = size
         self.count_fields_if_immut = count_fields_if_immut
@@ -149,6 +152,7 @@
 
 class BaseArrayDescr(AbstractDescr):
     _clsname = ''
+    tid = llop.combine_ushort(lltype.Signed, 0, 0)
 
     def get_base_size(self, translate_support_code):
         basesize, _, _ = symbolic.get_array_token(_A, translate_support_code)
@@ -263,6 +267,9 @@
 
     def __repr__(self):
         res = '%s(%s)' % (self.__class__.__name__, self.arg_classes)
+        extraeffect = getattr(self.extrainfo, 'extraeffect', None)
+        if extraeffect is not None:
+            res += ' EF=%r' % extraeffect
         oopspecindex = getattr(self.extrainfo, 'oopspecindex', 0)
         if oopspecindex:
             from pypy.jit.codewriter.effectinfo import EffectInfo
diff --git a/pypy/jit/backend/llsupport/ffisupport.py b/pypy/jit/backend/llsupport/ffisupport.py
--- a/pypy/jit/backend/llsupport/ffisupport.py
+++ b/pypy/jit/backend/llsupport/ffisupport.py
@@ -3,13 +3,16 @@
 from pypy.jit.backend.llsupport.descr import DynamicIntCallDescr, NonGcPtrCallDescr,\
     FloatCallDescr, VoidCallDescr
 
+class UnsupportedKind(Exception):
+    pass
+
 def get_call_descr_dynamic(ffi_args, ffi_result, extrainfo=None):
     """Get a call descr: the types of result and args are represented by
     rlib.libffi.types.*"""
     try:
         reskind = get_ffi_type_kind(ffi_result)
         argkinds = [get_ffi_type_kind(arg) for arg in ffi_args]
-    except KeyError:
+    except UnsupportedKind:
         return None # ??
     arg_classes = ''.join(argkinds)
     if reskind == history.INT:
@@ -33,7 +36,7 @@
         return history.FLOAT
     elif kind == 'v':
         return history.VOID
-    assert False, "Unsupported kind '%s'" % kind
+    raise UnsupportedKind("Unsupported kind '%s'" % kind)
 
 def is_ffi_type_signed(ffi_type):
     from pypy.rlib.libffi import types
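
The ffisupport.py hunk above swaps the hard `assert False` on an unknown libffi kind for a dedicated UnsupportedKind exception, which get_call_descr_dynamic catches and turns into a None result, so an unsupported signature degrades gracefully instead of aborting. A generic sketch of the same catch-and-fall-back pattern; the table and helper names below are illustrative, not the actual PyPy definitions:

    class UnsupportedKind(Exception):
        pass

    # Hypothetical kind table; the real one lives in rlib.libffi/ffisupport.
    _KIND_TABLE = {'i': 'int', 'f': 'float', 'v': 'void'}

    def get_kind(code):
        try:
            return _KIND_TABLE[code]
        except KeyError:
            # a dedicated exception lets callers tell "unsupported" apart
            # from genuine programming errors
            raise UnsupportedKind("Unsupported kind '%s'" % code)

    def get_call_descr(arg_codes, res_code):
        try:
            argkinds = [get_kind(c) for c in arg_codes]
            reskind = get_kind(res_code)
        except UnsupportedKind:
            return None            # caller falls back to a non-optimized call
        return (argkinds, reskind)

    assert get_call_descr('if', 'i') == (['int', 'float'], 'int')
    assert get_call_descr('ix', 'i') is None    # 'x' is unsupported, no crash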
diff --git a/pypy/jit/backend/llsupport/gc.py b/pypy/jit/backend/llsupport/gc.py
--- a/pypy/jit/backend/llsupport/gc.py
+++ b/pypy/jit/backend/llsupport/gc.py
@@ -34,7 +34,7 @@
         pass
     def do_write_barrier(self, gcref_struct, gcref_newptr):
         pass
-    def rewrite_assembler(self, cpu, operations):
+    def rewrite_assembler(self, cpu, operations, gcrefs_output_list):
         return operations
     def can_inline_malloc(self, descr):
         return False
@@ -146,78 +146,6 @@
 # All code below is for the hybrid or minimark GC
 
 
-class GcRefList:
-    """Handles all references from the generated assembler to GC objects.
-    This is implemented as a nonmovable, but GC, list; the assembler contains
-    code that will (for now) always read from this list."""
-
-    GCREF_LIST = lltype.GcArray(llmemory.GCREF)     # followed by the GC
-
-    HASHTABLE = rffi.CArray(llmemory.Address)      # ignored by the GC
-    HASHTABLE_BITS = 10
-    HASHTABLE_SIZE = 1 << HASHTABLE_BITS
-
-    def initialize(self):
-        if we_are_translated(): n = 2000
-        else:                   n = 10    # tests only
-        self.list = self.alloc_gcref_list(n)
-        self.nextindex = 0
-        self.oldlists = []
-        # A pseudo dictionary: it is fixed size, and it may contain
-        # random nonsense after a collection moved the objects.  It is only
-        # used to avoid too many duplications in the GCREF_LISTs.
-        self.hashtable = lltype.malloc(self.HASHTABLE,
-                                       self.HASHTABLE_SIZE+1,
-                                       flavor='raw', track_allocation=False)
-        dummy = lltype.direct_ptradd(lltype.direct_arrayitems(self.hashtable),
-                                     self.HASHTABLE_SIZE)
-        dummy = llmemory.cast_ptr_to_adr(dummy)
-        for i in range(self.HASHTABLE_SIZE+1):
-            self.hashtable[i] = dummy
-
-    def alloc_gcref_list(self, n):
-        # Important: the GRREF_LISTs allocated are *non-movable*.  This
-        # requires support in the gc (hybrid GC or minimark GC so far).
-        if we_are_translated():
-            list = rgc.malloc_nonmovable(self.GCREF_LIST, n)
-            assert list, "malloc_nonmovable failed!"
-        else:
-            list = lltype.malloc(self.GCREF_LIST, n)     # for tests only
-        return list
-
-    def get_address_of_gcref(self, gcref):
-        assert lltype.typeOf(gcref) == llmemory.GCREF
-        # first look in the hashtable, using an inexact hash (fails after
-        # the object moves)
-        addr = llmemory.cast_ptr_to_adr(gcref)
-        hash = llmemory.cast_adr_to_int(addr, "forced")
-        hash -= hash >> self.HASHTABLE_BITS
-        hash &= self.HASHTABLE_SIZE - 1
-        addr_ref = self.hashtable[hash]
-        # the following test is safe anyway, because the addresses found
-        # in the hashtable are always the addresses of nonmovable stuff
-        # ('addr_ref' is an address inside self.list, not directly the
-        # address of a real moving GC object -- that's 'addr_ref.address[0]'.)
-        if addr_ref.address[0] == addr:
-            return addr_ref
-        # if it fails, add an entry to the list
-        if self.nextindex == len(self.list):
-            # reallocate first, increasing a bit the size every time
-            self.oldlists.append(self.list)
-            self.list = self.alloc_gcref_list(len(self.list) // 4 * 5)
-            self.nextindex = 0
-        # add it
-        index = self.nextindex
-        self.list[index] = gcref
-        addr_ref = lltype.direct_ptradd(lltype.direct_arrayitems(self.list),
-                                        index)
-        addr_ref = llmemory.cast_ptr_to_adr(addr_ref)
-        self.nextindex = index + 1
-        # record it in the hashtable
-        self.hashtable[hash] = addr_ref
-        return addr_ref
-
-
 class GcRootMap_asmgcc(object):
     """Handles locating the stack roots in the assembler.
     This is the class supporting --gcrootfinder=asmgcc.
@@ -527,6 +455,7 @@
     def __init__(self, gc_ll_descr):
         self.llop1 = gc_ll_descr.llop1
         self.WB_FUNCPTR = gc_ll_descr.WB_FUNCPTR
+        self.WB_ARRAY_FUNCPTR = gc_ll_descr.WB_ARRAY_FUNCPTR
         self.fielddescr_tid = get_field_descr(gc_ll_descr,
                                               gc_ll_descr.GCClass.HDR, 'tid')
         self.jit_wb_if_flag = gc_ll_descr.GCClass.JIT_WB_IF_FLAG
@@ -546,6 +475,14 @@
         funcaddr = llmemory.cast_ptr_to_adr(funcptr)
         return cpu.cast_adr_to_int(funcaddr)
 
+    def get_write_barrier_from_array_fn(self, cpu):
+        # returns a function with arguments [array, index, newvalue]
+        llop1 = self.llop1
+        funcptr = llop1.get_write_barrier_from_array_failing_case(
+            self.WB_ARRAY_FUNCPTR)
+        funcaddr = llmemory.cast_ptr_to_adr(funcptr)
+        return cpu.cast_adr_to_int(funcaddr)    # this may return 0
+
 
 class GcLLDescr_framework(GcLLDescription):
     DEBUG = False    # forced to True by x86/test/test_zrpy_gc.py
@@ -559,7 +496,7 @@
         self.translator = translator
         self.llop1 = llop1
 
-        # we need the hybrid or minimark GC for GcRefList.alloc_gcref_list()
+        # we need the hybrid or minimark GC for rgc._make_sure_does_not_move()
         # to work
         if gcdescr.config.translation.gc not in ('hybrid', 'minimark'):
             raise NotImplementedError("--gc=%s not implemented with the JIT" %
@@ -574,8 +511,6 @@
                                       " with the JIT" % (name,))
         gcrootmap = cls(gcdescr)
         self.gcrootmap = gcrootmap
-        self.gcrefs = GcRefList()
-        self.single_gcref_descr = GcPtrFieldDescr('', 0)
 
         # make a TransformerLayoutBuilder and save it on the translator
         # where it can be fished and reused by the FrameworkGCTransformer
@@ -618,6 +553,8 @@
             [lltype.Signed, lltype.Signed], llmemory.GCREF))
         self.WB_FUNCPTR = lltype.Ptr(lltype.FuncType(
             [llmemory.Address, llmemory.Address], lltype.Void))
+        self.WB_ARRAY_FUNCPTR = lltype.Ptr(lltype.FuncType(
+            [llmemory.Address, lltype.Signed, llmemory.Address], lltype.Void))
         self.write_barrier_descr = WriteBarrierDescr(self)
         #
         def malloc_array(itemsize, tid, num_elem):
@@ -710,7 +647,6 @@
         return rffi.cast(lltype.Signed, fptr)
 
     def initialize(self):
-        self.gcrefs.initialize()
         self.gcrootmap.initialize()
 
     def init_size_descr(self, S, descr):
@@ -772,54 +708,32 @@
             funcptr(llmemory.cast_ptr_to_adr(gcref_struct),
                     llmemory.cast_ptr_to_adr(gcref_newptr))
 
-    def replace_constptrs_with_getfield_raw(self, cpu, newops, op):
-        # xxx some performance issue here
-        newargs = [None] * op.numargs()
-        needs_copy = False
+    def record_constptrs(self, op, gcrefs_output_list):
         for i in range(op.numargs()):
             v = op.getarg(i)
-            newargs[i] = v
             if isinstance(v, ConstPtr) and bool(v.value):
-                addr = self.gcrefs.get_address_of_gcref(v.value)
-                # ^^^even for non-movable objects, to record their presence
-                if rgc.can_move(v.value):
-                    box = BoxPtr(v.value)
-                    addr = cpu.cast_adr_to_int(addr)
-                    newops.append(ResOperation(rop.GETFIELD_RAW,
-                                               [ConstInt(addr)], box,
-                                               self.single_gcref_descr))
-                    newargs[i] = box
-                    needs_copy = True
-        #
-        if needs_copy:
-            return op.copy_and_change(op.getopnum(), args=newargs)
-        else:
-            return op
+                p = v.value
+                rgc._make_sure_does_not_move(p)
+                gcrefs_output_list.append(p)
 
-
-    def rewrite_assembler(self, cpu, operations):
+    def rewrite_assembler(self, cpu, operations, gcrefs_output_list):
         # Perform two kinds of rewrites in parallel:
         #
         # - Add COND_CALLs to the write barrier before SETFIELD_GC and
         #   SETARRAYITEM_GC operations.
         #
-        # - Remove all uses of ConstPtrs away from the assembler.
-        #   Idea: when running on a moving GC, we can't (easily) encode
-        #   the ConstPtrs in the assembler, because they can move at any
-        #   point in time.  Instead, we store them in 'gcrefs.list', a GC
-        #   but nonmovable list; and here, we modify 'operations' to
-        #   replace direct usage of ConstPtr with a BoxPtr loaded by a
-        #   GETFIELD_RAW from the array 'gcrefs.list'.
+        # - Record the ConstPtrs from the assembler.
         #
         newops = []
+        known_lengths = {}
         # we can only remember one malloc since the next malloc can possibly
         # collect
         last_malloc = None
         for op in operations:
             if op.getopnum() == rop.DEBUG_MERGE_POINT:
                 continue
-            # ---------- replace ConstPtrs with GETFIELD_RAW ----------
-            op = self.replace_constptrs_with_getfield_raw(cpu, newops, op)
+            # ---------- record the ConstPtrs ----------
+            self.record_constptrs(op, gcrefs_output_list)
             if op.is_malloc():
                 last_malloc = op.result
             elif op.can_malloc():
@@ -842,10 +756,14 @@
                     v = op.getarg(2)
                     if isinstance(v, BoxPtr) or (isinstance(v, ConstPtr) and
                                             bool(v.value)): # store a non-NULL
-                        # XXX detect when we should produce a
-                        # write_barrier_from_array
-                        self._gen_write_barrier(newops, op.getarg(0), v)
+                        self._gen_write_barrier_array(newops, op.getarg(0),
+                                                      op.getarg(1), v,
+                                                      cpu, known_lengths)
                         op = op.copy_and_change(rop.SETARRAYITEM_RAW)
+            elif op.getopnum() == rop.NEW_ARRAY:
+                v_length = op.getarg(0)
+                if isinstance(v_length, ConstInt):
+                    known_lengths[op.result] = v_length.getint()
             # ----------
             newops.append(op)
         return newops
@@ -855,6 +773,24 @@
         newops.append(ResOperation(rop.COND_CALL_GC_WB, args, None,
                                    descr=self.write_barrier_descr))
 
+    def _gen_write_barrier_array(self, newops, v_base, v_index, v_value,
+                                 cpu, known_lengths):
+        if self.write_barrier_descr.get_write_barrier_from_array_fn(cpu) != 0:
+            # If we know statically the length of 'v', and it is not too
+            # big, then produce a regular write_barrier.  If it's unknown or
+            # too big, produce instead a write_barrier_from_array.
+            LARGE = 130
+            length = known_lengths.get(v_base, LARGE)
+            if length >= LARGE:
+                # unknown or too big: produce a write_barrier_from_array
+                args = [v_base, v_index, v_value]
+                newops.append(ResOperation(rop.COND_CALL_GC_WB_ARRAY, args,
+                                           None,
+                                           descr=self.write_barrier_descr))
+                return
+        # fall-back case: produce a write_barrier
+        self._gen_write_barrier(newops, v_base, v_value)
+
     def can_inline_malloc(self, descr):
         assert isinstance(descr, BaseSizeDescr)
         if descr.size < self.max_size_of_young_obj:
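
Two things change in llsupport/gc.py above: rewrite_assembler no longer rewrites ConstPtrs into GETFIELD_RAW loads but simply pins them with rgc._make_sure_does_not_move and records them in gcrefs_output_list, and SETARRAYITEM_GC now picks between the plain COND_CALL_GC_WB and the new COND_CALL_GC_WB_ARRAY based on whether the array length is statically known (from a preceding NEW_ARRAY with a constant length) and smaller than the LARGE threshold of 130. A small sketch of that decision, with plain strings standing in for boxes and ResOperations (the helper below is a toy model of _gen_write_barrier_array, not the real code):

    LARGE = 130    # same threshold as in _gen_write_barrier_array above

    def choose_write_barrier(v_base, known_lengths, have_wb_from_array):
        # Toy model of the decision: emit the array variant only when the
        # backend provides one and the array length is unknown or >= LARGE.
        if have_wb_from_array:
            length = known_lengths.get(v_base, LARGE)   # unknown counts as LARGE
            if length >= LARGE:
                return 'COND_CALL_GC_WB_ARRAY'
        return 'COND_CALL_GC_WB'

    # known_lengths would be filled in from NEW_ARRAY operations carrying a
    # ConstInt length, as in the rewrite_assembler loop above.
    known_lengths = {'p1': 5, 'p2': 5000}
    assert choose_write_barrier('p1', known_lengths, True)  == 'COND_CALL_GC_WB'
    assert choose_write_barrier('p2', known_lengths, True)  == 'COND_CALL_GC_WB_ARRAY'
    assert choose_write_barrier('p3', known_lengths, True)  == 'COND_CALL_GC_WB_ARRAY'
    assert choose_write_barrier('p1', known_lengths, False) == 'COND_CALL_GC_WB'

This matches the expectations in test_rewrite_assembler_4 below: a constant length of 5 keeps the plain write barrier, while 5000 or an unknown BoxInt length produces COND_CALL_GC_WB_ARRAY.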
diff --git a/pypy/jit/backend/llsupport/regalloc.py b/pypy/jit/backend/llsupport/regalloc.py
--- a/pypy/jit/backend/llsupport/regalloc.py
+++ b/pypy/jit/backend/llsupport/regalloc.py
@@ -38,6 +38,11 @@
         self.frame_depth += size
         return newloc
 
+    def reserve_location_in_frame(self, size):
+        frame_depth = self.frame_depth
+        self.frame_depth += size
+        return frame_depth
+
     # abstract methods that need to be overwritten for specific assemblers
     @staticmethod
     def frame_pos(loc, type):
diff --git a/pypy/jit/backend/llsupport/test/test_gc.py b/pypy/jit/backend/llsupport/test/test_gc.py
--- a/pypy/jit/backend/llsupport/test/test_gc.py
+++ b/pypy/jit/backend/llsupport/test/test_gc.py
@@ -9,7 +9,7 @@
 from pypy.jit.metainterp.resoperation import get_deep_immutable_oplist
 from pypy.jit.tool.oparser import parse
 from pypy.rpython.lltypesystem.rclass import OBJECT, OBJECT_VTABLE
-from pypy.jit.metainterp.test.test_optimizeopt import equaloplists
+from pypy.jit.metainterp.optimizeopt.util import equaloplists
 
 def test_boehm():
     gc_ll_descr = GcLLDescr_boehm(None, None, None)
@@ -49,19 +49,6 @@
 
 # ____________________________________________________________
 
-def test_GcRefList():
-    S = lltype.GcStruct('S')
-    order = range(50) * 4
-    random.shuffle(order)
-    allocs = [lltype.cast_opaque_ptr(llmemory.GCREF, lltype.malloc(S))
-              for i in range(50)]
-    allocs = [allocs[i] for i in order]
-    #
-    gcrefs = GcRefList()
-    gcrefs.initialize()
-    addrs = [gcrefs.get_address_of_gcref(ptr) for ptr in allocs]
-    for i in range(len(allocs)):
-        assert addrs[i].address[0] == llmemory.cast_ptr_to_adr(allocs[i])
 
 class TestGcRootMapAsmGcc:
 
@@ -288,6 +275,18 @@
     def get_write_barrier_failing_case(self, FPTRTYPE):
         return llhelper(FPTRTYPE, self._write_barrier_failing_case)
 
+    _have_wb_from_array = False
+
+    def _write_barrier_from_array_failing_case(self, adr_struct, v_index):
+        self.record.append(('barrier_from_array', adr_struct, v_index))
+
+    def get_write_barrier_from_array_failing_case(self, FPTRTYPE):
+        if self._have_wb_from_array:
+            return llhelper(FPTRTYPE,
+                            self._write_barrier_from_array_failing_case)
+        else:
+            return lltype.nullptr(FPTRTYPE.TO)
+
 
 class TestFramework(object):
     gc = 'hybrid'
@@ -303,9 +302,20 @@
             config = config_
         class FakeCPU(object):
             def cast_adr_to_int(self, adr):
-                ptr = llmemory.cast_adr_to_ptr(adr, gc_ll_descr.WB_FUNCPTR)
-                assert ptr._obj._callable == llop1._write_barrier_failing_case
-                return 42
+                if not adr:
+                    return 0
+                try:
+                    ptr = llmemory.cast_adr_to_ptr(adr, gc_ll_descr.WB_FUNCPTR)
+                    assert ptr._obj._callable == \
+                           llop1._write_barrier_failing_case
+                    return 42
+                except lltype.InvalidCast:
+                    ptr = llmemory.cast_adr_to_ptr(
+                        adr, gc_ll_descr.WB_ARRAY_FUNCPTR)
+                    assert ptr._obj._callable == \
+                           llop1._write_barrier_from_array_failing_case
+                    return 43
+
         gcdescr = get_description(config_)
         translator = FakeTranslator()
         llop1 = FakeLLOp()
@@ -414,11 +424,11 @@
             ResOperation(rop.DEBUG_MERGE_POINT, ['dummy', 2], None),
             ]
         gc_ll_descr = self.gc_ll_descr
-        operations = gc_ll_descr.rewrite_assembler(None, operations)
+        operations = gc_ll_descr.rewrite_assembler(None, operations, [])
         assert len(operations) == 0
 
     def test_rewrite_assembler_1(self):
-        # check rewriting of ConstPtrs
+        # check recording of ConstPtrs
         class MyFakeCPU(object):
             def cast_adr_to_int(self, adr):
                 assert adr == "some fake address"
@@ -438,56 +448,12 @@
             ]
         gc_ll_descr = self.gc_ll_descr
         gc_ll_descr.gcrefs = MyFakeGCRefList()
+        gcrefs = []
         operations = get_deep_immutable_oplist(operations)
-        operations = gc_ll_descr.rewrite_assembler(MyFakeCPU(), operations)
-        assert len(operations) == 2
-        assert operations[0].getopnum() == rop.GETFIELD_RAW
-        assert operations[0].getarg(0) == ConstInt(43)
-        assert operations[0].getdescr() == gc_ll_descr.single_gcref_descr
-        v_box = operations[0].result
-        assert isinstance(v_box, BoxPtr)
-        assert operations[1].getopnum() == rop.PTR_EQ
-        assert operations[1].getarg(0) == v_random_box
-        assert operations[1].getarg(1) == v_box
-        assert operations[1].result == v_result
-
-    def test_rewrite_assembler_1_cannot_move(self):
-        # check rewriting of ConstPtrs
-        class MyFakeCPU(object):
-            def cast_adr_to_int(self, adr):
-                xxx    # should not be called
-        class MyFakeGCRefList(object):
-            def get_address_of_gcref(self, s_gcref1):
-                seen.append(s_gcref1)
-                assert s_gcref1 == s_gcref
-                return "some fake address"
-        seen = []
-        S = lltype.GcStruct('S')
-        s = lltype.malloc(S)
-        s_gcref = lltype.cast_opaque_ptr(llmemory.GCREF, s)
-        v_random_box = BoxPtr()
-        v_result = BoxInt()
-        operations = [
-            ResOperation(rop.PTR_EQ, [v_random_box, ConstPtr(s_gcref)],
-                         v_result),
-            ]
-        gc_ll_descr = self.gc_ll_descr
-        gc_ll_descr.gcrefs = MyFakeGCRefList()
-        old_can_move = rgc.can_move
-        operations = get_deep_immutable_oplist(operations)
-        try:
-            rgc.can_move = lambda s: False
-            operations = gc_ll_descr.rewrite_assembler(MyFakeCPU(), operations)
-        finally:
-            rgc.can_move = old_can_move
-        assert len(operations) == 1
-        assert operations[0].getopnum() == rop.PTR_EQ
-        assert operations[0].getarg(0) == v_random_box
-        assert operations[0].getarg(1) == ConstPtr(s_gcref)
-        assert operations[0].result == v_result
-        # check that s_gcref gets added to the list anyway, to make sure
-        # that the GC sees it
-        assert seen == [s_gcref]
+        operations2 = gc_ll_descr.rewrite_assembler(MyFakeCPU(), operations,
+                                                   gcrefs)
+        assert operations2 == operations
+        assert gcrefs == [s_gcref]
 
     def test_rewrite_assembler_2(self):
         # check write barriers before SETFIELD_GC
@@ -500,7 +466,8 @@
             ]
         gc_ll_descr = self.gc_ll_descr
         operations = get_deep_immutable_oplist(operations)
-        operations = gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+        operations = gc_ll_descr.rewrite_assembler(self.fake_cpu, operations,
+                                                   [])
         assert len(operations) == 2
         #
         assert operations[0].getopnum() == rop.COND_CALL_GC_WB
@@ -515,29 +482,93 @@
 
     def test_rewrite_assembler_3(self):
         # check write barriers before SETARRAYITEM_GC
-        v_base = BoxPtr()
-        v_index = BoxInt()
-        v_value = BoxPtr()
-        array_descr = AbstractDescr()
-        operations = [
-            ResOperation(rop.SETARRAYITEM_GC, [v_base, v_index, v_value], None,
-                         descr=array_descr),
-            ]
-        gc_ll_descr = self.gc_ll_descr
-        operations = get_deep_immutable_oplist(operations)
-        operations = gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
-        assert len(operations) == 2
-        #
-        assert operations[0].getopnum() == rop.COND_CALL_GC_WB
-        assert operations[0].getarg(0) == v_base
-        assert operations[0].getarg(1) == v_value
-        assert operations[0].result is None
-        #
-        assert operations[1].getopnum() == rop.SETARRAYITEM_RAW
-        assert operations[1].getarg(0) == v_base
-        assert operations[1].getarg(1) == v_index
-        assert operations[1].getarg(2) == v_value
-        assert operations[1].getdescr() == array_descr
+        for v_new_length in (None, ConstInt(5), ConstInt(5000), BoxInt()):
+            v_base = BoxPtr()
+            v_index = BoxInt()
+            v_value = BoxPtr()
+            array_descr = AbstractDescr()
+            operations = [
+                ResOperation(rop.SETARRAYITEM_GC, [v_base, v_index, v_value],
+                             None, descr=array_descr),
+                ]
+            if v_new_length is not None:
+                operations.insert(0, ResOperation(rop.NEW_ARRAY,
+                                                  [v_new_length], v_base,
+                                                  descr=array_descr))
+                # we need to insert another, unrelated NEW_ARRAY here
+                # to prevent the initialization_store optimization
+                operations.insert(1, ResOperation(rop.NEW_ARRAY,
+                                                  [ConstInt(12)], BoxPtr(),
+                                                  descr=array_descr))
+            gc_ll_descr = self.gc_ll_descr
+            operations = get_deep_immutable_oplist(operations)
+            operations = gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                       operations, [])
+            if v_new_length is not None:
+                assert operations[0].getopnum() == rop.NEW_ARRAY
+                assert operations[1].getopnum() == rop.NEW_ARRAY
+                del operations[:2]
+            assert len(operations) == 2
+            #
+            assert operations[0].getopnum() == rop.COND_CALL_GC_WB
+            assert operations[0].getarg(0) == v_base
+            assert operations[0].getarg(1) == v_value
+            assert operations[0].result is None
+            #
+            assert operations[1].getopnum() == rop.SETARRAYITEM_RAW
+            assert operations[1].getarg(0) == v_base
+            assert operations[1].getarg(1) == v_index
+            assert operations[1].getarg(2) == v_value
+            assert operations[1].getdescr() == array_descr
+
+    def test_rewrite_assembler_4(self):
+        # check write barriers before SETARRAYITEM_GC,
+        # if we have actually a write_barrier_from_array.
+        self.llop1._have_wb_from_array = True
+        for v_new_length in (None, ConstInt(5), ConstInt(5000), BoxInt()):
+            v_base = BoxPtr()
+            v_index = BoxInt()
+            v_value = BoxPtr()
+            array_descr = AbstractDescr()
+            operations = [
+                ResOperation(rop.SETARRAYITEM_GC, [v_base, v_index, v_value],
+                             None, descr=array_descr),
+                ]
+            if v_new_length is not None:
+                operations.insert(0, ResOperation(rop.NEW_ARRAY,
+                                                  [v_new_length], v_base,
+                                                  descr=array_descr))
+                # we need to insert another, unrelated NEW_ARRAY here
+                # to prevent the initialization_store optimization
+                operations.insert(1, ResOperation(rop.NEW_ARRAY,
+                                                  [ConstInt(12)], BoxPtr(),
+                                                  descr=array_descr))
+            gc_ll_descr = self.gc_ll_descr
+            operations = get_deep_immutable_oplist(operations)
+            operations = gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                       operations, [])
+            if v_new_length is not None:
+                assert operations[0].getopnum() == rop.NEW_ARRAY
+                assert operations[1].getopnum() == rop.NEW_ARRAY
+                del operations[:2]
+            assert len(operations) == 2
+            #
+            if isinstance(v_new_length, ConstInt) and v_new_length.value < 130:
+                assert operations[0].getopnum() == rop.COND_CALL_GC_WB
+                assert operations[0].getarg(0) == v_base
+                assert operations[0].getarg(1) == v_value
+            else:
+                assert operations[0].getopnum() == rop.COND_CALL_GC_WB_ARRAY
+                assert operations[0].getarg(0) == v_base
+                assert operations[0].getarg(1) == v_index
+                assert operations[0].getarg(2) == v_value
+            assert operations[0].result is None
+            #
+            assert operations[1].getopnum() == rop.SETARRAYITEM_RAW
+            assert operations[1].getarg(0) == v_base
+            assert operations[1].getarg(1) == v_index
+            assert operations[1].getarg(2) == v_value
+            assert operations[1].getdescr() == array_descr
 
     def test_rewrite_assembler_initialization_store(self):
         S = lltype.GcStruct('S', ('parent', OBJECT),
@@ -558,7 +589,8 @@
         jump()
         """, namespace=locals())
         operations = get_deep_immutable_oplist(ops.operations)
-        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                        operations, [])
         equaloplists(operations, expected.operations)
 
     def test_rewrite_assembler_initialization_store_2(self):
@@ -583,7 +615,8 @@
         jump()
         """, namespace=locals())
         operations = get_deep_immutable_oplist(ops.operations)
-        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                        operations, [])
         equaloplists(operations, expected.operations)
 
     def test_rewrite_assembler_initialization_store_3(self):
@@ -602,7 +635,8 @@
         jump()
         """, namespace=locals())
         operations = get_deep_immutable_oplist(ops.operations)
-        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                        operations, [])
         equaloplists(operations, expected.operations)
 
 class TestFrameworkMiniMark(TestFramework):
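
A note on what test_rewrite_assembler_4 above pins down: when the GC exposes
write_barrier_from_array, the rewrite keeps the plain two-argument
COND_CALL_GC_WB only for arrays whose NEW_ARRAY length is a constant below the
bound the test compares against (130); for larger or unknown lengths it
switches to COND_CALL_GC_WB_ARRAY, which also receives the index.  A minimal
sketch of that decision, with a plain int (or None for "unknown length")
standing in for ConstInt/BoxInt; SMALL_ARRAY_CUTOFF is only an illustrative
name for the bound used in the test:

    # Sketch only: mirrors the expectations of test_rewrite_assembler_4.
    SMALL_ARRAY_CUTOFF = 130   # the bound the test compares against

    def pick_write_barrier(length, have_wb_from_array):
        # length: int for a constant NEW_ARRAY length, None otherwise
        if have_wb_from_array and not (length is not None and
                                       length < SMALL_ARRAY_CUTOFF):
            return 'COND_CALL_GC_WB_ARRAY'   # args: base, index, value
        return 'COND_CALL_GC_WB'             # args: base, value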
diff --git a/pypy/jit/backend/model.py b/pypy/jit/backend/model.py
--- a/pypy/jit/backend/model.py
+++ b/pypy/jit/backend/model.py
@@ -58,7 +58,7 @@
         """Called once by the front-end when the program stops."""
         pass
 
-    def compile_loop(self, inputargs, operations, looptoken, log=True):
+    def compile_loop(self, inputargs, operations, looptoken, log=True, name=''):
         """Assemble the given loop.
         Should create and attach a fresh CompiledLoopToken to
         looptoken.compiled_loop_token and stick extra attributes
diff --git a/pypy/jit/backend/test/calling_convention_test.py b/pypy/jit/backend/test/calling_convention_test.py
--- a/pypy/jit/backend/test/calling_convention_test.py
+++ b/pypy/jit/backend/test/calling_convention_test.py
@@ -57,146 +57,146 @@
         return ConstInt(heaptracker.adr2int(addr))
 
     def test_call_aligned_with_spilled_values(self):
-            from pypy.rlib.libffi import types
-            cpu = self.cpu
-            if not cpu.supports_floats:
-                py.test.skip('requires floats')
+        from pypy.rlib.libffi import types
+        cpu = self.cpu
+        if not cpu.supports_floats:
+            py.test.skip('requires floats')
 
 
-            def func(*args):
-                return float(sum(args))
+        def func(*args):
+            return float(sum(args))
 
-            F = lltype.Float
-            I = lltype.Signed
-            floats = [0.7, 5.8, 0.1, 0.3, 0.9, -2.34, -3.45, -4.56]
-            ints = [7, 11, 23, 13, -42, 1111, 95, 1]
-            for case in range(256):
-                local_floats = list(floats)
-                local_ints = list(ints)
-                args = []
-                spills = []
-                funcargs = []
-                float_count = 0
-                int_count = 0
-                for i in range(8):
-                    if case & (1<<i):
-                        args.append('f%d' % float_count)
-                        spills.append('force_spill(f%d)' % float_count)
-                        float_count += 1
-                        funcargs.append(F)
-                    else:
-                        args.append('i%d' % int_count)
-                        spills.append('force_spill(i%d)' % int_count)
-                        int_count += 1
-                        funcargs.append(I)
+        F = lltype.Float
+        I = lltype.Signed
+        floats = [0.7, 5.8, 0.1, 0.3, 0.9, -2.34, -3.45, -4.56]
+        ints = [7, 11, 23, 13, -42, 1111, 95, 1]
+        for case in range(256):
+            local_floats = list(floats)
+            local_ints = list(ints)
+            args = []
+            spills = []
+            funcargs = []
+            float_count = 0
+            int_count = 0
+            for i in range(8):
+                if case & (1<<i):
+                    args.append('f%d' % float_count)
+                    spills.append('force_spill(f%d)' % float_count)
+                    float_count += 1
+                    funcargs.append(F)
+                else:
+                    args.append('i%d' % int_count)
+                    spills.append('force_spill(i%d)' % int_count)
+                    int_count += 1
+                    funcargs.append(I)
 
-                arguments = ', '.join(args)
-                spill_ops = '\n'.join(spills)
+            arguments = ', '.join(args)
+            spill_ops = '\n'.join(spills)
 
-                FUNC = self.FuncType(funcargs, F)
-                FPTR = self.Ptr(FUNC)
-                func_ptr = llhelper(FPTR, func)
-                calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
-                funcbox = self.get_funcbox(cpu, func_ptr)
+            FUNC = self.FuncType(funcargs, F)
+            FPTR = self.Ptr(FUNC)
+            func_ptr = llhelper(FPTR, func)
+            calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+            funcbox = self.get_funcbox(cpu, func_ptr)
 
-                ops = '[%s]\n' % arguments
-                ops += '%s\n' % spill_ops
-                ops += 'f99 = call(ConstClass(func_ptr), %s, descr=calldescr)\n' % arguments
-                ops += 'finish(f99, %s)\n' % arguments
+            ops = '[%s]\n' % arguments
+            ops += '%s\n' % spill_ops
+            ops += 'f99 = call(ConstClass(func_ptr), %s, descr=calldescr)\n' % arguments
+            ops += 'finish(f99, %s)\n' % arguments
 
-                loop = parse(ops, namespace=locals())
-                looptoken = LoopToken()
-                done_number = self.cpu.get_fail_descr_number(loop.operations[-1].getdescr())
-                self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
-                expected_result = self._prepare_args(args, floats, ints)
+            loop = parse(ops, namespace=locals())
+            looptoken = LoopToken()
+            done_number = self.cpu.get_fail_descr_number(loop.operations[-1].getdescr())
+            self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
+            expected_result = self._prepare_args(args, floats, ints)
 
-                res = self.cpu.execute_token(looptoken)
-                x = longlong.getrealfloat(cpu.get_latest_value_float(0))
-                assert abs(x - expected_result) < 0.0001
+            res = self.cpu.execute_token(looptoken)
+            x = longlong.getrealfloat(cpu.get_latest_value_float(0))
+            assert abs(x - expected_result) < 0.0001
 
     def test_call_aligned_with_imm_values(self):
-            from pypy.rlib.libffi import types
-            cpu = self.cpu
-            if not cpu.supports_floats:
-                py.test.skip('requires floats')
+        from pypy.rlib.libffi import types
+        cpu = self.cpu
+        if not cpu.supports_floats:
+            py.test.skip('requires floats')
 
 
-            def func(*args):
-                return float(sum(args))
+        def func(*args):
+            return float(sum(args))
 
-            F = lltype.Float
-            I = lltype.Signed
-            floats = [0.7, 5.8, 0.1, 0.3, 0.9, -2.34, -3.45, -4.56]
-            ints = [7, 11, 23, 13, -42, 1111, 95, 1]
-            for case in range(256):
-                result = 0.0
-                args = []
-                argslist = []
-                local_floats = list(floats)
-                local_ints = list(ints)
-                for i in range(8):
-                    if case & (1<<i):
-                        args.append(F)
-                        arg = local_floats.pop()
-                        result += arg
-                        argslist.append(constfloat(arg))
-                    else:
-                        args.append(I)
-                        arg = local_ints.pop()
-                        result += arg
-                        argslist.append(ConstInt(arg))
-                FUNC = self.FuncType(args, F)
-                FPTR = self.Ptr(FUNC)
-                func_ptr = llhelper(FPTR, func)
-                calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
-                funcbox = self.get_funcbox(cpu, func_ptr)
+        F = lltype.Float
+        I = lltype.Signed
+        floats = [0.7, 5.8, 0.1, 0.3, 0.9, -2.34, -3.45, -4.56]
+        ints = [7, 11, 23, 13, -42, 1111, 95, 1]
+        for case in range(256):
+            result = 0.0
+            args = []
+            argslist = []
+            local_floats = list(floats)
+            local_ints = list(ints)
+            for i in range(8):
+                if case & (1<<i):
+                    args.append(F)
+                    arg = local_floats.pop()
+                    result += arg
+                    argslist.append(constfloat(arg))
+                else:
+                    args.append(I)
+                    arg = local_ints.pop()
+                    result += arg
+                    argslist.append(ConstInt(arg))
+            FUNC = self.FuncType(args, F)
+            FPTR = self.Ptr(FUNC)
+            func_ptr = llhelper(FPTR, func)
+            calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+            funcbox = self.get_funcbox(cpu, func_ptr)
 
-                res = self.execute_operation(rop.CALL,
-                                             [funcbox] + argslist,
-                                             'float', descr=calldescr)
-                assert abs(res.getfloat() - result) < 0.0001
+            res = self.execute_operation(rop.CALL,
+                                         [funcbox] + argslist,
+                                         'float', descr=calldescr)
+            assert abs(res.getfloat() - result) < 0.0001
 
     def test_call_aligned_with_args_on_the_stack(self):
-            from pypy.rlib.libffi import types
-            cpu = self.cpu
-            if not cpu.supports_floats:
-                py.test.skip('requires floats')
+        from pypy.rlib.libffi import types
+        cpu = self.cpu
+        if not cpu.supports_floats:
+            py.test.skip('requires floats')
 
 
-            def func(*args):
-                return float(sum(args))
+        def func(*args):
+            return float(sum(args))
 
-            F = lltype.Float
-            I = lltype.Signed
-            floats = [0.7, 5.8, 0.1, 0.3, 0.9, -2.34, -3.45, -4.56]
-            ints = [7, 11, 23, 13, -42, 1111, 95, 1]
-            for case in range(256):
-                result = 0.0
-                args = []
-                argslist = []
-                local_floats = list(floats)
-                local_ints = list(ints)
-                for i in range(8):
-                    if case & (1<<i):
-                        args.append(F)
-                        arg = local_floats.pop()
-                        result += arg
-                        argslist.append(boxfloat(arg))
-                    else:
-                        args.append(I)
-                        arg = local_ints.pop()
-                        result += arg
-                        argslist.append(BoxInt(arg))
-                FUNC = self.FuncType(args, F)
-                FPTR = self.Ptr(FUNC)
-                func_ptr = llhelper(FPTR, func)
-                calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
-                funcbox = self.get_funcbox(cpu, func_ptr)
+        F = lltype.Float
+        I = lltype.Signed
+        floats = [0.7, 5.8, 0.1, 0.3, 0.9, -2.34, -3.45, -4.56]
+        ints = [7, 11, 23, 13, -42, 1111, 95, 1]
+        for case in range(256):
+            result = 0.0
+            args = []
+            argslist = []
+            local_floats = list(floats)
+            local_ints = list(ints)
+            for i in range(8):
+                if case & (1<<i):
+                    args.append(F)
+                    arg = local_floats.pop()
+                    result += arg
+                    argslist.append(boxfloat(arg))
+                else:
+                    args.append(I)
+                    arg = local_ints.pop()
+                    result += arg
+                    argslist.append(BoxInt(arg))
+            FUNC = self.FuncType(args, F)
+            FPTR = self.Ptr(FUNC)
+            func_ptr = llhelper(FPTR, func)
+            calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+            funcbox = self.get_funcbox(cpu, func_ptr)
 
-                res = self.execute_operation(rop.CALL,
-                                             [funcbox] + argslist,
-                                             'float', descr=calldescr)
-                assert abs(res.getfloat() - result) < 0.0001
+            res = self.execute_operation(rop.CALL,
+                                         [funcbox] + argslist,
+                                         'float', descr=calldescr)
+            assert abs(res.getfloat() - result) < 0.0001
 
     def test_call_alignment_call_assembler(self):
         from pypy.rlib.libffi import types
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -583,23 +583,6 @@
                                          'int', descr=calldescr)
             assert res.value == func_ints(*args)
 
-    def test_call_to_c_function(self):
-        from pypy.rlib.libffi import CDLL, types, ArgChain
-        from pypy.rpython.lltypesystem.ll2ctypes import libc_name
-        libc = CDLL(libc_name)
-        c_tolower = libc.getpointer('tolower', [types.uchar], types.sint)
-        argchain = ArgChain().arg(ord('A'))
-        assert c_tolower.call(argchain, rffi.INT) == ord('a')
-
-        func_adr = llmemory.cast_ptr_to_adr(c_tolower.funcsym)
-        funcbox = ConstInt(heaptracker.adr2int(func_adr))
-        calldescr = self.cpu.calldescrof_dynamic([types.uchar], types.sint)
-        res = self.execute_operation(rop.CALL,
-                                     [funcbox, BoxInt(ord('A'))],
-                                     'int',
-                                     descr=calldescr)
-        assert res.value == ord('a')
-
     def test_call_with_const_floats(self):
         if not self.cpu.supports_floats:
             py.test.skip("requires floats")
@@ -1707,7 +1690,7 @@
         record = []
         #
         S = lltype.GcStruct('S', ('tid', lltype.Signed))
-        FUNC = self.FuncType([lltype.Ptr(S), lltype.Signed], lltype.Void)
+        FUNC = self.FuncType([lltype.Ptr(S), lltype.Ptr(S)], lltype.Void)
         func_ptr = llhelper(lltype.Ptr(FUNC), func_void)
         funcbox = self.get_funcbox(self.cpu, func_ptr)
         class WriteBarrierDescr(AbstractDescr):
@@ -1726,12 +1709,49 @@
             s = lltype.malloc(S)
             s.tid = value
             sgcref = lltype.cast_opaque_ptr(llmemory.GCREF, s)
+            t = lltype.malloc(S)
+            tgcref = lltype.cast_opaque_ptr(llmemory.GCREF, t)
             del record[:]
             self.execute_operation(rop.COND_CALL_GC_WB,
-                                   [BoxPtr(sgcref), ConstInt(-2121)],
+                                   [BoxPtr(sgcref), ConstPtr(tgcref)],
                                    'void', descr=WriteBarrierDescr())
             if cond:
-                assert record == [(s, -2121)]
+                assert record == [(s, t)]
+            else:
+                assert record == []
+
+    def test_cond_call_gc_wb_array(self):
+        def func_void(a, b, c):
+            record.append((a, b, c))
+        record = []
+        #
+        S = lltype.GcStruct('S', ('tid', lltype.Signed))
+        FUNC = self.FuncType([lltype.Ptr(S), lltype.Signed, lltype.Ptr(S)],
+                             lltype.Void)
+        func_ptr = llhelper(lltype.Ptr(FUNC), func_void)
+        funcbox = self.get_funcbox(self.cpu, func_ptr)
+        class WriteBarrierDescr(AbstractDescr):
+            jit_wb_if_flag = 4096
+            jit_wb_if_flag_byteofs = struct.pack("i", 4096).index('\x10')
+            jit_wb_if_flag_singlebyte = 0x10
+            def get_write_barrier_from_array_fn(self, cpu):
+                return funcbox.getint()
+        #
+        for cond in [False, True]:
+            value = random.randrange(-sys.maxint, sys.maxint)
+            if cond:
+                value |= 4096
+            else:
+                value &= ~4096
+            s = lltype.malloc(S)
+            s.tid = value
+            sgcref = lltype.cast_opaque_ptr(llmemory.GCREF, s)
+            del record[:]
+            self.execute_operation(rop.COND_CALL_GC_WB_ARRAY,
+                       [BoxPtr(sgcref), ConstInt(123), BoxPtr(sgcref)],
+                       'void', descr=WriteBarrierDescr())
+            if cond:
+                assert record == [(s, 123, s)]
             else:
                 assert record == []
 
@@ -1872,6 +1892,99 @@
         assert self.cpu.get_latest_value_int(2) == 10
         assert values == [1, 10]
 
+    def test_call_to_c_function(self):
+        from pypy.rlib.libffi import CDLL, types, ArgChain
+        from pypy.rpython.lltypesystem.ll2ctypes import libc_name
+        libc = CDLL(libc_name)
+        c_tolower = libc.getpointer('tolower', [types.uchar], types.sint)
+        argchain = ArgChain().arg(ord('A'))
+        assert c_tolower.call(argchain, rffi.INT) == ord('a')
+
+        cpu = self.cpu
+        func_adr = llmemory.cast_ptr_to_adr(c_tolower.funcsym)
+        funcbox = ConstInt(heaptracker.adr2int(func_adr))
+        calldescr = cpu.calldescrof_dynamic([types.uchar], types.sint)
+        i1 = BoxInt()
+        i2 = BoxInt()
+        tok = BoxInt()
+        faildescr = BasicFailDescr(1)
+        ops = [
+        ResOperation(rop.CALL_RELEASE_GIL, [funcbox, i1], i2,
+                     descr=calldescr),
+        ResOperation(rop.GUARD_NOT_FORCED, [], None, descr=faildescr),
+        ResOperation(rop.FINISH, [i2], None, descr=BasicFailDescr(0))
+        ]
+        ops[1].setfailargs([i1, i2])
+        looptoken = LoopToken()
+        self.cpu.compile_loop([i1], ops, looptoken)
+        self.cpu.set_future_value_int(0, ord('G'))
+        fail = self.cpu.execute_token(looptoken)
+        assert fail.identifier == 0
+        assert self.cpu.get_latest_value_int(0) == ord('g')
+
+    def test_call_to_c_function_with_callback(self):
+        from pypy.rlib.libffi import CDLL, types, ArgChain, clibffi
+        from pypy.rpython.lltypesystem.ll2ctypes import libc_name
+        libc = CDLL(libc_name)
+        types_size_t = clibffi.cast_type_to_ffitype(rffi.SIZE_T)
+        c_qsort = libc.getpointer('qsort', [types.pointer, types_size_t,
+                                            types_size_t, types.pointer],
+                                  types.void)
+        class Glob(object):
+            pass
+        glob = Glob()
+        class X(object):
+            pass
+        #
+        def callback(p1, p2):
+            glob.lst.append(X())
+            return rffi.cast(rffi.INT, 1)
+        CALLBACK = lltype.Ptr(lltype.FuncType([lltype.Signed,
+                                               lltype.Signed], rffi.INT))
+        fn = llhelper(CALLBACK, callback)
+        S = lltype.Struct('S', ('x', rffi.INT), ('y', rffi.INT))
+        raw = lltype.malloc(S, flavor='raw')
+        argchain = ArgChain()
+        argchain = argchain.arg(rffi.cast(lltype.Signed, raw))
+        argchain = argchain.arg(rffi.cast(rffi.SIZE_T, 2))
+        argchain = argchain.arg(rffi.cast(rffi.SIZE_T, 4))
+        argchain = argchain.arg(rffi.cast(lltype.Signed, fn))
+        glob.lst = []
+        c_qsort.call(argchain, lltype.Void)
+        assert len(glob.lst) > 0
+        del glob.lst[:]
+
+        cpu = self.cpu
+        func_adr = llmemory.cast_ptr_to_adr(c_qsort.funcsym)
+        funcbox = ConstInt(heaptracker.adr2int(func_adr))
+        calldescr = cpu.calldescrof_dynamic([types.pointer, types_size_t,
+                                             types_size_t, types.pointer],
+                                            types.void)
+        i0 = BoxInt()
+        i1 = BoxInt()
+        i2 = BoxInt()
+        i3 = BoxInt()
+        tok = BoxInt()
+        faildescr = BasicFailDescr(1)
+        ops = [
+        ResOperation(rop.CALL_RELEASE_GIL, [funcbox, i0, i1, i2, i3], None,
+                     descr=calldescr),
+        ResOperation(rop.GUARD_NOT_FORCED, [], None, descr=faildescr),
+        ResOperation(rop.FINISH, [], None, descr=BasicFailDescr(0))
+        ]
+        ops[1].setfailargs([])
+        looptoken = LoopToken()
+        self.cpu.compile_loop([i0, i1, i2, i3], ops, looptoken)
+        self.cpu.set_future_value_int(0, rffi.cast(lltype.Signed, raw))
+        self.cpu.set_future_value_int(1, 2)
+        self.cpu.set_future_value_int(2, 4)
+        self.cpu.set_future_value_int(3, rffi.cast(lltype.Signed, fn))
+        assert glob.lst == []
+        fail = self.cpu.execute_token(looptoken)
+        assert fail.identifier == 0
+        assert len(glob.lst) > 0
+        lltype.free(raw, flavor='raw')
+
     def test_guard_not_invalidated(self):
         cpu = self.cpu
         i0 = BoxInt()
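
The two new tests above rely on CALL_RELEASE_GIL always being immediately
followed by GUARD_NOT_FORCED; the regalloc change further down asserts exactly
this pairing.  A small sketch of building such a pair, reusing the names from
the tests; make_release_gil_pair is only an illustrative helper, not part of
the patch, and the choice of failargs is per test case:

    # Sketch: the op pair the CALL_RELEASE_GIL tests are built around.
    # 'funcbox', 'argboxes', 'resbox', 'calldescr', 'faildescr' are assumed
    # to be created as in test_call_to_c_function above.
    def make_release_gil_pair(funcbox, argboxes, resbox, calldescr, faildescr):
        call = ResOperation(rop.CALL_RELEASE_GIL, [funcbox] + argboxes, resbox,
                            descr=calldescr)
        guard = ResOperation(rop.GUARD_NOT_FORCED, [], None, descr=faildescr)
        # failargs: whichever values should still be readable if the guard
        # fails; the tests above pick them per case.
        failargs = list(argboxes)
        if resbox is not None:
            failargs.append(resbox)
        guard.setfailargs(failargs)
        return [call, guard]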
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -128,6 +128,8 @@
         if gc_ll_descr.get_malloc_slowpath_addr is not None:
             self._build_malloc_slowpath()
         self._build_stack_check_slowpath()
+        if gc_ll_descr.gcrootmap:
+            self._build_release_gil(gc_ll_descr.gcrootmap)
         debug_start('jit-backend-counts')
         self.set_debug(have_debug_prints())
         debug_stop('jit-backend-counts')
@@ -306,7 +308,66 @@
         rawstart = mc.materialize(self.cpu.asmmemmgr, [])
         self.stack_check_slowpath = rawstart
 
-    def assemble_loop(self, inputargs, operations, looptoken, log):
+    @staticmethod
+    def _release_gil_asmgcc(css):
+        # similar to trackgcroot.py:pypy_asm_stackwalk, first part
+        from pypy.rpython.memory.gctransform import asmgcroot
+        new = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, css)
+        next = asmgcroot.gcrootanchor.next
+        new.next = next
+        new.prev = asmgcroot.gcrootanchor
+        asmgcroot.gcrootanchor.next = new
+        next.prev = new
+        # and now release the GIL
+        before = rffi.aroundstate.before
+        if before:
+            before()
+
+    @staticmethod
+    def _reacquire_gil_asmgcc(css):
+        # first reacquire the GIL
+        after = rffi.aroundstate.after
+        if after:
+            after()
+        # similar to trackgcroot.py:pypy_asm_stackwalk, second part
+        from pypy.rpython.memory.gctransform import asmgcroot
+        old = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, css)
+        prev = old.prev
+        next = old.next
+        prev.next = next
+        next.prev = prev
+
+    @staticmethod
+    def _release_gil_shadowstack():
+        before = rffi.aroundstate.before
+        if before:
+            before()
+
+    @staticmethod
+    def _reacquire_gil_shadowstack():
+        after = rffi.aroundstate.after
+        if after:
+            after()
+
+    _NOARG_FUNC = lltype.Ptr(lltype.FuncType([], lltype.Void))
+    _CLOSESTACK_FUNC = lltype.Ptr(lltype.FuncType([rffi.LONGP],
+                                                  lltype.Void))
+
+    def _build_release_gil(self, gcrootmap):
+        if gcrootmap.is_shadow_stack:
+            releasegil_func = llhelper(self._NOARG_FUNC,
+                                       self._release_gil_shadowstack)
+            reacqgil_func = llhelper(self._NOARG_FUNC,
+                                     self._reacquire_gil_shadowstack)
+        else:
+            releasegil_func = llhelper(self._CLOSESTACK_FUNC,
+                                       self._release_gil_asmgcc)
+            reacqgil_func = llhelper(self._CLOSESTACK_FUNC,
+                                     self._reacquire_gil_asmgcc)
+        self.releasegil_addr  = self.cpu.cast_ptr_to_int(releasegil_func)
+        self.reacqgil_addr = self.cpu.cast_ptr_to_int(reacqgil_func)
+
+    def assemble_loop(self, loopname, inputargs, operations, looptoken, log):
         '''adds the following attributes to looptoken:
                _x86_loop_code       (an integer giving an address)
                _x86_bootstrap_code  (an integer giving an address)
@@ -322,6 +383,7 @@
         # for the duration of compiling one loop or a one bridge.
 
         clt = CompiledLoopToken(self.cpu, looptoken.number)
+        clt.allgcrefs = []
         looptoken.compiled_loop_token = clt
         if not we_are_translated():
             # Arguments should be unique
@@ -329,13 +391,13 @@
 
         self.setup(looptoken)
         self.currently_compiling_loop = looptoken
-        funcname = self._find_debug_merge_point(operations)
         if log:
             self._register_counter()
             operations = self._inject_debugging_code(looptoken, operations)
 
         regalloc = RegAlloc(self, self.cpu.translate_support_code)
-        arglocs, operations = regalloc.prepare_loop(inputargs, operations, looptoken)
+        arglocs, operations = regalloc.prepare_loop(inputargs, operations,
+                                                    looptoken, clt.allgcrefs)
         looptoken._x86_arglocs = arglocs
 
         bootstrappos = self.mc.get_relative_pos()
@@ -354,10 +416,13 @@
         fullsize = self.mc.get_relative_pos()
         #
         rawstart = self.materialize_loop(looptoken)
-        debug_print("Loop #%d (%s) has address %x to %x" % (
-            looptoken.number, funcname,
+        debug_start("jit-backend-addr")
+        debug_print("Loop %d (%s) has address %x to %x (bootstrap %x)" % (
+            looptoken.number, loopname,
             rawstart + self.looppos,
-            rawstart + directbootstrappos))
+            rawstart + directbootstrappos,
+            rawstart))
+        debug_stop("jit-backend-addr")
         self._patch_stackadjust(rawstart + stackadjustpos,
                                 frame_depth + param_depth)
         self.patch_pending_failure_recoveries(rawstart)
@@ -375,7 +440,7 @@
         self.teardown()
         # oprofile support
         if self.cpu.profile_agent is not None:
-            name = "Loop # %s: %s" % (looptoken.number, funcname)
+            name = "Loop # %s: %s" % (looptoken.number, loopname)
             self.cpu.profile_agent.native_code_written(name,
                                                        rawstart, fullsize)
         return ops_offset
@@ -395,7 +460,6 @@
             return
 
         self.setup(original_loop_token)
-        funcname = self._find_debug_merge_point(operations)
         if log:
             self._register_counter()
             operations = self._inject_debugging_code(faildescr, operations)
@@ -407,7 +471,8 @@
         regalloc = RegAlloc(self, self.cpu.translate_support_code)
         fail_depths = faildescr._x86_current_depths
         operations = regalloc.prepare_bridge(fail_depths, inputargs, arglocs,
-                                             operations)
+                                             operations,
+                                             self.current_clt.allgcrefs)
 
         stackadjustpos = self._patchable_stackadjust()
         frame_depth, param_depth = self._assemble(regalloc, operations)
@@ -416,9 +481,10 @@
         fullsize = self.mc.get_relative_pos()
         #
         rawstart = self.materialize_loop(original_loop_token)
-
-        debug_print("Bridge out of guard %d (%s) has address %x to %x" %
-                    (descr_number, funcname, rawstart, rawstart + codeendpos))
+        debug_start("jit-backend-addr")
+        debug_print("Bridge out of Guard %d has address %x to %x" %
+                    (descr_number, rawstart, rawstart + codeendpos))
+        debug_stop("jit-backend-addr")
         self._patch_stackadjust(rawstart + stackadjustpos,
                                 frame_depth + param_depth)
         self.patch_pending_failure_recoveries(rawstart)
@@ -432,7 +498,7 @@
         self.teardown()
         # oprofile support
         if self.cpu.profile_agent is not None:
-            name = "Bridge # %s: %s" % (descr_number, funcname)
+            name = "Bridge # %s" % (descr_number,)
             self.cpu.profile_agent.native_code_written(name,
                                                        rawstart, fullsize)
         return ops_offset
@@ -492,17 +558,6 @@
         return self.mc.materialize(self.cpu.asmmemmgr, allblocks,
                                    self.cpu.gc_ll_descr.gcrootmap)
 
-    def _find_debug_merge_point(self, operations):
-
-        for op in operations:
-            if op.getopnum() == rop.DEBUG_MERGE_POINT:
-                funcname = op.getarg(0)._get_str()
-                break
-        else:
-            funcname = "<loop %d>" % len(self.loop_run_counters)
-        # invent the counter, so we don't get too confused
-        return funcname
-
     def _register_counter(self):
         if self._debug:
             # YYY very minor leak -- we need the counters to stay alive
@@ -652,22 +707,28 @@
         # we need to put two words into the shadowstack: the MARKER
         # and the address of the frame (ebp, actually)
         rst = gcrootmap.get_root_stack_top_addr()
-        assert rx86.fits_in_32bits(rst)
-        if IS_X86_64:
-            # cannot use rdx here, it's used to pass arguments!
-            tmp = X86_64_SCRATCH_REG
+        if rx86.fits_in_32bits(rst):
+            self.mc.MOV_rj(eax.value, rst)            # MOV eax, [rootstacktop]
         else:
-            tmp = edx
-        self.mc.MOV_rj(eax.value, rst)                # MOV eax, [rootstacktop]
-        self.mc.LEA_rm(tmp.value, (eax.value, 2*WORD))  # LEA edx, [eax+2*WORD]
+            self.mc.MOV_ri(r13.value, rst)            # MOV r13, rootstacktop
+            self.mc.MOV_rm(eax.value, (r13.value, 0)) # MOV eax, [r13]
+        #
+        self.mc.LEA_rm(ebx.value, (eax.value, 2*WORD))  # LEA ebx, [eax+2*WORD]
         self.mc.MOV_mi((eax.value, 0), gcrootmap.MARKER)    # MOV [eax], MARKER
         self.mc.MOV_mr((eax.value, WORD), ebp.value)      # MOV [eax+WORD], ebp
-        self.mc.MOV_jr(rst, tmp.value)                # MOV [rootstacktop], edx
+        #
+        if rx86.fits_in_32bits(rst):
+            self.mc.MOV_jr(rst, ebx.value)            # MOV [rootstacktop], ebx
+        else:
+            self.mc.MOV_mr((r13.value, 0), ebx.value) # MOV [r13], ebx
 
     def _call_footer_shadowstack(self, gcrootmap):
         rst = gcrootmap.get_root_stack_top_addr()
-        assert rx86.fits_in_32bits(rst)
-        self.mc.SUB_ji8(rst, 2*WORD)       # SUB [rootstacktop], 2*WORD
+        if rx86.fits_in_32bits(rst):
+            self.mc.SUB_ji8(rst, 2*WORD)       # SUB [rootstacktop], 2*WORD
+        else:
+            self.mc.MOV_ri(ebx.value, rst)           # MOV ebx, rootstacktop
+            self.mc.SUB_mi8((ebx.value, 0), 2*WORD)  # SUB [ebx], 2*WORD
 
     def _assemble_bootstrap_direct_call(self, arglocs, jmppos, stackdepth):
         if IS_X86_64:
@@ -838,7 +899,7 @@
 
     def regalloc_push(self, loc):
         if isinstance(loc, RegLoc) and loc.is_xmm:
-            self.mc.SUB_ri(esp.value, 2*WORD)
+            self.mc.SUB_ri(esp.value, 8)   # = size of doubles
             self.mc.MOVSD_sx(0, loc.value)
         elif WORD == 4 and isinstance(loc, StackLoc) and loc.width == 8:
             # XXX evil trick
@@ -850,7 +911,7 @@
     def regalloc_pop(self, loc):
         if isinstance(loc, RegLoc) and loc.is_xmm:
             self.mc.MOVSD_xs(loc.value, 0)
-            self.mc.ADD_ri(esp.value, 2*WORD)
+            self.mc.ADD_ri(esp.value, 8)   # = size of doubles
         elif WORD == 4 and isinstance(loc, StackLoc) and loc.width == 8:
             # XXX evil trick
             self.mc.POP_b(get_ebp_ofs(loc.position + 1))
@@ -1987,6 +2048,102 @@
         self.mc.CMP_bi(FORCE_INDEX_OFS, 0)
         self.implement_guard(guard_token, 'L')
 
+    def genop_guard_call_release_gil(self, op, guard_op, guard_token,
+                                     arglocs, result_loc):
+        # first, close the stack in the sense of the asmgcc GC root tracker
+        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+        if gcrootmap:
+            self.call_release_gil(gcrootmap, arglocs)
+        # do the call
+        faildescr = guard_op.getdescr()
+        fail_index = self.cpu.get_fail_descr_number(faildescr)
+        self.mc.MOV_bi(FORCE_INDEX_OFS, fail_index)
+        self._genop_call(op, arglocs, result_loc, fail_index)
+        # then reopen the stack
+        if gcrootmap:
+            self.call_reacquire_gil(gcrootmap, result_loc)
+        # finally, the guard_not_forced
+        self.mc.CMP_bi(FORCE_INDEX_OFS, 0)
+        self.implement_guard(guard_token, 'L')
+
+    def call_release_gil(self, gcrootmap, save_registers):
+        # First, we need to save away the registers listed in
+        # 'save_registers' that are not callee-save.  XXX We assume that
+        # the XMM registers won't be modified.  We store them in
+        # [ESP+4], [ESP+8], etc., leaving enough room in [ESP] for the
+        # single argument to closestack_addr below.
+        p = WORD
+        for reg in self._regalloc.rm.save_around_call_regs:
+            if reg in save_registers:
+                self.mc.MOV_sr(p, reg.value)
+                p += WORD
+        self._regalloc.reserve_param(p//WORD)
+        #
+        if gcrootmap.is_shadow_stack:
+            args = []
+        else:
+            # note that regalloc.py used save_all_regs=True to save all
+            # registers, so we don't have to care about saving them (other
+            # than ebp) in the close_stack_struct.  But if they are registers
+            # like %eax that would be destroyed by this call, *and* they are
+            # used by arglocs for the *next* call, then trouble; for now we
+            # will just push/pop them.
+            from pypy.rpython.memory.gctransform import asmgcroot
+            css = self._regalloc.close_stack_struct
+            if css == 0:
+                use_words = (2 + max(asmgcroot.INDEX_OF_EBP,
+                                     asmgcroot.FRAME_PTR) + 1)
+                pos = self._regalloc.fm.reserve_location_in_frame(use_words)
+                css = get_ebp_ofs(pos + use_words - 1)
+                self._regalloc.close_stack_struct = css
+            # The location where the future CALL will put its return address
+            # will be [ESP-WORD], so save that as the next frame's top address
+            self.mc.LEA_rs(eax.value, -WORD)        # LEA EAX, [ESP-4]
+            frame_ptr = css + WORD * (2+asmgcroot.FRAME_PTR)
+            self.mc.MOV_br(frame_ptr, eax.value)    # MOV [css.frame], EAX
+            # Save ebp
+            index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP)
+            self.mc.MOV_br(index_of_ebp, ebp.value) # MOV [css.ebp], EBP
+            # Call the closestack() function (also releasing the GIL)
+            if IS_X86_32:
+                reg = eax
+            elif IS_X86_64:
+                reg = edi
+            self.mc.LEA_rb(reg.value, css)
+            args = [reg]
+        #
+        self._emit_call(-1, imm(self.releasegil_addr), args)
+        # Finally, restore the registers saved above.
+        p = WORD
+        for reg in self._regalloc.rm.save_around_call_regs:
+            if reg in save_registers:
+                self.mc.MOV_rs(reg.value, p)
+                p += WORD
+
+    def call_reacquire_gil(self, gcrootmap, save_loc):
+        # save the previous result (eax/xmm0) into the stack temporarily.
+        # XXX like with call_release_gil(), we assume that we don't need
+        # to save xmm0 in this case.
+        if isinstance(save_loc, RegLoc) and not save_loc.is_xmm:
+            self.mc.MOV_sr(WORD, save_loc.value)
+            self._regalloc.reserve_param(2)
+        # call the reopenstack() function (also reacquiring the GIL)
+        if gcrootmap.is_shadow_stack:
+            args = []
+        else:
+            css = self._regalloc.close_stack_struct
+            assert css != 0
+            if IS_X86_32:
+                reg = eax
+            elif IS_X86_64:
+                reg = edi
+            self.mc.LEA_rb(reg.value, css)
+            args = [reg]
+        self._emit_call(-1, imm(self.reacqgil_addr), args)
+        # restore the result from the stack
+        if isinstance(save_loc, RegLoc) and not save_loc.is_xmm:
+            self.mc.MOV_rs(save_loc.value, WORD)
+
     def genop_guard_call_assembler(self, op, guard_op, guard_token,
                                    arglocs, result_loc):
         faildescr = guard_op.getdescr()
@@ -2076,13 +2233,26 @@
     def genop_discard_cond_call_gc_wb(self, op, arglocs):
         # Write code equivalent to write_barrier() in the GC: it checks
         # a flag in the object at arglocs[0], and if set, it calls the
-        # function remember_young_pointer() from the GC.  The two arguments
-        # to the call are in arglocs[:2].  The rest, arglocs[2:], contains
+        # function remember_young_pointer() from the GC.  The arguments
+        # to the call are in arglocs[:N].  The rest, arglocs[N:], contains
         # registers that need to be saved and restored across the call.
+        # N is either 2 (regular write barrier) or 3 (array write barrier).
         descr = op.getdescr()
         if we_are_translated():
             cls = self.cpu.gc_ll_descr.has_write_barrier_class()
             assert cls is not None and isinstance(descr, cls)
+        #
+        opnum = op.getopnum()
+        if opnum == rop.COND_CALL_GC_WB:
+            N = 2
+            func = descr.get_write_barrier_fn(self.cpu)
+        elif opnum == rop.COND_CALL_GC_WB_ARRAY:
+            N = 3
+            func = descr.get_write_barrier_from_array_fn(self.cpu)
+            assert func != 0
+        else:
+            raise AssertionError(opnum)
+        #
         loc_base = arglocs[0]
         self.mc.TEST8(addr_add_const(loc_base, descr.jit_wb_if_flag_byteofs),
                       imm(descr.jit_wb_if_flag_singlebyte))
@@ -2093,33 +2263,37 @@
         if IS_X86_32:
             limit = -1      # push all arglocs on the stack
         elif IS_X86_64:
-            limit = 1       # push only arglocs[2:] on the stack
+            limit = N - 1   # push only arglocs[N:] on the stack
         for i in range(len(arglocs)-1, limit, -1):
             loc = arglocs[i]
             if isinstance(loc, RegLoc):
                 self.mc.PUSH_r(loc.value)
             else:
-                assert not IS_X86_64 # there should only be regs in arglocs[2:]
+                assert not IS_X86_64 # there should only be regs in arglocs[N:]
                 self.mc.PUSH_i32(loc.getint())
         if IS_X86_64:
             # We clobber these registers to pass the arguments, but that's
             # okay, because consider_cond_call_gc_wb makes sure that any
             # caller-save registers with values in them are present in
-            # arglocs[2:] too, so they are saved on the stack above and
+            # arglocs[N:] too, so they are saved on the stack above and
             # restored below.
-            remap_frame_layout(self, arglocs[:2], [edi, esi],
+            if N == 2:
+                callargs = [edi, esi]
+            else:
+                callargs = [edi, esi, edx]
+            remap_frame_layout(self, arglocs[:N], callargs,
                                X86_64_SCRATCH_REG)
-
+        #
         # misaligned stack in the call, but it's ok because the write barrier
         # is not going to call anything more.  Also, this assumes that the
         # write barrier does not touch the xmm registers.  (Slightly delicate
         # assumption, given that the write barrier can end up calling the
         # platform's malloc() from AddressStack.append().  XXX may need to
         # be done properly)
-        self.mc.CALL(imm(descr.get_write_barrier_fn(self.cpu)))
+        self.mc.CALL(imm(func))
         if IS_X86_32:
-            self.mc.ADD_ri(esp.value, 2*WORD)
-        for i in range(2, len(arglocs)):
+            self.mc.ADD_ri(esp.value, N*WORD)
+        for i in range(N, len(arglocs)):
             loc = arglocs[i]
             assert isinstance(loc, RegLoc)
             self.mc.POP_r(loc.value)
@@ -2128,6 +2302,8 @@
         assert 0 < offset <= 127
         self.mc.overwrite(jz_location-1, chr(offset))
 
+    genop_discard_cond_call_gc_wb_array = genop_discard_cond_call_gc_wb
+
     def genop_force_token(self, op, arglocs, resloc):
         # RegAlloc.consider_force_token ensures this:
         assert isinstance(resloc, RegLoc)
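
To make _release_gil_asmgcc/_reacquire_gil_asmgcc above easier to follow:
before releasing the GIL the helper splices the close-stack structure into the
doubly-linked list anchored at asmgcroot.gcrootanchor, and after reacquiring
the GIL the node is spliced back out, so the GC can walk this frame while the
GIL is not held.  A toy sketch of the same two splices; Node is only a
stand-in for ASM_FRAMEDATA_HEAD_PTR:

    # Toy model of the list manipulation done around a CALL_RELEASE_GIL.
    class Node(object):
        def __init__(self):
            self.prev = self.next = self

    anchor = Node()                 # plays the role of asmgcroot.gcrootanchor

    def link_frame(new):            # _release_gil_asmgcc, before before()
        new.next = anchor.next
        new.prev = anchor
        anchor.next.prev = new
        anchor.next = new

    def unlink_frame(old):          # _reacquire_gil_asmgcc, after after()
        old.prev.next = old.next
        old.next.prev = old.prev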
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -156,12 +156,14 @@
         self.translate_support_code = translate_support_code
         # to be read/used by the assembler too
         self.jump_target_descr = None
+        self.close_stack_struct = 0
 
-    def _prepare(self, inputargs, operations):
+    def _prepare(self, inputargs, operations, allgcrefs):
         self.fm = X86FrameManager()
         self.param_depth = 0
         cpu = self.assembler.cpu
-        operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations)
+        operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations,
+                                                       allgcrefs)
         # compute longevity of variables
         longevity = compute_vars_longevity(inputargs, operations)
         self.longevity = longevity
@@ -172,15 +174,16 @@
                                    assembler = self.assembler)
         return operations
 
-    def prepare_loop(self, inputargs, operations, looptoken):
-        operations = self._prepare(inputargs, operations)
+    def prepare_loop(self, inputargs, operations, looptoken, allgcrefs):
+        operations = self._prepare(inputargs, operations, allgcrefs)
         jump = operations[-1]
         loop_consts = compute_loop_consts(inputargs, jump, looptoken)
         self.loop_consts = loop_consts
         return self._process_inputargs(inputargs), operations
 
-    def prepare_bridge(self, prev_depths, inputargs, arglocs, operations):
-        operations = self._prepare(inputargs, operations)
+    def prepare_bridge(self, prev_depths, inputargs, arglocs, operations,
+                       allgcrefs):
+        operations = self._prepare(inputargs, operations, allgcrefs)
         self.loop_consts = {}
         self._update_bindings(arglocs, inputargs)
         self.fm.frame_depth = prev_depths[0]
@@ -378,7 +381,9 @@
         self.assembler.regalloc_perform_discard(op, arglocs)
 
     def can_merge_with_next_guard(self, op, i, operations):
-        if op.getopnum() == rop.CALL_MAY_FORCE or op.getopnum() == rop.CALL_ASSEMBLER:
+        if (op.getopnum() == rop.CALL_MAY_FORCE or
+            op.getopnum() == rop.CALL_ASSEMBLER or
+            op.getopnum() == rop.CALL_RELEASE_GIL):
             assert operations[i + 1].getopnum() == rop.GUARD_NOT_FORCED
             return True
         if not op.is_comparison():
@@ -729,6 +734,19 @@
         self.xrm.possibly_free_var(op.getarg(1))
 
     def _call(self, op, arglocs, force_store=[], guard_not_forced_op=None):
+        # we need to save registers on the stack:
+        #
+        #  - at least the non-callee-saved registers
+        #
+        #  - for shadowstack, we assume that any call can collect, and we
+        #    save also the callee-saved registers that contain GC pointers,
+        #    so that they can be found by follow_stack_frame_of_assembler()
+        #
+        #  - for CALL_MAY_FORCE or CALL_ASSEMBLER, we have to save all regs
+        #    anyway, in case we need to do cpu.force().  The issue is that
+        #    grab_frame_values() would not be able to locate values in
+        #    callee-saved registers.
+        #
         save_all_regs = guard_not_forced_op is not None
         self.xrm.before_call(force_store, save_all_regs=save_all_regs)
         if not save_all_regs:
@@ -795,6 +813,8 @@
         assert guard_op is not None
         self._consider_call(op, guard_op)
 
+    consider_call_release_gil = consider_call_may_force
+
     def consider_call_assembler(self, op, guard_op):
         descr = op.getdescr()
         assert isinstance(descr, LoopToken)
@@ -814,12 +834,12 @@
     def consider_cond_call_gc_wb(self, op):
         assert op.result is None
         args = op.getarglist()
-        loc_newvalue = self.rm.make_sure_var_in_reg(op.getarg(1), args)
-        # ^^^ we force loc_newvalue in a reg (unless it's a Const),
-        # because it will be needed anyway by the following setfield_gc.
-        # It avoids loading it twice from the memory.
-        loc_base = self.rm.make_sure_var_in_reg(op.getarg(0), args)
-        arglocs = [loc_base, loc_newvalue]
+        N = len(args)
+        # we force all arguments into a reg (unless they are Consts),
+        # because they will be needed anyway by the following setfield_gc
+        # or setarrayitem_gc.  This avoids loading them twice from memory.
+        arglocs = [self.rm.make_sure_var_in_reg(op.getarg(i), args)
+                   for i in range(N)]
         # add eax, ecx and edx as extra "arguments" to ensure they are
         # saved and restored.  Fish in self.rm to know which of these
         # registers really need to be saved (a bit of a hack).  Moreover,
@@ -833,6 +853,8 @@
         self.PerformDiscard(op, arglocs)
         self.rm.possibly_free_vars_for_op(op)
 
+    consider_cond_call_gc_wb_array = consider_cond_call_gc_wb
+
     def fastpath_malloc_fixedsize(self, op, descr):
         assert isinstance(descr, BaseSizeDescr)
         self._do_fastpath_malloc(op, descr.size, descr.tid)
@@ -1308,7 +1330,9 @@
         name = name[len('consider_'):]
         num = getattr(rop, name.upper())
         if (is_comparison_or_ovf_op(num)
-            or num == rop.CALL_MAY_FORCE or num == rop.CALL_ASSEMBLER):
+            or num == rop.CALL_MAY_FORCE
+            or num == rop.CALL_ASSEMBLER
+            or num == rop.CALL_RELEASE_GIL):
             oplist_with_guard[num] = value
             oplist[num] = add_none_argument(value)
         else:
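
The comment added to _call() above summarizes the register-saving policy; as a
compact sketch of the same decision (the returned strings are only
descriptive, not part of the patch):

    # Sketch of the policy described in the _call() comment above.
    def register_save_policy(guard_not_forced_op, is_shadow_stack):
        if guard_not_forced_op is not None:
            # CALL_MAY_FORCE / CALL_ASSEMBLER / CALL_RELEASE_GIL
            return 'save all registers (cpu.force() may inspect them)'
        if is_shadow_stack:
            return 'save non-callee-saved regs plus callee-saved GC pointers'
        return 'save only the non-callee-saved registers'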
diff --git a/pypy/jit/backend/x86/regloc.py b/pypy/jit/backend/x86/regloc.py
--- a/pypy/jit/backend/x86/regloc.py
+++ b/pypy/jit/backend/x86/regloc.py
@@ -318,7 +318,9 @@
             # must be careful not to combine it with location types that
             # might need to use the scratch register themselves.
             if loc2 is X86_64_SCRATCH_REG:
-                assert code1 != 'j'
+                if code1 == 'j':
+                    assert (name.startswith("MOV") and
+                            rx86.fits_in_32bits(loc1.value_j()))
             if loc1 is X86_64_SCRATCH_REG and not name.startswith("MOV"):
                 assert code2 not in ('j', 'i')
 
diff --git a/pypy/jit/backend/x86/runner.py b/pypy/jit/backend/x86/runner.py
--- a/pypy/jit/backend/x86/runner.py
+++ b/pypy/jit/backend/x86/runner.py
@@ -22,6 +22,7 @@
 
     BOOTSTRAP_TP = lltype.FuncType([], lltype.Signed)
     dont_keepalive_stuff = False # for tests
+    with_threads = False
 
     def __init__(self, rtyper, stats, opts=None, translate_support_code=False,
                  gcdescr=None):
@@ -38,6 +39,7 @@
                 if not oprofile.OPROFILE_AVAILABLE:
                     log.WARNING('oprofile support was explicitly enabled, but oprofile headers seem not to be available')
                 profile_agent = oprofile.OProfileAgent()
+            self.with_threads = config.translation.thread
 
         self.profile_agent = profile_agent
 
@@ -77,9 +79,9 @@
         lines = machine_code_dump(data, addr, self.backend_name, label_list)
         print ''.join(lines)
 
-    def compile_loop(self, inputargs, operations, looptoken, log=True):
-        return self.assembler.assemble_loop(inputargs, operations, looptoken,
-                                            log=log)
+    def compile_loop(self, inputargs, operations, looptoken, log=True, name=''):
+        return self.assembler.assemble_loop(name, inputargs, operations,
+                                            looptoken, log=log)
 
     def compile_bridge(self, faildescr, inputargs, operations,
                        original_loop_token, log=True):
@@ -122,8 +124,8 @@
         addr = executable_token._x86_bootstrap_code
         #llop.debug_print(lltype.Void, ">>>> Entering", addr)
         func = rffi.cast(lltype.Ptr(self.BOOTSTRAP_TP), addr)
+        fail_index = self._execute_call(func)
         #llop.debug_print(lltype.Void, "<<<< Back")
-        fail_index = self._execute_call(func)
         return self.get_fail_descr_from_number(fail_index)
 
     def _execute_call(self, func):
@@ -140,10 +142,11 @@
                 LLInterpreter.current_interpreter = prev_interpreter
         return res
 
-    @staticmethod
     def cast_ptr_to_int(x):
         adr = llmemory.cast_ptr_to_adr(x)
         return CPU386.cast_adr_to_int(adr)
+    cast_ptr_to_int._annspecialcase_ = 'specialize:arglltype(0)'
+    cast_ptr_to_int = staticmethod(cast_ptr_to_int)
 
     all_null_registers = lltype.malloc(rffi.LONGP.TO, 24,
                                        flavor='raw', zero=True,
diff --git a/pypy/jit/backend/x86/rx86.py b/pypy/jit/backend/x86/rx86.py
--- a/pypy/jit/backend/x86/rx86.py
+++ b/pypy/jit/backend/x86/rx86.py
@@ -283,7 +283,7 @@
 # with immediate(argnum)).
 
 def encode_abs(mc, _1, _2, orbyte):
-    # expands to either '\x05' on 32-bit, or '\x04\x25' or 64-bit
+    # expands to either '\x05' on 32-bit, or '\x04\x25' on 64-bit
     if mc.WORD == 8:
         mc.writechar(chr(0x04 | orbyte))
         mc.writechar(chr(0x25))
@@ -370,6 +370,8 @@
     INSN_rj = insn(rex_w, chr(base+3), register(1,8), abs_, immediate(2))
     INSN_ji8 = insn(rex_w, '\x83', orbyte(base), abs_, immediate(1),
                     immediate(2,'b'))
+    INSN_mi8 = insn(rex_w, '\x83', orbyte(base), mem_reg_plus_const(1),
+                    immediate(2,'b'))
     INSN_bi8 = insn(rex_w, '\x83', orbyte(base), stack_bp(1), immediate(2,'b'))
     INSN_bi32= insn(rex_w, '\x81', orbyte(base), stack_bp(1), immediate(2))
 
@@ -388,7 +390,7 @@
     INSN_bi._always_inline_ = True      # try to constant-fold single_byte()
 
     return (INSN_ri, INSN_rr, INSN_rb, INSN_bi, INSN_br, INSN_rm, INSN_rj,
-            INSN_ji8)
+            INSN_ji8, INSN_mi8)
 
 def select_8_or_32_bit_immed(insn_8, insn_32):
     def INSN(*args):
@@ -462,18 +464,18 @@
 
     # ------------------------------ MOV ------------------------------
 
-    MOV_ri = insn(rex_w, register(1), '\xB8', immediate(2, 'q'))
+    MOV_ri = insn(register(1), '\xB8', immediate(2))
     MOV8_ri = insn(rex_fw, byte_register(1), '\xB0', immediate(2, 'b'))
 
     # ------------------------------ Arithmetic ------------------------------
 
-    ADD_ri, ADD_rr, ADD_rb, _, _, ADD_rm, ADD_rj, _ = common_modes(0)
-    OR_ri,  OR_rr,  OR_rb,  _, _, OR_rm,  OR_rj,  _ = common_modes(1)
-    AND_ri, AND_rr, AND_rb, _, _, AND_rm, AND_rj, _ = common_modes(4)
-    SUB_ri, SUB_rr, SUB_rb, _, _, SUB_rm, SUB_rj, SUB_ji8 = common_modes(5)
-    SBB_ri, SBB_rr, SBB_rb, _, _, SBB_rm, SBB_rj, _ = common_modes(3)
-    XOR_ri, XOR_rr, XOR_rb, _, _, XOR_rm, XOR_rj, _ = common_modes(6)
-    CMP_ri, CMP_rr, CMP_rb, CMP_bi, CMP_br, CMP_rm, CMP_rj, _ = common_modes(7)
+    ADD_ri,ADD_rr,ADD_rb,_,_,ADD_rm,ADD_rj,_,_ = common_modes(0)
+    OR_ri, OR_rr, OR_rb, _,_,OR_rm, OR_rj, _,_ = common_modes(1)
+    AND_ri,AND_rr,AND_rb,_,_,AND_rm,AND_rj,_,_ = common_modes(4)
+    SUB_ri,SUB_rr,SUB_rb,_,_,SUB_rm,SUB_rj,SUB_ji8,SUB_mi8 = common_modes(5)
+    SBB_ri,SBB_rr,SBB_rb,_,_,SBB_rm,SBB_rj,_,_ = common_modes(3)
+    XOR_ri,XOR_rr,XOR_rb,_,_,XOR_rm,XOR_rj,_,_ = common_modes(6)
+    CMP_ri,CMP_rr,CMP_rb,CMP_bi,CMP_br,CMP_rm,CMP_rj,_,_ = common_modes(7)
 
     CMP_mi8 = insn(rex_w, '\x83', orbyte(7<<3), mem_reg_plus_const(1), immediate(2, 'b'))
     CMP_mi32 = insn(rex_w, '\x81', orbyte(7<<3), mem_reg_plus_const(1), immediate(2))
@@ -530,6 +532,7 @@
     POP_b = insn(rex_nw, '\x8F', orbyte(0<<3), stack_bp(1))
 
     LEA_rb = insn(rex_w, '\x8D', register(1,8), stack_bp(2))
+    LEA_rs = insn(rex_w, '\x8D', register(1,8), stack_sp(2))
     LEA32_rb = insn(rex_w, '\x8D', register(1,8),stack_bp(2,force_32bits=True))
     LEA_ra = insn(rex_w, '\x8D', register(1, 8), mem_reg_plus_scaled_reg_plus_const(2))
     LEA_rm = insn(rex_w, '\x8D', register(1, 8), mem_reg_plus_const(2))
@@ -629,16 +632,20 @@
 
     CQO = insn(rex_w, '\x99')
 
-    # MOV_ri from the parent class is not wrong, but here is a better encoding
-    # for the common case where the immediate fits in 32 bits
+    # Three different encodings... following what gcc does.  From the
+    # shortest encoding to the longest one.
+    MOV_riu32 = insn(rex_nw, register(1), '\xB8', immediate(2, 'i'))
     MOV_ri32 = insn(rex_w, '\xC7', register(1), '\xC0', immediate(2, 'i'))
-    MOV_ri64 = AbstractX86CodeBuilder.MOV_ri
+    MOV_ri64 = insn(rex_w, register(1), '\xB8', immediate(2, 'q'))
 
     def MOV_ri(self, reg, immed):
-        if fits_in_32bits(immed):
+        if 0 <= immed <= 4294967295:
+            immed = intmask(rffi.cast(rffi.INT, immed))
+            self.MOV_riu32(reg, immed)
+        elif fits_in_32bits(immed):    # for negative values that fit in 32 bits
             self.MOV_ri32(reg, immed)
         else:
-            AbstractX86CodeBuilder.MOV_ri(self, reg, immed)
+            self.MOV_ri64(reg, immed)
 
 def define_modrm_modes(insnname_template, before_modrm, after_modrm=[], regtype='GPR'):
     def add_insn(code, *modrm):
diff --git a/pypy/jit/backend/x86/test/test_assembler.py b/pypy/jit/backend/x86/test/test_assembler.py
--- a/pypy/jit/backend/x86/test/test_assembler.py
+++ b/pypy/jit/backend/x86/test/test_assembler.py
@@ -1,13 +1,15 @@
 from pypy.jit.backend.x86.regloc import *
 from pypy.jit.backend.x86.assembler import Assembler386
 from pypy.jit.backend.x86.regalloc import X86FrameManager, get_ebp_ofs
-from pypy.jit.metainterp.history import BoxInt, BoxPtr, BoxFloat, INT, REF, FLOAT
+from pypy.jit.metainterp.history import BoxInt, BoxPtr, BoxFloat, ConstFloat
+from pypy.jit.metainterp.history import INT, REF, FLOAT
 from pypy.rlib.rarithmetic import intmask
 from pypy.rpython.lltypesystem import lltype, llmemory, rffi
 from pypy.jit.backend.x86.arch import WORD, IS_X86_32, IS_X86_64
 from pypy.jit.backend.detect_cpu import getcpuclass 
 from pypy.jit.backend.x86.regalloc import X86RegisterManager, X86_64_RegisterManager, X86XMMRegisterManager, X86_64_XMMRegisterManager
 from pypy.jit.codewriter import longlong
+import ctypes
 
 ACTUAL_CPU = getcpuclass()
 
@@ -238,3 +240,103 @@
         assert assembler.fail_boxes_int.getitem(i) == expected_ints[i]
         assert assembler.fail_boxes_ptr.getitem(i) == expected_ptrs[i]
         assert assembler.fail_boxes_float.getitem(i) == expected_floats[i]
+
+# ____________________________________________________________
+
+class TestRegallocPushPop(object):
+
+    def do_test(self, callback):
+        from pypy.jit.backend.x86.regalloc import X86FrameManager
+        from pypy.jit.backend.x86.regalloc import X86XMMRegisterManager
+        class FakeToken:
+            class compiled_loop_token:
+                asmmemmgr_blocks = None
+        cpu = ACTUAL_CPU(None, None)
+        cpu.setup()
+        looptoken = FakeToken()
+        asm = cpu.assembler
+        asm.setup_once()
+        asm.setup(looptoken)
+        self.fm = X86FrameManager()
+        self.xrm = X86XMMRegisterManager(None, frame_manager=self.fm,
+                                         assembler=asm)
+        callback(asm)
+        asm.mc.RET()
+        rawstart = asm.materialize_loop(looptoken)
+        #
+        F = ctypes.CFUNCTYPE(ctypes.c_long)
+        fn = ctypes.cast(rawstart, F)
+        res = fn()
+        return res
+
+    def test_simple(self):
+        def callback(asm):
+            asm.mov(imm(42), edx)
+            asm.regalloc_push(edx)
+            asm.regalloc_pop(eax)
+        res = self.do_test(callback)
+        assert res == 42
+
+    def test_push_stack(self):
+        def callback(asm):
+            loc = self.fm.frame_pos(5, INT)
+            asm.mc.SUB_ri(esp.value, 64)
+            asm.mov(imm(42), loc)
+            asm.regalloc_push(loc)
+            asm.regalloc_pop(eax)
+            asm.mc.ADD_ri(esp.value, 64)
+        res = self.do_test(callback)
+        assert res == 42
+
+    def test_pop_stack(self):
+        def callback(asm):
+            loc = self.fm.frame_pos(5, INT)
+            asm.mc.SUB_ri(esp.value, 64)
+            asm.mov(imm(42), edx)
+            asm.regalloc_push(edx)
+            asm.regalloc_pop(loc)
+            asm.mov(loc, eax)
+            asm.mc.ADD_ri(esp.value, 64)
+        res = self.do_test(callback)
+        assert res == 42
+
+    def test_simple_xmm(self):
+        def callback(asm):
+            c = ConstFloat(longlong.getfloatstorage(-42.5))
+            loc = self.xrm.convert_to_imm(c)
+            asm.mov(loc, xmm5)
+            asm.regalloc_push(xmm5)
+            asm.regalloc_pop(xmm0)
+            asm.mc.CVTTSD2SI(eax, xmm0)
+        res = self.do_test(callback)
+        assert res == -42
+
+    def test_push_stack_xmm(self):
+        def callback(asm):
+            c = ConstFloat(longlong.getfloatstorage(-42.5))
+            loc = self.xrm.convert_to_imm(c)
+            loc2 = self.fm.frame_pos(4, FLOAT)
+            asm.mc.SUB_ri(esp.value, 64)
+            asm.mov(loc, xmm5)
+            asm.mov(xmm5, loc2)
+            asm.regalloc_push(loc2)
+            asm.regalloc_pop(xmm0)
+            asm.mc.ADD_ri(esp.value, 64)
+            asm.mc.CVTTSD2SI(eax, xmm0)
+        res = self.do_test(callback)
+        assert res == -42
+
+    def test_pop_stack_xmm(self):
+        def callback(asm):
+            c = ConstFloat(longlong.getfloatstorage(-42.5))
+            loc = self.xrm.convert_to_imm(c)
+            loc2 = self.fm.frame_pos(4, FLOAT)
+            asm.mc.SUB_ri(esp.value, 64)
+            asm.mov(loc, xmm5)
+            asm.regalloc_push(xmm5)
+            asm.regalloc_pop(loc2)
+            asm.mov(loc2, xmm0)
+            asm.mc.ADD_ri(esp.value, 64)
+            asm.mc.CVTTSD2SI(eax, xmm0)
+        res = self.do_test(callback)
+        assert res == -42
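(The xmm variants above rely on CVTTSD2SI truncating toward zero, which is why
pushing -42.5 around and converting it yields -42; a one-line reminder in plain
Python:)

    assert int(-42.5) == -42    # truncation toward zero, like CVTTSD2SI
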
diff --git a/pypy/jit/backend/x86/test/test_gc_integration.py b/pypy/jit/backend/x86/test/test_gc_integration.py
--- a/pypy/jit/backend/x86/test/test_gc_integration.py
+++ b/pypy/jit/backend/x86/test/test_gc_integration.py
@@ -16,7 +16,7 @@
 from pypy.rpython.lltypesystem import lltype, llmemory, rffi
 from pypy.rpython.annlowlevel import llhelper
 from pypy.rpython.lltypesystem import rclass, rstr
-from pypy.jit.backend.llsupport.gc import GcLLDescr_framework, GcRefList, GcPtrFieldDescr
+from pypy.jit.backend.llsupport.gc import GcLLDescr_framework, GcPtrFieldDescr
 
 from pypy.jit.backend.x86.test.test_regalloc import MockAssembler
 from pypy.jit.backend.x86.test.test_regalloc import BaseTestRegalloc
@@ -51,11 +51,9 @@
     gcrootmap = MockGcRootMap()
 
     def initialize(self):
-        self.gcrefs = GcRefList()
-        self.gcrefs.initialize()
-        self.single_gcref_descr = GcPtrFieldDescr('', 0)
+        pass
 
-    replace_constptrs_with_getfield_raw = GcLLDescr_framework.replace_constptrs_with_getfield_raw.im_func
+    record_constptrs = GcLLDescr_framework.record_constptrs.im_func
     rewrite_assembler = GcLLDescr_framework.rewrite_assembler.im_func
 
 class TestRegallocDirectGcIntegration(object):
diff --git a/pypy/jit/backend/x86/test/test_runner.py b/pypy/jit/backend/x86/test/test_runner.py
--- a/pypy/jit/backend/x86/test/test_runner.py
+++ b/pypy/jit/backend/x86/test/test_runner.py
@@ -6,6 +6,7 @@
                                          ConstPtr, Box, BoxFloat, BasicFailDescr)
 from pypy.jit.backend.detect_cpu import getcpuclass
 from pypy.jit.backend.x86.arch import WORD
+from pypy.jit.backend.x86.rx86 import fits_in_32bits
 from pypy.jit.backend.llsupport import symbolic
 from pypy.jit.metainterp.resoperation import rop
 from pypy.jit.metainterp.executor import execute
@@ -241,6 +242,23 @@
         c = self.execute_operation(rop.GETFIELD_GC, [res], 'int', ofsc3)
         assert c.value == 3
 
+    def test_bug_setfield_64bit(self):
+        if WORD == 4:
+            py.test.skip("only for 64 bits")
+        TP = lltype.GcStruct('S', ('i', lltype.Signed))
+        ofsi = self.cpu.fielddescrof(TP, 'i')
+        for i in range(500):
+            p = lltype.malloc(TP)
+            addr = rffi.cast(lltype.Signed, p)
+            if fits_in_32bits(addr):
+                break    # fitting in 32 bits, good
+        else:
+            py.test.skip("cannot get a 32-bit pointer")
+        res = ConstPtr(rffi.cast(llmemory.GCREF, addr))
+        self.execute_operation(rop.SETFIELD_RAW, [res, ConstInt(3**33)],
+                               'void', ofsi)
+        assert p.i == 3**33
+
     def test_nullity_with_guard(self):
         allops = [rop.INT_IS_TRUE]
         guards = [rop.GUARD_TRUE, rop.GUARD_FALSE]
@@ -330,6 +348,7 @@
                         assert result != expected
 
     def test_compile_bridge_check_profile_info(self):
+        py.test.skip("does not work, reinvestigate")
         class FakeProfileAgent(object):
             def __init__(self):
                 self.functions = []
@@ -362,7 +381,7 @@
         operations[3].setfailargs([i1])
         self.cpu.compile_loop(inputargs, operations, looptoken)
         name, loopaddress, loopsize = agent.functions[0]
-        assert name == "Loop # 17: hello"
+        assert name == "Loop # 17: hello (loop counter 0)"
         assert loopaddress <= looptoken._x86_loop_code
         assert loopsize >= 40 # randomish number
 
@@ -378,7 +397,7 @@
 
         self.cpu.compile_bridge(faildescr1, [i1b], bridge, looptoken)
         name, address, size = agent.functions[1]
-        assert name == "Bridge # 0: bye"
+        assert name == "Bridge # 0: bye (loop counter 1)"
         # Would be exactly ==, but there are some guard failure recovery
         # stubs in-between
         assert address >= loopaddress + loopsize
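(test_bug_setfield_64bit needs a GC object whose address happens to fit in
32 bits, so that the interesting 64-bit quantity is the immediate 3**33 being
stored.  fits_in_32bits, imported above from rx86, is a signed 32-bit range
check; a sketch of the predicate, assuming it matches the rx86 definition:)

    def fits_in_32bits(value):
        return -2147483648 <= value <= 2147483647

    assert fits_in_32bits(2**31 - 1)
    assert not fits_in_32bits(3**33)    # 5559060566555523 needs 64 bits
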
diff --git a/pypy/jit/backend/x86/test/test_rx86.py b/pypy/jit/backend/x86/test/test_rx86.py
--- a/pypy/jit/backend/x86/test/test_rx86.py
+++ b/pypy/jit/backend/x86/test/test_rx86.py
@@ -185,15 +185,32 @@
     cb = CodeBuilder32
     assert_encodes_as(cb, 'PUSH_i32', (9,), '\x68\x09\x00\x00\x00')
 
+def test_sub_ji8():
+    cb = CodeBuilder32
+    assert_encodes_as(cb, 'SUB_ji8', (11223344, 55),
+                      '\x83\x2D\x30\x41\xAB\x00\x37')
+    assert_encodes_as(cb, 'SUB_mi8', ((edx, 16), 55),
+                      '\x83\x6A\x10\x37')
+
 class CodeBuilder64(CodeBuilderMixin, X86_64_CodeBuilder):
     pass
 
 def test_mov_ri_64():
     s = CodeBuilder64()
     s.MOV_ri(ecx, -2)
+    s.MOV_ri(r15, -3)
+    s.MOV_ri(ebx, -0x80000003)
+    s.MOV_ri(r13, -0x80000002)
+    s.MOV_ri(ecx, 42)
     s.MOV_ri(r12, 0x80000042)
+    s.MOV_ri(r12, 0x100000007)
     assert s.getvalue() == ('\x48\xC7\xC1\xFE\xFF\xFF\xFF' +
-                            '\x49\xBC\x42\x00\x00\x80\x00\x00\x00\x00')
+                            '\x49\xC7\xC7\xFD\xFF\xFF\xFF' +
+                            '\x48\xBB\xFD\xFF\xFF\x7F\xFF\xFF\xFF\xFF' +
+                            '\x49\xBD\xFE\xFF\xFF\x7F\xFF\xFF\xFF\xFF' +
+                            '\xB9\x2A\x00\x00\x00' +
+                            '\x41\xBC\x42\x00\x00\x80' +
+                            '\x49\xBC\x07\x00\x00\x00\x01\x00\x00\x00')
 
 def test_mov_rm_64():
     s = CodeBuilder64()
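(The expected byte string in test_mov_ri_64 decodes, instruction by
instruction, as follows; this is a manual decode using the standard x86-64
encodings, added here only as a reading aid:)

    # 48 C7 C1 FE FF FF FF           mov rcx, -2           (REX.W C7 /0 imm32)
    # 49 C7 C7 FD FF FF FF           mov r15, -3
    # 48 BB FD FF FF 7F FF FF FF FF  mov rbx, -0x80000003  (REX.W B8+r imm64)
    # 49 BD FE FF FF 7F FF FF FF FF  mov r13, -0x80000002
    # B9 2A 00 00 00                 mov ecx, 42           (B8+r imm32, zero-extends)
    # 41 BC 42 00 00 80              mov r12d, 0x80000042
    # 49 BC 07 00 00 00 01 00 00 00  mov r12, 0x100000007  (imm64)
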
diff --git a/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py b/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py
--- a/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py
+++ b/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py
@@ -212,6 +212,17 @@
             for mode, v in zip(argmodes, args):
                 ops.append(assembler_operand[mode](v))
             ops.reverse()
+            #
+            if (instrname.lower() == 'mov' and suffix == 'q' and
+                ops[0].startswith('$') and 0 <= int(ops[0][1:]) <= 4294967295
+                and ops[1].startswith('%r')):
+                # movq $xxx, %rax => movl $xxx, %eax
+                suffix = 'l'
+                if ops[1][2:].isdigit():
+                    ops[1] += 'd'
+                else:
+                    ops[1] = '%e' + ops[1][2:]
+            #
             op = '\t%s%s %s%s' % (instrname.lower(), suffix,
                                   ', '.join(ops), following)
             g.write('%s\n' % op)
diff --git a/pypy/jit/backend/x86/test/test_zrpy_gc.py b/pypy/jit/backend/x86/test/test_zrpy_gc.py
--- a/pypy/jit/backend/x86/test/test_zrpy_gc.py
+++ b/pypy/jit/backend/x86/test/test_zrpy_gc.py
@@ -1,8 +1,7 @@
 """
-This is a test that translates a complete JIT to C and runs it.  It is
-not testing much, expect that it basically works.  What it *is* testing,
-however, is the correct handling of GC, i.e. if objects are freed as
-soon as possible (at least in a simple case).
+This is a test that translates a complete JIT together with a GC and runs it.
+It checks that the GC-dependent aspects basically work: mostly the mallocs
+and the various write barrier cases.
 """
 
 import weakref
@@ -10,16 +9,11 @@
 from pypy.annotation import policy as annpolicy
 from pypy.rlib import rgc
 from pypy.rpython.lltypesystem import lltype, llmemory, rffi
-from pypy.rpython.lltypesystem.lloperation import llop
 from pypy.rlib.jit import JitDriver, dont_look_inside
-from pypy.rlib.jit import purefunction, unroll_safe
-from pypy.jit.backend.x86.runner import CPU386
-from pypy.jit.backend.llsupport.gc import GcRefList, GcRootMap_asmgcc
+from pypy.rlib.jit import elidable, unroll_safe
 from pypy.jit.backend.llsupport.gc import GcLLDescr_framework
 from pypy.tool.udir import udir
-from pypy.jit.backend.x86.arch import IS_X86_64
 from pypy.config.translationoption import DEFL_GC
-import py.test
 
 class X(object):
     def __init__(self, x=0):
@@ -86,7 +80,7 @@
     #
     return {(gc.GcLLDescr_framework, 'can_inline_malloc'): can_inline_malloc2}
 
-def compile(f, gc, **kwds):
+def compile(f, gc, enable_opts='', **kwds):
     from pypy.annotation.listdef import s_list_of_strings
     from pypy.translator.translator import TranslationContext
     from pypy.jit.metainterp.warmspot import apply_jit
@@ -110,14 +104,14 @@
                 old_value[obj, attr] = getattr(obj, attr)
                 setattr(obj, attr, value)
             #
-            apply_jit(t, enable_opts='')
+            apply_jit(t, enable_opts=enable_opts)
             #
         finally:
             for (obj, attr), oldvalue in old_value.items():
                 setattr(obj, attr, oldvalue)
 
     cbuilder = genc.CStandaloneBuilder(t, f, t.config)
-    cbuilder.generate_source()
+    cbuilder.generate_source(defines=cbuilder.DEBUG_DEFINES)
     cbuilder.compile()
     return cbuilder
 
@@ -154,8 +148,10 @@
 
 # ______________________________________________________________________
 
-class CompileFrameworkTests(object):
-    # Test suite using (so far) the minimark GC.
+
+class BaseFrameworkTests(object):
+    compile_kwds = {}
+
     def setup_class(cls):
         funcs = []
         name_to_func = {}
@@ -205,7 +201,8 @@
         try:
             GcLLDescr_framework.DEBUG = True
             cls.cbuilder = compile(get_entry(allfuncs), DEFL_GC,
-                                   gcrootfinder=cls.gcrootfinder, jit=True)
+                                   gcrootfinder=cls.gcrootfinder, jit=True,
+                                   **cls.compile_kwds)
         finally:
             GcLLDescr_framework.DEBUG = OLD_DEBUG
 
@@ -224,32 +221,36 @@
     def run_orig(self, name, n, x):
         self.main_allfuncs(name, n, x)
 
-    def define_libffi_workaround(cls):
-        # XXX: this is a workaround for a bug in database.py.  It seems that
-        # the problem is triggered by optimizeopt/fficall.py, and in
-        # particular by the ``cast_base_ptr_to_instance(Func, llfunc)``: in
-        # these tests, that line is the only place where libffi.Func is
-        # referenced.
-        #
-        # The problem occurs because the gctransformer tries to annotate a
-        # low-level helper to call the __del__ of libffi.Func when it's too
-        # late.
-        #
-        # This workaround works by forcing the annotator (and all the rest of
-        # the toolchain) to see libffi.Func in a "proper" context, not just as
-        # the target of cast_base_ptr_to_instance.  Note that the function
-        # below is *never* called by any actual test, it's just annotated.
-        #
-        from pypy.rlib.libffi import get_libc_name, CDLL, types, ArgChain
-        libc_name = get_libc_name()
-        def f(n, x, *args):
-            libc = CDLL(libc_name)
-            ptr = libc.getpointer('labs', [types.slong], types.slong)
-            chain = ArgChain()
-            chain.arg(n)
-            n = ptr.call(chain, lltype.Signed)
-            return (n, x) + args
-        return None, f, None
+
+class CompileFrameworkTests(BaseFrameworkTests):
+    # Test suite using (so far) the minimark GC.
+
+##    def define_libffi_workaround(cls):
+##        # XXX: this is a workaround for a bug in database.py.  It seems that
+##        # the problem is triggered by optimizeopt/fficall.py, and in
+##        # particular by the ``cast_base_ptr_to_instance(Func, llfunc)``: in
+##        # these tests, that line is the only place where libffi.Func is
+##        # referenced.
+##        #
+##        # The problem occurs because the gctransformer tries to annotate a
+##        # low-level helper to call the __del__ of libffi.Func when it's too
+##        # late.
+##        #
+##        # This workaround works by forcing the annotator (and all the rest of
+##        # the toolchain) to see libffi.Func in a "proper" context, not just as
+##        # the target of cast_base_ptr_to_instance.  Note that the function
+##        # below is *never* called by any actual test, it's just annotated.
+##        #
+##        from pypy.rlib.libffi import get_libc_name, CDLL, types, ArgChain
+##        libc_name = get_libc_name()
+##        def f(n, x, *args):
+##            libc = CDLL(libc_name)
+##            ptr = libc.getpointer('labs', [types.slong], types.slong)
+##            chain = ArgChain()
+##            chain.arg(n)
+##            n = ptr.call(chain, lltype.Signed)
+##            return (n, x) + args
+##        return None, f, None
 
     def define_compile_framework_1(cls):
         # a moving GC.  Supports malloc_varsize_nonmovable.  Simple test, works
@@ -456,6 +457,73 @@
     def test_compile_framework_7(self):
         self.run('compile_framework_7')
 
+    def define_compile_framework_8(cls):
+        # Array of pointers, of unknown length (test write_barrier_from_array)
+        def before(n, x):
+            return n, x, None, None, None, None, None, None, None, None, [X(123)], None
+        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
+            if n < 1900:
+                check(l[0].x == 123)
+                l = [None] * (16 + (n & 7))
+                l[0] = X(123)
+                l[1] = X(n)
+                l[2] = X(n+10)
+                l[3] = X(n+20)
+                l[4] = X(n+30)
+                l[5] = X(n+40)
+                l[6] = X(n+50)
+                l[7] = X(n+60)
+                l[8] = X(n+70)
+                l[9] = X(n+80)
+                l[10] = X(n+90)
+                l[11] = X(n+100)
+                l[12] = X(n+110)
+                l[13] = X(n+120)
+                l[14] = X(n+130)
+                l[15] = X(n+140)
+            if n < 1800:
+                check(len(l) == 16 + (n & 7))
+                check(l[0].x == 123)
+                check(l[1].x == n)
+                check(l[2].x == n+10)
+                check(l[3].x == n+20)
+                check(l[4].x == n+30)
+                check(l[5].x == n+40)
+                check(l[6].x == n+50)
+                check(l[7].x == n+60)
+                check(l[8].x == n+70)
+                check(l[9].x == n+80)
+                check(l[10].x == n+90)
+                check(l[11].x == n+100)
+                check(l[12].x == n+110)
+                check(l[13].x == n+120)
+                check(l[14].x == n+130)
+                check(l[15].x == n+140)
+            n -= x.foo
+            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
+        def after(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
+            check(len(l) >= 16)
+            check(l[0].x == 123)
+            check(l[1].x == 2)
+            check(l[2].x == 12)
+            check(l[3].x == 22)
+            check(l[4].x == 32)
+            check(l[5].x == 42)
+            check(l[6].x == 52)
+            check(l[7].x == 62)
+            check(l[8].x == 72)
+            check(l[9].x == 82)
+            check(l[10].x == 92)
+            check(l[11].x == 102)
+            check(l[12].x == 112)
+            check(l[13].x == 122)
+            check(l[14].x == 132)
+            check(l[15].x == 142)
+        return before, f, after
+
+    def test_compile_framework_8(self):
+        self.run('compile_framework_8')
+
     def define_compile_framework_external_exception_handling(cls):
         def before(n, x):
             x = X(0)
@@ -493,7 +561,7 @@
         self.run('compile_framework_external_exception_handling')
 
     def define_compile_framework_bug1(self):
-        @purefunction
+        @elidable
         def nonmoving():
             x = X(1)
             for i in range(7):
diff --git a/pypy/jit/backend/x86/test/test_zrpy_gc.py b/pypy/jit/backend/x86/test/test_zrpy_releasegil.py
copy from pypy/jit/backend/x86/test/test_zrpy_gc.py
copy to pypy/jit/backend/x86/test/test_zrpy_releasegil.py
--- a/pypy/jit/backend/x86/test/test_zrpy_gc.py
+++ b/pypy/jit/backend/x86/test/test_zrpy_releasegil.py
@@ -1,618 +1,110 @@
-"""
-This is a test that translates a complete JIT to C and runs it.  It is
-not testing much, expect that it basically works.  What it *is* testing,
-however, is the correct handling of GC, i.e. if objects are freed as
-soon as possible (at least in a simple case).
-"""
+from pypy.rpython.lltypesystem import lltype, llmemory, rffi
+from pypy.rlib.jit import dont_look_inside
+from pypy.jit.metainterp.optimizeopt import ALL_OPTS_NAMES
 
-import weakref
-import py, os
-from pypy.annotation import policy as annpolicy
-from pypy.rlib import rgc
-from pypy.rpython.lltypesystem import lltype, llmemory, rffi
-from pypy.rpython.lltypesystem.lloperation import llop
-from pypy.rlib.jit import JitDriver, dont_look_inside
-from pypy.rlib.jit import purefunction, unroll_safe
-from pypy.jit.backend.x86.runner import CPU386
-from pypy.jit.backend.llsupport.gc import GcRefList, GcRootMap_asmgcc
-from pypy.jit.backend.llsupport.gc import GcLLDescr_framework
-from pypy.tool.udir import udir
-from pypy.jit.backend.x86.arch import IS_X86_64
-from pypy.config.translationoption import DEFL_GC
-import py.test
+from pypy.rlib.libffi import CDLL, types, ArgChain, clibffi
+from pypy.rpython.lltypesystem.ll2ctypes import libc_name
+from pypy.rpython.annlowlevel import llhelper
 
-class X(object):
-    def __init__(self, x=0):
-        self.x = x
+from pypy.jit.backend.x86.test.test_zrpy_gc import BaseFrameworkTests
+from pypy.jit.backend.x86.test.test_zrpy_gc import check
 
-    next = None
 
-class CheckError(Exception):
-    pass
+class ReleaseGILTests(BaseFrameworkTests):
+    compile_kwds = dict(enable_opts=ALL_OPTS_NAMES, thread=True)
 
-def check(flag):
-    if not flag:
-        raise CheckError
-
-def get_g(main):
-    main._dont_inline_ = True
-    def g(name, n):
-        x = X()
-        x.foo = 2
-        main(n, x)
-        x.foo = 5
-        return weakref.ref(x)
-    g._dont_inline_ = True
-    return g
-
-
-def get_entry(g):
-
-    def entrypoint(args):
-        name = ''
-        n = 2000
-        argc = len(args)
-        if argc > 1:
-            name = args[1]
-        if argc > 2:
-            n = int(args[2])
-        r_list = []
-        for i in range(20):
-            r = g(name, n)
-            r_list.append(r)
-            rgc.collect()
-        rgc.collect(); rgc.collect()
-        freed = 0
-        for r in r_list:
-            if r() is None:
-                freed += 1
-        print freed
-        return 0
-
-    return entrypoint
-
-
-def get_functions_to_patch():
-    from pypy.jit.backend.llsupport import gc
-    #
-    can_inline_malloc1 = gc.GcLLDescr_framework.can_inline_malloc
-    def can_inline_malloc2(*args):
-        try:
-            if os.environ['PYPY_NO_INLINE_MALLOC']:
-                return False
-        except KeyError:
+    def define_simple(self):
+        class Glob:
             pass
-        return can_inline_malloc1(*args)
-    #
-    return {(gc.GcLLDescr_framework, 'can_inline_malloc'): can_inline_malloc2}
-
-def compile(f, gc, **kwds):
-    from pypy.annotation.listdef import s_list_of_strings
-    from pypy.translator.translator import TranslationContext
-    from pypy.jit.metainterp.warmspot import apply_jit
-    from pypy.translator.c import genc
-    #
-    t = TranslationContext()
-    t.config.translation.gc = gc
-    if gc != 'boehm':
-        t.config.translation.gcremovetypeptr = True
-    for name, value in kwds.items():
-        setattr(t.config.translation, name, value)
-    ann = t.buildannotator(policy=annpolicy.StrictAnnotatorPolicy())
-    ann.build_types(f, [s_list_of_strings], main_entry_point=True)
-    t.buildrtyper().specialize()
-
-    if kwds['jit']:
-        patch = get_functions_to_patch()
-        old_value = {}
-        try:
-            for (obj, attr), value in patch.items():
-                old_value[obj, attr] = getattr(obj, attr)
-                setattr(obj, attr, value)
-            #
-            apply_jit(t, enable_opts='')
-            #
-        finally:
-            for (obj, attr), oldvalue in old_value.items():
-                setattr(obj, attr, oldvalue)
-
-    cbuilder = genc.CStandaloneBuilder(t, f, t.config)
-    cbuilder.generate_source()
-    cbuilder.compile()
-    return cbuilder
-
-def run(cbuilder, args=''):
-    #
-    pypylog = udir.join('test_zrpy_gc.log')
-    data = cbuilder.cmdexec(args, env={'PYPYLOG': ':%s' % pypylog})
-    return data.strip()
-
-def compile_and_run(f, gc, **kwds):
-    cbuilder = compile(f, gc, **kwds)
-    return run(cbuilder)
-
-
-
-def test_compile_boehm():
-    myjitdriver = JitDriver(greens = [], reds = ['n', 'x'])
-    @dont_look_inside
-    def see(lst, n):
-        assert len(lst) == 3
-        assert lst[0] == n+10
-        assert lst[1] == n+20
-        assert lst[2] == n+30
-    def main(n, x):
-        while n > 0:
-            myjitdriver.can_enter_jit(n=n, x=x)
-            myjitdriver.jit_merge_point(n=n, x=x)
-            y = X()
-            y.foo = x.foo
-            n -= y.foo
-            see([n+10, n+20, n+30], n)
-    res = compile_and_run(get_entry(get_g(main)), "boehm", jit=True)
-    assert int(res) >= 16
-
-# ______________________________________________________________________
-
-class CompileFrameworkTests(object):
-    # Test suite using (so far) the minimark GC.
-    def setup_class(cls):
-        funcs = []
-        name_to_func = {}
-        for fullname in dir(cls):
-            if not fullname.startswith('define'):
-                continue
-            definefunc = getattr(cls, fullname)
-            _, name = fullname.split('_', 1)
-            beforefunc, loopfunc, afterfunc = definefunc.im_func(cls)
-            if beforefunc is None:
-                def beforefunc(n, x):
-                    return n, x, None, None, None, None, None, None, None, None, None, ''
-            if afterfunc is None:
-                def afterfunc(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-                    pass
-            beforefunc.func_name = 'before_'+name
-            loopfunc.func_name = 'loop_'+name
-            afterfunc.func_name = 'after_'+name
-            funcs.append((beforefunc, loopfunc, afterfunc))
-            assert name not in name_to_func
-            name_to_func[name] = len(name_to_func)
-        print name_to_func
-        def allfuncs(name, n):
-            x = X()
-            x.foo = 2
-            main_allfuncs(name, n, x)
-            x.foo = 5
-            return weakref.ref(x)
-        def main_allfuncs(name, n, x):
-            num = name_to_func[name]
-            n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s = funcs[num][0](n, x)
-            while n > 0:
-                myjitdriver.can_enter_jit(num=num, n=n, x=x, x0=x0, x1=x1,
-                        x2=x2, x3=x3, x4=x4, x5=x5, x6=x6, x7=x7, l=l, s=s)
-                myjitdriver.jit_merge_point(num=num, n=n, x=x, x0=x0, x1=x1,
-                        x2=x2, x3=x3, x4=x4, x5=x5, x6=x6, x7=x7, l=l, s=s)
-
-                n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s = funcs[num][1](
-                        n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s)
-            funcs[num][2](n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s)
-        myjitdriver = JitDriver(greens = ['num'],
-                                reds = ['n', 'x', 'x0', 'x1', 'x2', 'x3', 'x4',
-                                        'x5', 'x6', 'x7', 'l', 's'])
-        cls.main_allfuncs = staticmethod(main_allfuncs)
-        cls.name_to_func = name_to_func
-        OLD_DEBUG = GcLLDescr_framework.DEBUG
-        try:
-            GcLLDescr_framework.DEBUG = True
-            cls.cbuilder = compile(get_entry(allfuncs), DEFL_GC,
-                                   gcrootfinder=cls.gcrootfinder, jit=True)
-        finally:
-            GcLLDescr_framework.DEBUG = OLD_DEBUG
-
-    def _run(self, name, n, env):
-        res = self.cbuilder.cmdexec("%s %d" %(name, n), env=env)
-        assert int(res) == 20
-
-    def run(self, name, n=2000):
-        pypylog = udir.join('TestCompileFramework.log')
-        env = {'PYPYLOG': ':%s' % pypylog,
-               'PYPY_NO_INLINE_MALLOC': '1'}
-        self._run(name, n, env)
-        env['PYPY_NO_INLINE_MALLOC'] = ''
-        self._run(name, n, env)
-
-    def run_orig(self, name, n, x):
-        self.main_allfuncs(name, n, x)
-
-    def define_libffi_workaround(cls):
-        # XXX: this is a workaround for a bug in database.py.  It seems that
-        # the problem is triggered by optimizeopt/fficall.py, and in
-        # particular by the ``cast_base_ptr_to_instance(Func, llfunc)``: in
-        # these tests, that line is the only place where libffi.Func is
-        # referenced.
+        glob = Glob()
         #
-        # The problem occurs because the gctransformer tries to annotate a
-        # low-level helper to call the __del__ of libffi.Func when it's too
-        # late.
-        #
-        # This workaround works by forcing the annotator (and all the rest of
-        # the toolchain) to see libffi.Func in a "proper" context, not just as
-        # the target of cast_base_ptr_to_instance.  Note that the function
-        # below is *never* called by any actual test, it's just annotated.
-        #
-        from pypy.rlib.libffi import get_libc_name, CDLL, types, ArgChain
-        libc_name = get_libc_name()
-        def f(n, x, *args):
-            libc = CDLL(libc_name)
-            ptr = libc.getpointer('labs', [types.slong], types.slong)
-            chain = ArgChain()
-            chain.arg(n)
-            n = ptr.call(chain, lltype.Signed)
-            return (n, x) + args
-        return None, f, None
-
-    def define_compile_framework_1(cls):
-        # a moving GC.  Supports malloc_varsize_nonmovable.  Simple test, works
-        # without write_barriers and root stack enumeration.
-        def f(n, x, *args):
-            y = X()
-            y.foo = x.foo
-            n -= y.foo
-            return (n, x) + args
-        return None, f, None
-
-    def test_compile_framework_1(self):
-        self.run('compile_framework_1')
-
-    def define_compile_framework_2(cls):
-        # More complex test, requires root stack enumeration but
-        # not write_barriers.
-        def f(n, x, *args):
-            prev = x
-            for j in range(101):    # f() runs 20'000 times, thus allocates
-                y = X()             # a total of 2'020'000 objects
-                y.foo = prev.foo
-                prev = y
-            n -= prev.foo
-            return (n, x) + args
-        return None, f, None
-
-    def test_compile_framework_2(self):
-        self.run('compile_framework_2')
-
-    def define_compile_framework_3(cls):
-        # Third version of the test.  Really requires write_barriers.
-        def f(n, x, *args):
-            x.next = None
-            for j in range(101):    # f() runs 20'000 times, thus allocates
-                y = X()             # a total of 2'020'000 objects
-                y.foo = j+1
-                y.next = x.next
-                x.next = y
-            check(x.next.foo == 101)
-            total = 0
-            y = x
-            for j in range(101):
-                y = y.next
-                total += y.foo
-            check(not y.next)
-            check(total == 101*102/2)
-            n -= x.foo
-            return (n, x) + args
-        return None, f, None
-
-
-
-    def test_compile_framework_3(self):
-        x_test = X()
-        x_test.foo = 5
-        self.run_orig('compile_framework_3', 6, x_test)     # check that it does not raise CheckError
-        self.run('compile_framework_3')
-
-    def define_compile_framework_3_extra(cls):
-        # Extra version of the test, with tons of live vars around the residual
-        # call that all contain a GC pointer.
-        @dont_look_inside
-        def residual(n=26):
-            x = X()
-            x.next = X()
-            x.next.foo = n
-            return x
+        def f42(n):
+            c_strchr = glob.c_strchr
+            raw = rffi.str2charp("foobar" + chr((n & 63) + 32))
+            argchain = ArgChain()
+            argchain = argchain.arg(rffi.cast(lltype.Signed, raw))
+            argchain = argchain.arg(rffi.cast(rffi.INT, ord('b')))
+            res = c_strchr.call(argchain, rffi.CCHARP)
+            check(rffi.charp2str(res) == "bar" + chr((n & 63) + 32))
+            rffi.free_charp(raw)
         #
         def before(n, x):
-            residual(5)
-            x0 = residual()
-            x1 = residual()
-            x2 = residual()
-            x3 = residual()
-            x4 = residual()
-            x5 = residual()
-            x6 = residual()
-            x7 = residual()
-            n *= 19
-            return n, None, x0, x1, x2, x3, x4, x5, x6, x7, None, None
-        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            x8 = residual()
-            x9 = residual()
-            check(x0.next.foo == 26)
-            check(x1.next.foo == 26)
-            check(x2.next.foo == 26)
-            check(x3.next.foo == 26)
-            check(x4.next.foo == 26)
-            check(x5.next.foo == 26)
-            check(x6.next.foo == 26)
-            check(x7.next.foo == 26)
-            check(x8.next.foo == 26)
-            check(x9.next.foo == 26)
-            x0, x1, x2, x3, x4, x5, x6, x7 = x7, x4, x6, x5, x3, x2, x9, x8
+            libc = CDLL(libc_name)
+            c_strchr = libc.getpointer('strchr', [types.pointer, types.sint],
+                                       types.pointer)
+            glob.c_strchr = c_strchr
+            return (n, None, None, None, None, None,
+                    None, None, None, None, None, None)
+        #
+        def f(n, x, *args):
+            f42(n)
             n -= 1
-            return n, None, x0, x1, x2, x3, x4, x5, x6, x7, None, None
-        return before, f, None
-
-    def test_compile_framework_3_extra(self):
-        self.run_orig('compile_framework_3_extra', 6, None)     # check that it does not raise CheckError
-        self.run('compile_framework_3_extra')
-
-    def define_compile_framework_4(cls):
-        # Fourth version of the test, with __del__.
-        from pypy.rlib.debug import debug_print
-        class Counter:
-            cnt = 0
-        counter = Counter()
-        class Z:
-            def __del__(self):
-                counter.cnt -= 1
-        def before(n, x):
-            debug_print('counter.cnt =', counter.cnt)
-            check(counter.cnt < 5)
-            counter.cnt = n // x.foo
-            return n, x, None, None, None, None, None, None, None, None, None, None
-        def f(n, x, *args):
-            Z()
-            n -= x.foo
             return (n, x) + args
         return before, f, None
 
-    def test_compile_framework_4(self):
-        self.run('compile_framework_4')
+    def test_simple(self):
+        self.run('simple')
 
-    def define_compile_framework_5(cls):
-        # Test string manipulation.
-        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            n -= x.foo
-            s += str(n)
-            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
-        def after(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            check(len(s) == 1*5 + 2*45 + 3*450 + 4*500)
-        return None, f, after
-
-    def test_compile_framework_5(self):
-        self.run('compile_framework_5')
-
-    def define_compile_framework_7(cls):
-        # Array of pointers (test the write barrier for setarrayitem_gc)
+    def define_close_stack(self):
+        #
+        class Glob(object):
+            pass
+        glob = Glob()
+        class X(object):
+            pass
+        #
+        def callback(p1, p2):
+            for i in range(100):
+                glob.lst.append(X())
+            return rffi.cast(rffi.INT, 1)
+        CALLBACK = lltype.Ptr(lltype.FuncType([lltype.Signed,
+                                               lltype.Signed], rffi.INT))
+        #
+        @dont_look_inside
+        def alloc1():
+            return llmemory.raw_malloc(16)
+        @dont_look_inside
+        def free1(p):
+            llmemory.raw_free(p)
+        #
+        def f42():
+            length = len(glob.lst)
+            c_qsort = glob.c_qsort
+            raw = alloc1()
+            fn = llhelper(CALLBACK, rffi._make_wrapper_for(CALLBACK, callback))
+            argchain = ArgChain()
+            argchain = argchain.arg(rffi.cast(lltype.Signed, raw))
+            argchain = argchain.arg(rffi.cast(rffi.SIZE_T, 2))
+            argchain = argchain.arg(rffi.cast(rffi.SIZE_T, 8))
+            argchain = argchain.arg(rffi.cast(lltype.Signed, fn))
+            c_qsort.call(argchain, lltype.Void)
+            free1(raw)
+            check(len(glob.lst) > length)
+            del glob.lst[:]
+        #
         def before(n, x):
-            return n, x, None, None, None, None, None, None, None, None, [X(123)], None
-        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            if n < 1900:
-                check(l[0].x == 123)
-                l = [None] * 16
-                l[0] = X(123)
-                l[1] = X(n)
-                l[2] = X(n+10)
-                l[3] = X(n+20)
-                l[4] = X(n+30)
-                l[5] = X(n+40)
-                l[6] = X(n+50)
-                l[7] = X(n+60)
-                l[8] = X(n+70)
-                l[9] = X(n+80)
-                l[10] = X(n+90)
-                l[11] = X(n+100)
-                l[12] = X(n+110)
-                l[13] = X(n+120)
-                l[14] = X(n+130)
-                l[15] = X(n+140)
-            if n < 1800:
-                check(len(l) == 16)
-                check(l[0].x == 123)
-                check(l[1].x == n)
-                check(l[2].x == n+10)
-                check(l[3].x == n+20)
-                check(l[4].x == n+30)
-                check(l[5].x == n+40)
-                check(l[6].x == n+50)
-                check(l[7].x == n+60)
-                check(l[8].x == n+70)
-                check(l[9].x == n+80)
-                check(l[10].x == n+90)
-                check(l[11].x == n+100)
-                check(l[12].x == n+110)
-                check(l[13].x == n+120)
-                check(l[14].x == n+130)
-                check(l[15].x == n+140)
-            n -= x.foo
-            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
-        def after(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            check(len(l) == 16)
-            check(l[0].x == 123)
-            check(l[1].x == 2)
-            check(l[2].x == 12)
-            check(l[3].x == 22)
-            check(l[4].x == 32)
-            check(l[5].x == 42)
-            check(l[6].x == 52)
-            check(l[7].x == 62)
-            check(l[8].x == 72)
-            check(l[9].x == 82)
-            check(l[10].x == 92)
-            check(l[11].x == 102)
-            check(l[12].x == 112)
-            check(l[13].x == 122)
-            check(l[14].x == 132)
-            check(l[15].x == 142)
-        return before, f, after
-
-    def test_compile_framework_7(self):
-        self.run('compile_framework_7')
-
-    def define_compile_framework_external_exception_handling(cls):
-        def before(n, x):
-            x = X(0)
-            return n, x, None, None, None, None, None, None, None, None, None, None
-
-        @dont_look_inside
-        def g(x):
-            if x > 200:
-                return 2
-            raise ValueError
-        @dont_look_inside
-        def h(x):
-            if x > 150:
-                raise ValueError
-            return 2
-
-        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            try:
-                x.x += g(n)
-            except ValueError:
-                x.x += 1
-            try:
-                x.x += h(n)
-            except ValueError:
-                x.x -= 1
+            libc = CDLL(libc_name)
+            types_size_t = clibffi.cast_type_to_ffitype(rffi.SIZE_T)
+            c_qsort = libc.getpointer('qsort', [types.pointer, types_size_t,
+                                                types_size_t, types.pointer],
+                                      types.void)
+            glob.c_qsort = c_qsort
+            glob.lst = []
+            return (n, None, None, None, None, None,
+                    None, None, None, None, None, None)
+        #
+        def f(n, x, *args):
+            f42()
             n -= 1
-            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
-
-        def after(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            check(x.x == 1800 * 2 + 1850 * 2 + 200 - 150)
-
+            return (n, x) + args
         return before, f, None
 
-    def test_compile_framework_external_exception_handling(self):
-        self.run('compile_framework_external_exception_handling')
+    def test_close_stack(self):
+        self.run('close_stack')
 
-    def define_compile_framework_bug1(self):
-        @purefunction
-        def nonmoving():
-            x = X(1)
-            for i in range(7):
-                rgc.collect()
-            return x
 
-        @dont_look_inside
-        def do_more_stuff():
-            x = X(5)
-            for i in range(7):
-                rgc.collect()
-            return x
-
-        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            x0 = do_more_stuff()
-            check(nonmoving().x == 1)
-            n -= 1
-            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
-
-        return None, f, None
-
-    def test_compile_framework_bug1(self):
-        self.run('compile_framework_bug1', 200)
-
-    def define_compile_framework_vref(self):
-        from pypy.rlib.jit import virtual_ref, virtual_ref_finish
-        class A:
-            pass
-        glob = A()
-        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            a = A()
-            glob.v = vref = virtual_ref(a)
-            virtual_ref_finish(vref, a)
-            n -= 1
-            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
-        return None, f, None
-
-    def test_compile_framework_vref(self):
-        self.run('compile_framework_vref', 200)
-
-    def define_compile_framework_float(self):
-        # test for a bug: the fastpath_malloc does not save and restore
-        # xmm registers around the actual call to the slow path
-        class A:
-            x0 = x1 = x2 = x3 = x4 = x5 = x6 = x7 = 0
-        @dont_look_inside
-        def escape1(a):
-            a.x0 += 0
-            a.x1 += 6
-            a.x2 += 12
-            a.x3 += 18
-            a.x4 += 24
-            a.x5 += 30
-            a.x6 += 36
-            a.x7 += 42
-        @dont_look_inside
-        def escape2(n, f0, f1, f2, f3, f4, f5, f6, f7):
-            check(f0 == n + 0.0)
-            check(f1 == n + 0.125)
-            check(f2 == n + 0.25)
-            check(f3 == n + 0.375)
-            check(f4 == n + 0.5)
-            check(f5 == n + 0.625)
-            check(f6 == n + 0.75)
-            check(f7 == n + 0.875)
-        @unroll_safe
-        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            i = 0
-            while i < 42:
-                m = n + i
-                f0 = m + 0.0
-                f1 = m + 0.125
-                f2 = m + 0.25
-                f3 = m + 0.375
-                f4 = m + 0.5
-                f5 = m + 0.625
-                f6 = m + 0.75
-                f7 = m + 0.875
-                a1 = A()
-                # at this point, all or most f's are still in xmm registers
-                escape1(a1)
-                escape2(m, f0, f1, f2, f3, f4, f5, f6, f7)
-                i += 1
-            n -= 1
-            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
-        return None, f, None
-
-    def test_compile_framework_float(self):
-        self.run('compile_framework_float')
-
-    def define_compile_framework_minimal_size_in_nursery(self):
-        S = lltype.GcStruct('S')    # no fields!
-        T = lltype.GcStruct('T', ('i', lltype.Signed))
-        @unroll_safe
-        def f42(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
-            lst1 = []
-            lst2 = []
-            i = 0
-            while i < 42:
-                s1 = lltype.malloc(S)
-                t1 = lltype.malloc(T)
-                t1.i = 10000 + i + n
-                lst1.append(s1)
-                lst2.append(t1)
-                i += 1
-            i = 0
-            while i < 42:
-                check(lst2[i].i == 10000 + i + n)
-                i += 1
-            n -= 1
-            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
-        return None, f42, None
-
-    def test_compile_framework_minimal_size_in_nursery(self):
-        self.run('compile_framework_minimal_size_in_nursery')
-
-
-class TestShadowStack(CompileFrameworkTests):
+class TestShadowStack(ReleaseGILTests):
     gcrootfinder = "shadowstack"
 
-class TestAsmGcc(CompileFrameworkTests):
+class TestAsmGcc(ReleaseGILTests):
     gcrootfinder = "asmgcc"
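(The new ReleaseGILTests exercise external C calls, and in close_stack a
callback back into RPython, made through pypy.rlib.libffi in a build compiled
with thread=True.  For readers unfamiliar with the strchr call in
define_simple, here is the same call made with plain ctypes; this is only an
analogy for reference, not code used by the test, and it assumes a Unix libc
is available:)

    import ctypes, ctypes.util
    libc = ctypes.CDLL(ctypes.util.find_library("c"))
    libc.strchr.argtypes = [ctypes.c_char_p, ctypes.c_int]
    libc.strchr.restype = ctypes.c_char_p
    assert libc.strchr(b"foobar!", ord("b")) == b"bar!"
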
diff --git a/pypy/jit/backend/x86/test/test_ztranslation.py b/pypy/jit/backend/x86/test/test_ztranslation.py
--- a/pypy/jit/backend/x86/test/test_ztranslation.py
+++ b/pypy/jit/backend/x86/test/test_ztranslation.py
@@ -2,7 +2,7 @@
 from pypy.tool.udir import udir
 from pypy.rlib.jit import JitDriver, unroll_parameters
 from pypy.rlib.jit import PARAMETERS, dont_look_inside
-from pypy.rlib.jit import hint
+from pypy.rlib.jit import promote
 from pypy.jit.metainterp.jitprof import Profiler
 from pypy.jit.backend.detect_cpu import getcpuclass
 from pypy.jit.backend.test.support import CCompiledMixin
@@ -78,8 +78,7 @@
             x = float(j)
             while i > 0:
                 jitdriver2.jit_merge_point(i=i, res=res, func=func, x=x)
-                jitdriver2.can_enter_jit(i=i, res=res, func=func, x=x)
-                func = hint(func, promote=True)
+                promote(func)
                 argchain = ArgChain()
                 argchain.arg(x)
                 res = func.call(argchain, rffi.DOUBLE)
diff --git a/pypy/jit/codewriter/assembler.py b/pypy/jit/codewriter/assembler.py
--- a/pypy/jit/codewriter/assembler.py
+++ b/pypy/jit/codewriter/assembler.py
@@ -76,7 +76,8 @@
                 TYPE = llmemory.Address
             if TYPE == llmemory.Address:
                 value = heaptracker.adr2int(value)
-            elif not isinstance(value, ComputedIntSymbolic):
+            if not isinstance(value, (llmemory.AddressAsInt,
+                                      ComputedIntSymbolic)):
                 value = lltype.cast_primitive(lltype.Signed, value)
                 if allow_short and -128 <= value <= 127:
                     # emit the constant as a small integer
diff --git a/pypy/jit/codewriter/call.py b/pypy/jit/codewriter/call.py
--- a/pypy/jit/codewriter/call.py
+++ b/pypy/jit/codewriter/call.py
@@ -208,12 +208,12 @@
         assert NON_VOID_ARGS == [T for T in ARGS if T is not lltype.Void]
         assert RESULT == FUNC.RESULT
         # ok
-        # get the 'pure' and 'loopinvariant' flags from the function object
-        pure = False
+        # get the 'elidable' and 'loopinvariant' flags from the function object
+        elidable = False
         loopinvariant = False
         if op.opname == "direct_call":
             func = getattr(get_funcobj(op.args[0].value), '_callable', None)
-            pure = getattr(func, "_pure_function_", False)
+            elidable = getattr(func, "_elidable_function_", False)
             loopinvariant = getattr(func, "_jit_loop_invariant_", False)
             if loopinvariant:
                 assert not NON_VOID_ARGS, ("arguments not supported for "
@@ -225,9 +225,9 @@
                 extraeffect = EffectInfo.EF_FORCES_VIRTUAL_OR_VIRTUALIZABLE
             elif loopinvariant:
                 extraeffect = EffectInfo.EF_LOOPINVARIANT
-            elif pure:
+            elif elidable:
                 # XXX check what to do about exceptions (also MemoryError?)
-                extraeffect = EffectInfo.EF_PURE
+                extraeffect = EffectInfo.EF_ELIDABLE
             elif self._canraise(op):
                 extraeffect = EffectInfo.EF_CAN_RAISE
             else:
@@ -237,7 +237,9 @@
             self.readwrite_analyzer.analyze(op), self.cpu, extraeffect,
             oopspecindex, can_invalidate)
         #
-        if pure or loopinvariant:
+        if oopspecindex != EffectInfo.OS_NONE:
+            assert effectinfo is not None
+        if elidable or loopinvariant:
             assert effectinfo is not None
             assert extraeffect != EffectInfo.EF_FORCES_VIRTUAL_OR_VIRTUALIZABLE
             # XXX this should also say assert not can_invalidate, but
diff --git a/pypy/jit/codewriter/effectinfo.py b/pypy/jit/codewriter/effectinfo.py
--- a/pypy/jit/codewriter/effectinfo.py
+++ b/pypy/jit/codewriter/effectinfo.py
@@ -9,7 +9,7 @@
     _cache = {}
 
     # the 'extraeffect' field is one of the following values:
-    EF_PURE                            = 0 #pure function (and cannot raise)
+    EF_ELIDABLE                        = 0 #elidable function (and cannot raise)
     EF_LOOPINVARIANT                   = 1 #special: call it only once per loop
     EF_CANNOT_RAISE                    = 2 #a function which cannot raise
     EF_CAN_RAISE                       = 3 #normal function (can raise)
@@ -75,12 +75,13 @@
     #
     OS_MATH_SQRT                = 100
 
-    def __new__(cls, readonly_descrs_fields,
+    def __new__(cls, readonly_descrs_fields, readonly_descrs_arrays,
                 write_descrs_fields, write_descrs_arrays,
                 extraeffect=EF_CAN_RAISE,
                 oopspecindex=OS_NONE,
                 can_invalidate=False):
         key = (frozenset(readonly_descrs_fields),
+               frozenset(readonly_descrs_arrays),
                frozenset(write_descrs_fields),
                frozenset(write_descrs_arrays),
                extraeffect,
@@ -89,8 +90,9 @@
             return cls._cache[key]
         result = object.__new__(cls)
         result.readonly_descrs_fields = readonly_descrs_fields
+        result.readonly_descrs_arrays = readonly_descrs_arrays
         if extraeffect == EffectInfo.EF_LOOPINVARIANT or \
-           extraeffect == EffectInfo.EF_PURE:            
+           extraeffect == EffectInfo.EF_ELIDABLE:
             result.write_descrs_fields = []
             result.write_descrs_arrays = []
         else:
@@ -108,6 +110,9 @@
     def check_forces_virtual_or_virtualizable(self):
         return self.extraeffect >= self.EF_FORCES_VIRTUAL_OR_VIRTUALIZABLE
 
+    def has_random_effects(self):
+        return self.oopspecindex == self.OS_LIBFFI_CALL
+
 def effectinfo_from_writeanalyze(effects, cpu,
                                  extraeffect=EffectInfo.EF_CAN_RAISE,
                                  oopspecindex=EffectInfo.OS_NONE,
@@ -116,7 +121,7 @@
     if effects is top_set:
         return None
     readonly_descrs_fields = []
-    # readonly_descrs_arrays = [] --- not enabled for now
+    readonly_descrs_arrays = []
     write_descrs_fields = []
     write_descrs_arrays = []
 
@@ -142,10 +147,13 @@
         elif tup[0] == "array":
             add_array(write_descrs_arrays, tup)
         elif tup[0] == "readarray":
-            pass
+            tupw = ("array",) + tup[1:]
+            if tupw not in effects:
+                add_array(readonly_descrs_arrays, tup)
         else:
             assert 0
     return EffectInfo(readonly_descrs_fields,
+                      readonly_descrs_arrays,
                       write_descrs_fields,
                       write_descrs_arrays,
                       extraeffect,
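(The intent of the new readarray handling is easiest to see on a small effect
set: a read of an array counts as read-only only if the same call does not
also write to that array, in which case the write subsumes it; this is exactly
what test_dont_include_read_and_write_array below checks.  A toy model of the
rule, with made-up descr values:)

    def split_array_effects(effects):
        # 'effects' is a set of tuples as produced by the write analyzer
        reads, writes = [], []
        for tup in effects:
            if tup[0] == "array":
                writes.append(tup[1])
            elif tup[0] == "readarray" and ("array",) + tup[1:] not in effects:
                reads.append(tup[1])
        return reads, writes

    assert split_array_effects({("readarray", "A")}) == (["A"], [])
    assert split_array_effects({("readarray", "A"), ("array", "A")}) == ([], ["A"])
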
diff --git a/pypy/jit/codewriter/jtransform.py b/pypy/jit/codewriter/jtransform.py
--- a/pypy/jit/codewriter/jtransform.py
+++ b/pypy/jit/codewriter/jtransform.py
@@ -768,10 +768,10 @@
         from pypy.rpython.lltypesystem.rffi import size_and_sign, sizeof
         from pypy.rlib.rarithmetic import intmask
         assert not self._is_gc(op.args[0])
-        size1, unsigned1 = size_and_sign(op.args[0].concretetype)
         size2, unsigned2 = size_and_sign(op.result.concretetype)
         if size2 >= sizeof(lltype.Signed):
             return     # the target type is LONG or ULONG
+        size1, unsigned1 = size_and_sign(op.args[0].concretetype)
         #
         def bounds(size, unsigned):
             if unsigned:
@@ -800,6 +800,13 @@
             result[-1].result = op.result
         return result
 
+    def rewrite_op_direct_ptradd(self, op):
+        from pypy.rpython.lltypesystem import rffi
+        # xxx otherwise, not implemented:
+        assert op.args[0].concretetype == rffi.CCHARP
+        #
+        return SpaceOperation('int_add', [op.args[0], op.args[1]], op.result)
+
     # ----------
     # Long longs, for 32-bit only.  Supported operations are left unmodified,
     # and unsupported ones are turned into a call to a function from
@@ -847,7 +854,7 @@
                 op1 = self.prepare_builtin_call(op, "llong_%s", args)
                 op2 = self._handle_oopspec_call(op1, args,
                                                 EffectInfo.OS_LLONG_%s,
-                                                EffectInfo.EF_PURE)
+                                                EffectInfo.EF_ELIDABLE)
                 if %r == "TO_INT":
                     assert op2.result.concretetype == lltype.Signed
                 return op2
@@ -1328,13 +1335,13 @@
                     otherindex += EffectInfo._OS_offset_uni
                 self._register_extra_helper(otherindex, othername,
                                             argtypes, resulttype,
-                                            EffectInfo.EF_PURE)
+                                            EffectInfo.EF_ELIDABLE)
         #
         return self._handle_oopspec_call(op, args, dict[oopspec_name],
-                                         EffectInfo.EF_PURE)
+                                         EffectInfo.EF_ELIDABLE)
 
     def _handle_str2unicode_call(self, op, oopspec_name, args):
-        # ll_str2unicode is not EF_PURE, because it can raise
+        # ll_str2unicode is not EF_ELIDABLE, because it can raise
         # UnicodeDecodeError...
         return self._handle_oopspec_call(op, args, EffectInfo.OS_STR2UNICODE)
 
@@ -1380,7 +1387,7 @@
     
     def _handle_math_sqrt_call(self, op, oopspec_name, args):
         return self._handle_oopspec_call(op, args, EffectInfo.OS_MATH_SQRT,
-                                         EffectInfo.EF_PURE)
+                                         EffectInfo.EF_ELIDABLE)
 
     def rewrite_op_jit_force_quasi_immutable(self, op):
         v_inst, c_fieldname = op.args
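(rewrite_op_direct_ptradd above is valid because it only accepts CCHARP, whose
items are single bytes, so advancing the pointer by n is literally adding n to
the address; the corresponding flattened trace is checked in test_direct_ptradd
further down.  A ctypes illustration of the same fact, given only as an aside:)

    import ctypes
    buf = ctypes.create_string_buffer(b"foobar")
    base = ctypes.addressof(buf)
    # a char* advanced by 3 is just the address plus 3
    assert ctypes.c_char_p(base + 3).value == b"bar"
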
diff --git a/pypy/jit/codewriter/policy.py b/pypy/jit/codewriter/policy.py
--- a/pypy/jit/codewriter/policy.py
+++ b/pypy/jit/codewriter/policy.py
@@ -35,8 +35,8 @@
     def _reject_function(self, func):
         if hasattr(func, '_jit_look_inside_'):
             return not func._jit_look_inside_
-        # explicitly pure functions are always opaque
-        if getattr(func, '_pure_function_', False):
+        # explicitly elidable functions are always opaque
+        if getattr(func, '_elidable_function_', False):
             return True
         # pypy.rpython.module.* are opaque helpers
         mod = func.__module__ or '?'
@@ -44,10 +44,6 @@
             return True
         if mod.startswith('pypy.translator.'): # XXX wtf?
             return True
-        # string builder interface
-        if mod == 'pypy.rpython.lltypesystem.rbuilder':
-            return True
-        
         return False
 
     def look_inside_graph(self, graph):
diff --git a/pypy/jit/codewriter/test/test_effectinfo.py b/pypy/jit/codewriter/test/test_effectinfo.py
--- a/pypy/jit/codewriter/test/test_effectinfo.py
+++ b/pypy/jit/codewriter/test/test_effectinfo.py
@@ -34,6 +34,15 @@
     assert not effectinfo.readonly_descrs_fields
     assert not effectinfo.write_descrs_arrays
 
+def test_include_read_array():
+    A = lltype.GcArray(lltype.Signed)
+    effects = frozenset([("readarray", lltype.Ptr(A))])
+    effectinfo = effectinfo_from_writeanalyze(effects, FakeCPU())
+    assert not effectinfo.readonly_descrs_fields
+    assert list(effectinfo.readonly_descrs_arrays) == [('arraydescr', A)]
+    assert not effectinfo.write_descrs_fields
+    assert not effectinfo.write_descrs_arrays
+
 def test_include_write_array():
     A = lltype.GcArray(lltype.Signed)
     effects = frozenset([("array", lltype.Ptr(A))])
@@ -51,6 +60,16 @@
     assert list(effectinfo.write_descrs_fields) == [('fielddescr', S, "a")]
     assert not effectinfo.write_descrs_arrays
 
+def test_dont_include_read_and_write_array():
+    A = lltype.GcArray(lltype.Signed)
+    effects = frozenset([("readarray", lltype.Ptr(A)),
+                         ("array", lltype.Ptr(A))])
+    effectinfo = effectinfo_from_writeanalyze(effects, FakeCPU())
+    assert not effectinfo.readonly_descrs_fields
+    assert not effectinfo.readonly_descrs_arrays
+    assert not effectinfo.write_descrs_fields
+    assert list(effectinfo.write_descrs_arrays) == [('arraydescr', A)]
+
 
 def test_filter_out_typeptr():
     effects = frozenset([("struct", lltype.Ptr(OBJECT), "typeptr")])
diff --git a/pypy/jit/codewriter/test/test_flatten.py b/pypy/jit/codewriter/test/test_flatten.py
--- a/pypy/jit/codewriter/test/test_flatten.py
+++ b/pypy/jit/codewriter/test/test_flatten.py
@@ -813,6 +813,15 @@
             int_return %i0
         """, transform=True)
 
+    def test_direct_ptradd(self):
+        from pypy.rpython.lltypesystem import rffi
+        def f(p, n):
+            return lltype.direct_ptradd(p, n)
+        self.encoding_test(f, [lltype.nullptr(rffi.CCHARP.TO), 123], """
+            int_add %i0, %i1 -> %i2
+            int_return %i2
+        """, transform=True)
+
 
 def check_force_cast(FROM, TO, operations, value):
     """Check that the test is correctly written..."""
diff --git a/pypy/jit/codewriter/test/test_jtransform.py b/pypy/jit/codewriter/test/test_jtransform.py
--- a/pypy/jit/codewriter/test/test_jtransform.py
+++ b/pypy/jit/codewriter/test/test_jtransform.py
@@ -122,7 +122,7 @@
             if oopspecindex == EI.OS_STR2UNICODE:
                 assert extraeffect == None    # not pure, can raise!
             else:
-                assert extraeffect == EI.EF_PURE
+                assert extraeffect == EI.EF_ELIDABLE
         return 'calldescr-%d' % oopspecindex
     def calldescr_canraise(self, calldescr):
         return False
diff --git a/pypy/jit/codewriter/test/test_policy.py b/pypy/jit/codewriter/test/test_policy.py
--- a/pypy/jit/codewriter/test/test_policy.py
+++ b/pypy/jit/codewriter/test/test_policy.py
@@ -45,8 +45,8 @@
     policy.set_supports_floats(False)
     assert not policy.look_inside_graph(graph)
 
-def test_purefunction():
-    @jit.purefunction
+def test_elidable():
+    @jit.elidable
     def g(x):
         return x + 2
     graph = support.getgraph(g, [5])
diff --git a/pypy/jit/metainterp/blackhole.py b/pypy/jit/metainterp/blackhole.py
--- a/pypy/jit/metainterp/blackhole.py
+++ b/pypy/jit/metainterp/blackhole.py
@@ -3,7 +3,7 @@
 from pypy.rlib.rarithmetic import intmask, LONG_BIT, r_uint, ovfcheck
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.rlib.debug import debug_start, debug_stop
-from pypy.rlib.debug import make_sure_not_resized, fatalerror
+from pypy.rlib.debug import make_sure_not_resized
 from pypy.rpython.lltypesystem import lltype, llmemory, rclass
 from pypy.rpython.lltypesystem.lloperation import llop
 from pypy.rpython.llinterp import LLException
diff --git a/pypy/jit/metainterp/compile.py b/pypy/jit/metainterp/compile.py
--- a/pypy/jit/metainterp/compile.py
+++ b/pypy/jit/metainterp/compile.py
@@ -3,7 +3,8 @@
 from pypy.rpython.ootypesystem import ootype
 from pypy.objspace.flow.model import Constant, Variable
 from pypy.rlib.objectmodel import we_are_translated
-from pypy.rlib.debug import debug_start, debug_stop
+from pypy.rlib.debug import debug_start, debug_stop, debug_print
+from pypy.rlib import rstack
 from pypy.conftest import option
 from pypy.tool.sourcetools import func_with_new_name
 
@@ -13,8 +14,8 @@
 from pypy.jit.metainterp.history import BoxPtr, BoxObj, BoxFloat, Const
 from pypy.jit.metainterp import history
 from pypy.jit.metainterp.typesystem import llhelper, oohelper
-from pypy.jit.metainterp.optimizeutil import InvalidLoop
-from pypy.jit.metainterp.resume import NUMBERING
+from pypy.jit.metainterp.optimize import InvalidLoop
+from pypy.jit.metainterp.resume import NUMBERING, PENDINGFIELDSP
 from pypy.jit.codewriter import heaptracker, longlong
 
 def giveup():
@@ -118,6 +119,7 @@
         old_loop_token = optimize_loop(metainterp_sd, old_loop_tokens, loop,
                                        jitdriver_sd.warmstate.enable_opts)
     except InvalidLoop:
+        debug_print("compile_new_loop: got an InvalidLoop")
         return None
     if old_loop_token is not None:
         metainterp.staticdata.log("reusing old loop")
@@ -156,6 +158,7 @@
 def send_loop_to_backend(greenkey, jitdriver_sd, metainterp_sd, loop, type):
     jitdriver_sd.on_compile(metainterp_sd.logger_ops, loop.token,
                             loop.operations, type, greenkey)
+    loopname = jitdriver_sd.warmstate.get_location_str(greenkey)
     globaldata = metainterp_sd.globaldata
     loop_token = loop.token
     loop_token.number = n = globaldata.loopnumbering
@@ -170,7 +173,7 @@
     debug_start("jit-backend")
     try:
         ops_offset = metainterp_sd.cpu.compile_loop(loop.inputargs, operations,
-                                                    loop.token)
+                                                    loop.token, name=loopname)
     finally:
         debug_stop("jit-backend")
     metainterp_sd.profiler.end_backend()
@@ -300,7 +303,7 @@
     rd_numb = lltype.nullptr(NUMBERING)
     rd_consts = None
     rd_virtuals = None
-    rd_pendingfields = None
+    rd_pendingfields = lltype.nullptr(PENDINGFIELDSP.TO)
 
     CNT_INT   = -0x20000000
     CNT_REF   = -0x40000000
@@ -452,9 +455,17 @@
         # Called during a residual call from the assembler, if the code
         # actually needs to force one of the virtualrefs or the virtualizable.
         # Implemented by forcing *all* virtualrefs and the virtualizable.
-        faildescr = cpu.force(token)
-        assert isinstance(faildescr, ResumeGuardForcedDescr)
-        faildescr.handle_async_forcing(token)
+
+        # don't interrupt me! If the stack runs out in force_from_resumedata()
+        # then we have seen cpu.force() but not self.save_data(), leaving us
+        # in an inconsistent state
+        rstack._stack_criticalcode_start()
+        try:
+            faildescr = cpu.force(token)
+            assert isinstance(faildescr, ResumeGuardForcedDescr)
+            faildescr.handle_async_forcing(token)
+        finally:
+            rstack._stack_criticalcode_stop()
 
     def handle_async_forcing(self, force_token):
         from pypy.jit.metainterp.resume import force_from_resumedata
@@ -623,6 +634,7 @@
                                             new_loop, state.enable_opts,
                                             inline_short_preamble, retraced)
     except InvalidLoop:
+        debug_print("compile_new_bridge: got an InvalidLoop")
         # XXX I am fairly convinced that optimize_bridge cannot actually raise
         # InvalidLoop
         return None
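
The force_now() change above is a try/finally critical-section pattern: once
cpu.force() has run, handle_async_forcing() must run too, even if a stack
overflow hits in between, so both calls sit between the
_stack_criticalcode_start/_stop markers.  As a generic sketch of the same shape
(the names 'start', 'stop', 'force' and 'handle' are placeholders, not the real
rstack/cpu API):

    def guarded_force(start, stop, force, handle, token):
        start()                 # enter the critical section
        try:
            descr = force(token)
            handle(descr, token)
        finally:
            stop()              # always leave it, even on an exception
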
diff --git a/pypy/jit/metainterp/executor.py b/pypy/jit/metainterp/executor.py
--- a/pypy/jit/metainterp/executor.py
+++ b/pypy/jit/metainterp/executor.py
@@ -82,9 +82,6 @@
 do_call_loopinvariant = do_call
 do_call_may_force = do_call
 
-def do_call_c(cpu, metainterp, argboxes, descr):
-    raise NotImplementedError("Should never be called directly")
-
 def do_getarrayitem_gc(cpu, _, arraybox, indexbox, arraydescr):
     array = arraybox.getref_base()
     index = indexbox.getint()
@@ -319,9 +316,11 @@
             if value in (rop.FORCE_TOKEN,
                          rop.CALL_ASSEMBLER,
                          rop.COND_CALL_GC_WB,
+                         rop.COND_CALL_GC_WB_ARRAY,
                          rop.DEBUG_MERGE_POINT,
                          rop.JIT_DEBUG,
                          rop.SETARRAYITEM_RAW,
+                         rop.CALL_RELEASE_GIL,
                          rop.QUASIIMMUT_FIELD,
                          ):      # list of opcodes never executed by pyjitpl
                 continue
diff --git a/pypy/jit/metainterp/history.py b/pypy/jit/metainterp/history.py
--- a/pypy/jit/metainterp/history.py
+++ b/pypy/jit/metainterp/history.py
@@ -4,7 +4,7 @@
 from pypy.rpython.ootypesystem import ootype
 from pypy.rlib.objectmodel import we_are_translated, r_dict, Symbolic
 from pypy.rlib.objectmodel import compute_unique_id
-from pypy.rlib.rarithmetic import intmask, r_int64
+from pypy.rlib.rarithmetic import r_int64
 from pypy.conftest import option
 
 from pypy.jit.metainterp.resoperation import ResOperation, rop
@@ -712,10 +712,14 @@
         return -2      # xxx risk of changing hash...
 
 def make_hashable_int(i):
+    from pypy.rpython.lltypesystem.ll2ctypes import NotCtypesAllocatedStructure
     if not we_are_translated() and isinstance(i, llmemory.AddressAsInt):
         # Warning: such a hash changes at the time of translation
         adr = heaptracker.int2adr(i)
-        return llmemory.cast_adr_to_int(adr, "emulated")
+        try:
+            return llmemory.cast_adr_to_int(adr, "emulated")
+        except NotCtypesAllocatedStructure:
+            return 12345 # use an arbitrary number for the hash
     return i
 
 def get_const_ptr_for_string(s):
@@ -761,6 +765,7 @@
     """
     short_preamble = None
     failed_states = None
+    retraced_count = 0
     terminating = False # see TerminatingLoopToken in compile.py
     outermost_jitdriver_sd = None
     # and more data specified by the backend when the loop is compiled
@@ -787,11 +792,13 @@
 
     def dump(self):
         self.compiled_loop_token.cpu.dump_loop_token(self)
+
 class TreeLoop(object):
     inputargs = None
     operations = None
     token = None
     call_pure_results = None
+    logops = None
     quasi_immutable_deps = None
 
     def __init__(self, name):
diff --git a/pypy/jit/metainterp/logger.py b/pypy/jit/metainterp/logger.py
--- a/pypy/jit/metainterp/logger.py
+++ b/pypy/jit/metainterp/logger.py
@@ -11,47 +11,71 @@
 
     def __init__(self, metainterp_sd, guard_number=False):
         self.metainterp_sd = metainterp_sd
-        self.ts = metainterp_sd.cpu.ts
         self.guard_number = guard_number
 
     def log_loop(self, inputargs, operations, number=0, type=None, ops_offset=None):
         if type is None:
             debug_start("jit-log-noopt-loop")
-            self._log_operations(inputargs, operations, ops_offset)
+            logops = self._log_operations(inputargs, operations, ops_offset)
             debug_stop("jit-log-noopt-loop")
         else:
             debug_start("jit-log-opt-loop")
             debug_print("# Loop", number, ":", type,
                         "with", len(operations), "ops")
-            self._log_operations(inputargs, operations, ops_offset)
+            logops = self._log_operations(inputargs, operations, ops_offset)
             debug_stop("jit-log-opt-loop")
+        return logops
 
     def log_bridge(self, inputargs, operations, number=-1, ops_offset=None):
         if number == -1:
             debug_start("jit-log-noopt-bridge")
-            self._log_operations(inputargs, operations, ops_offset)
+            logops = self._log_operations(inputargs, operations, ops_offset)
             debug_stop("jit-log-noopt-bridge")
         else:
             debug_start("jit-log-opt-bridge")
             debug_print("# bridge out of Guard", number,
                         "with", len(operations), "ops")
-            self._log_operations(inputargs, operations, ops_offset)
+            logops = self._log_operations(inputargs, operations, ops_offset)
             debug_stop("jit-log-opt-bridge")
+        return logops
 
     def log_short_preamble(self, inputargs, operations):
         debug_start("jit-log-short-preamble")
-        self._log_operations(inputargs, operations, ops_offset=None)
-        debug_stop("jit-log-short-preamble")            
+        logops = self._log_operations(inputargs, operations, ops_offset=None)
+        debug_stop("jit-log-short-preamble")
+        return logops
+
+    def _log_operations(self, inputargs, operations, ops_offset):
+        if not have_debug_prints():
+            return None
+        logops = self._make_log_operations()
+        logops._log_operations(inputargs, operations, ops_offset)
+        return logops
+
+    def _make_log_operations(self):
+        return LogOperations(self.metainterp_sd, self.guard_number)
+
+
+class LogOperations(object):
+    """
+    ResOperation logger.  Each instance contains a memo giving numbers
+    to boxes, and is typically used to log a single loop.
+    """
+    def __init__(self, metainterp_sd, guard_number):
+        self.metainterp_sd = metainterp_sd
+        self.ts = metainterp_sd.cpu.ts
+        self.guard_number = guard_number
+        self.memo = {}
 
     def repr_of_descr(self, descr):
         return descr.repr_of_descr()
 
-    def repr_of_arg(self, memo, arg):
+    def repr_of_arg(self, arg):
         try:
-            mv = memo[arg]
+            mv = self.memo[arg]
         except KeyError:
-            mv = len(memo)
-            memo[arg] = mv
+            mv = len(self.memo)
+            self.memo[arg] = mv
         if isinstance(arg, ConstInt):
             if int_could_be_an_address(arg.value):
                 addr = arg.getaddr()
@@ -75,11 +99,12 @@
         else:
             return '?'
 
-    def repr_of_resop(self, memo, op, ops_offset=None):
+    def repr_of_resop(self, op, ops_offset=None):
         if op.getopnum() == rop.DEBUG_MERGE_POINT:
-            loc = op.getarg(0)._get_str()
-            reclev = op.getarg(1).getint()
-            return "debug_merge_point('%s', %s)" % (loc, reclev)
+            jd_sd = self.metainterp_sd.jitdrivers_sd[op.getarg(0).getint()]
+            s = jd_sd.warmstate.get_location_str(op.getarglist()[2:])
+            s = s.replace(',', '.') # we use comma for argument splitting
+            return "debug_merge_point(%d, '%s')" % (op.getarg(1).getint(), s)
         if ops_offset is None:
             offset = -1
         else:
@@ -88,9 +113,10 @@
             s_offset = ""
         else:
             s_offset = "+%d: " % offset
-        args = ", ".join([self.repr_of_arg(memo, op.getarg(i)) for i in range(op.numargs())])
+        args = ", ".join([self.repr_of_arg(op.getarg(i)) for i in range(op.numargs())])
+
         if op.result is not None:
-            res = self.repr_of_arg(memo, op.result) + " = "
+            res = self.repr_of_arg(op.result) + " = "
         else:
             res = ""
         is_guard = op.is_guard()
@@ -103,7 +129,7 @@
                 r = self.repr_of_descr(descr)
             args += ', descr=' +  r
         if is_guard and op.getfailargs() is not None:
-            fail_args = ' [' + ", ".join([self.repr_of_arg(memo, arg)
+            fail_args = ' [' + ", ".join([self.repr_of_arg(arg)
                                           for arg in op.getfailargs()]) + ']'
         else:
             fail_args = ''
@@ -114,13 +140,12 @@
             return
         if ops_offset is None:
             ops_offset = {}
-        memo = {}
         if inputargs is not None:
-            args = ", ".join([self.repr_of_arg(memo, arg) for arg in inputargs])
+            args = ", ".join([self.repr_of_arg(arg) for arg in inputargs])
             debug_print('[' + args + ']')
         for i in range(len(operations)):
             op = operations[i]
-            debug_print(self.repr_of_resop(memo, operations[i], ops_offset))
+            debug_print(self.repr_of_resop(operations[i], ops_offset))
         if ops_offset and None in ops_offset:
             offset = ops_offset[None]
             debug_print("+%d: --end of the loop--" % offset)
diff --git a/pypy/jit/metainterp/optimize.py b/pypy/jit/metainterp/optimize.py
--- a/pypy/jit/metainterp/optimize.py
+++ b/pypy/jit/metainterp/optimize.py
@@ -1,9 +1,20 @@
 from pypy.rlib.debug import debug_start, debug_stop
+from pypy.jit.metainterp.jitexc import JitException
+
+class InvalidLoop(JitException):
+    """Raised when the optimize*.py detect that the loop that
+    we are trying to build cannot possibly make sense as a
+    long-running loop (e.g. it cannot run 2 complete iterations)."""
+
+class RetraceLoop(JitException):
+    """ Raised when inlining a short preamble resulted in an
+        InvalidLoop. This means the optimized loop is too specialized
+        to be useful here, so we trace it again and produce a second
+        copy specialized in some different way.
+    """
 
 # ____________________________________________________________
 
-from pypy.jit.metainterp.optimizeopt import optimize_loop_1, optimize_bridge_1
-
 def optimize_loop(metainterp_sd, old_loop_tokens, loop, enable_opts):
     debug_start("jit-optimize")
     try:
@@ -13,8 +24,9 @@
         debug_stop("jit-optimize")
 
 def _optimize_loop(metainterp_sd, old_loop_tokens, loop, enable_opts):
-    cpu = metainterp_sd.cpu
-    metainterp_sd.logger_noopt.log_loop(loop.inputargs, loop.operations)
+    from pypy.jit.metainterp.optimizeopt import optimize_loop_1
+    loop.logops = metainterp_sd.logger_noopt.log_loop(loop.inputargs,
+                                                      loop.operations)
     # XXX do we really still need a list?
     if old_loop_tokens:
         return old_loop_tokens[0]
@@ -35,8 +47,9 @@
 
 def _optimize_bridge(metainterp_sd, old_loop_tokens, bridge, enable_opts,
                      inline_short_preamble, retraced=False):
-    cpu = metainterp_sd.cpu
-    metainterp_sd.logger_noopt.log_loop(bridge.inputargs, bridge.operations)
+    from pypy.jit.metainterp.optimizeopt import optimize_bridge_1
+    bridge.logops = metainterp_sd.logger_noopt.log_loop(bridge.inputargs,
+                                                        bridge.operations)
     if old_loop_tokens:
         old_loop_token = old_loop_tokens[0]
         bridge.operations[-1].setdescr(old_loop_token)   # patch jump target
diff --git a/pypy/jit/metainterp/optimizeopt/__init__.py b/pypy/jit/metainterp/optimizeopt/__init__.py
--- a/pypy/jit/metainterp/optimizeopt/__init__.py
+++ b/pypy/jit/metainterp/optimizeopt/__init__.py
@@ -3,7 +3,7 @@
 from pypy.jit.metainterp.optimizeopt.intbounds import OptIntBounds
 from pypy.jit.metainterp.optimizeopt.virtualize import OptVirtualize
 from pypy.jit.metainterp.optimizeopt.heap import OptHeap
-from pypy.jit.metainterp.optimizeopt.string import OptString
+from pypy.jit.metainterp.optimizeopt.vstring import OptString
 from pypy.jit.metainterp.optimizeopt.unroll import optimize_unroll, OptInlineShortPreamble
 from pypy.jit.metainterp.optimizeopt.fficall import OptFfiCall
 from pypy.jit.metainterp.optimizeopt.simplify import OptSimplify
@@ -15,22 +15,20 @@
             ('virtualize', OptVirtualize),
             ('string', OptString),
             ('heap', OptHeap),
-            ('ffi', OptFfiCall),
+            ('ffi', None),
             ('unroll', None)]
 # no direct instantiation of unroll
 unroll_all_opts = unrolling_iterable(ALL_OPTS)
 
 ALL_OPTS_DICT = dict.fromkeys([name for name, _ in ALL_OPTS])
+ALL_OPTS_LIST = [name for name, _ in ALL_OPTS]
+ALL_OPTS_NAMES = ':'.join([name for name, _ in ALL_OPTS])
 
-ALL_OPTS_NAMES = ':'.join([name for name, _ in ALL_OPTS])
-PARAMETERS['enable_opts'] = ALL_OPTS_NAMES
-
-def optimize_loop_1(metainterp_sd, loop, enable_opts,
+def build_opt_chain(metainterp_sd, enable_opts,
                     inline_short_preamble=True, retraced=False):
-    """Optimize loop.operations to remove internal overheadish operations.
-    """
+    config = metainterp_sd.config
     optimizations = []
-    unroll = 'unroll' in enable_opts
+    unroll = 'unroll' in enable_opts    # 'enable_opts' is normally a dict
     for name, opt in unroll_all_opts:
         if name in enable_opts:
             if opt is not None:
@@ -40,6 +38,11 @@
                 # FIXME: Workaround to disable string optimisation
                 # during preamble but to keep it during the loop
                 optimizations.append(o)
+            elif name == 'ffi' and config.translation.jit_ffi:
+                # we cannot put the class directly in the unrolling_iterable,
+                # because we do not want it to be seen at all (to avoid
+                # introducing a dependency on libffi in case we do not need it)
+                optimizations.append(OptFfiCall())
 
     if ('rewrite' not in enable_opts or 'virtualize' not in enable_opts
         or 'heap' not in enable_opts):
@@ -48,6 +51,17 @@
     if inline_short_preamble:
         optimizations = [OptInlineShortPreamble(retraced)] + optimizations
 
+    return optimizations, unroll
+
+
+def optimize_loop_1(metainterp_sd, loop, enable_opts,
+                    inline_short_preamble=True, retraced=False):
+    """Optimize loop.operations to remove internal overheadish operations.
+    """
+
+    optimizations, unroll = build_opt_chain(metainterp_sd, enable_opts,
+                                            inline_short_preamble, retraced)
+
     if unroll:
         optimize_unroll(metainterp_sd, loop, optimizations)
     else:
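
The new build_opt_chain() factors the chain construction out of
optimize_loop_1 so it can be tested on its own.  A simplified sketch of what it
does, assuming a registry of (name, class-or-None) pairs like ALL_OPTS and a
dict-like enable_opts (this leaves out the 'ffi' and short-preamble special
cases):

    class OptRewrite(object): pass
    class OptVirtualize(object): pass
    class OptHeap(object): pass

    REGISTRY = [('rewrite', OptRewrite),
                ('virtualize', OptVirtualize),
                ('heap', OptHeap),
                ('unroll', None)]      # no class: unrolling is handled apart

    def build_chain(enable_opts):
        optimizations = []
        unroll = 'unroll' in enable_opts   # enable_opts is normally a dict
        for name, cls in REGISTRY:
            if name in enable_opts and cls is not None:
                optimizations.append(cls())
        return optimizations, unroll

    opts, unroll = build_chain(dict.fromkeys(['rewrite', 'heap', 'unroll']))
    assert [type(o).__name__ for o in opts] == ['OptRewrite', 'OptHeap']
    assert unroll
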
diff --git a/pypy/jit/metainterp/optimizeopt/fficall.py b/pypy/jit/metainterp/optimizeopt/fficall.py
--- a/pypy/jit/metainterp/optimizeopt/fficall.py
+++ b/pypy/jit/metainterp/optimizeopt/fficall.py
@@ -1,10 +1,13 @@
 from pypy.rpython.annlowlevel import cast_base_ptr_to_instance
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.rlib.libffi import Func
+from pypy.rlib.debug import debug_start, debug_stop, debug_print, have_debug_prints
 from pypy.jit.codewriter.effectinfo import EffectInfo
 from pypy.jit.metainterp.resoperation import rop, ResOperation
-from pypy.jit.metainterp.optimizeutil import _findall
+from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
 from pypy.jit.metainterp.optimizeopt.optimizer import Optimization
+from pypy.jit.backend.llsupport.ffisupport import UnsupportedKind
+
 
 class FuncInfo(object):
 
@@ -12,14 +15,18 @@
     restype = None
     descr = None
     prepare_op = None
-    force_token_op = None
 
     def __init__(self, funcval, cpu, prepare_op):
         self.funcval = funcval
         self.opargs = []
         argtypes, restype = self._get_signature(funcval)
-        self.descr = cpu.calldescrof_dynamic(argtypes, restype)
+        try:
+            self.descr = cpu.calldescrof_dynamic(argtypes, restype)
+        except UnsupportedKind:
+            # e.g., I or U for long longs
+            self.descr = None
         self.prepare_op = prepare_op
+        self.delayed_ops = []
 
     def _get_signature(self, funcval):
         """
@@ -64,37 +71,51 @@
 
 class OptFfiCall(Optimization):
 
-    def __init__(self):
+    def setup(self):
         self.funcinfo = None
+        if self.optimizer.loop is not None:
+            self.logops = self.optimizer.loop.logops
+        else:
+            self.logops = None
+
+    def propagate_begin_forward(self):
+        debug_start('jit-log-ffiopt')
+        Optimization.propagate_begin_forward(self)
+
+    def propagate_end_forward(self):
+        debug_stop('jit-log-ffiopt')
+        Optimization.propagate_end_forward(self)
 
     def reconstruct_for_next_iteration(self, optimizer, valuemap):
         return OptFfiCall()
         # FIXME: Should any status be saved for next iteration?
 
     def begin_optimization(self, funcval, op):
-        self.rollback_maybe()
+        self.rollback_maybe('begin_optimization', op)
         self.funcinfo = FuncInfo(funcval, self.optimizer.cpu, op)
 
     def commit_optimization(self):
         self.funcinfo = None
 
-    def rollback_maybe(self):
+    def rollback_maybe(self, msg, op):
         if self.funcinfo is None:
             return # nothing to rollback
         #
         # we immediately set funcinfo to None to prevent recursion when
         # calling emit_op
+        if self.logops is not None:
+            debug_print('rollback: ' + msg + ': ', self.logops.repr_of_resop(op))
         funcinfo = self.funcinfo
         self.funcinfo = None
         self.emit_operation(funcinfo.prepare_op)
         for op in funcinfo.opargs:
             self.emit_operation(op)
-        if funcinfo.force_token_op:
-            self.emit_operation(funcinfo.force_token_op)
+        for delayed_op in funcinfo.delayed_ops:
+            self.emit_operation(delayed_op)
 
     def emit_operation(self, op):
         # we cannot emit any operation during the optimization
-        self.rollback_maybe()
+        self.rollback_maybe('invalid op', op)
         Optimization.emit_operation(self, op)
 
     def optimize_CALL(self, op):
@@ -135,13 +156,18 @@
         # call_may_force and the setfield_gc, so the final result we get is
         # again force_token/setfield_gc/call_may_force.
         #
+        # However, note that nowadays we also allow any setfield_gc to appear
+        # between libffi_prepare and libffi_call, so while the comment above
+        # is a bit superfluous, it has been left there for future reference.
         if self.funcinfo is None:
             self.emit_operation(op)
         else:
-            self.funcinfo.force_token_op = op
+            self.funcinfo.delayed_ops.append(op)
+
+    optimize_SETFIELD_GC = optimize_FORCE_TOKEN
 
     def do_prepare_call(self, op):
-        self.rollback_maybe()
+        self.rollback_maybe('prepare call', op)
         funcval = self._get_funcval(op)
         if not funcval.is_constant():
             return [op] # cannot optimize
@@ -165,23 +191,19 @@
         for push_op in funcinfo.opargs:
             argval = self.getvalue(push_op.getarg(2))
             arglist.append(argval.force_box())
-        newop = ResOperation(rop.CALL_MAY_FORCE, arglist, op.result,
+        newop = ResOperation(rop.CALL_RELEASE_GIL, arglist, op.result,
                              descr=funcinfo.descr)
         self.commit_optimization()
         ops = []
-        if funcinfo.force_token_op:
-            ops.append(funcinfo.force_token_op)
+        for delayed_op in funcinfo.delayed_ops:
+            ops.append(delayed_op)
         ops.append(newop)
         return ops
 
     def propagate_forward(self, op):
-        opnum = op.getopnum()
-        for value, func in optimize_ops:
-            if opnum == value:
-                func(self, op)
-                break
-        else:
-            self.emit_operation(op)
+        if self.logops is not None:
+            debug_print(self.logops.repr_of_resop(op))
+        dispatch_opt(self, op)
 
     def _get_oopspec(self, op):
         effectinfo = op.getdescr().get_extra_info()
@@ -192,4 +214,5 @@
     def _get_funcval(self, op):
         return self.getvalue(op.getarg(1))
 
-optimize_ops = _findall(OptFfiCall, 'optimize_')
+dispatch_opt = make_dispatcher_method(OptFfiCall, 'optimize_',
+        default=OptFfiCall.emit_operation)
diff --git a/pypy/jit/metainterp/optimizeopt/heap.py b/pypy/jit/metainterp/optimizeopt/heap.py
--- a/pypy/jit/metainterp/optimizeopt/heap.py
+++ b/pypy/jit/metainterp/optimizeopt/heap.py
@@ -1,5 +1,5 @@
 import os
-from pypy.jit.metainterp.optimizeutil import _findall
+from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
 from pypy.jit.metainterp.resoperation import rop, ResOperation
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.jit.metainterp.jitexc import JitException
@@ -8,8 +8,8 @@
 
 class CachedField(object):
     def __init__(self):
-        # Cache information for a field descr.  It can be in one
-        # of two states:
+        # Cache information for a field descr, or for an (array descr, index)
+        # pair.  It can be in one of two states:
         #
         #   1. 'cached_fields' is a dict mapping OptValues of structs
         #      to OptValues of fields.  All fields on-heap are
@@ -27,19 +27,19 @@
         self._lazy_setfield_registered = False
 
     def do_setfield(self, optheap, op):
-        # Update the state with the SETFIELD_GC operation 'op'.
+        # Update the state with the SETFIELD_GC/SETARRAYITEM_GC operation 'op'.
         structvalue = optheap.getvalue(op.getarg(0))
-        fieldvalue  = optheap.getvalue(op.getarg(1))
+        fieldvalue  = optheap.getvalue(op.getarglist()[-1])
         if self.possible_aliasing(optheap, structvalue):
             self.force_lazy_setfield(optheap)
             assert not self.possible_aliasing(optheap, structvalue)
         cached_fieldvalue = self._cached_fields.get(structvalue, None)
         if cached_fieldvalue is not fieldvalue:
             # common case: store the 'op' as lazy_setfield, and register
-            # myself in the optheap's _lazy_setfields list
+            # myself in the optheap's _lazy_setfields_and_arrayitems list
             self._lazy_setfield = op
             if not self._lazy_setfield_registered:
-                optheap._lazy_setfields.append(self)
+                optheap._lazy_setfields_and_arrayitems.append(self)
                 self._lazy_setfield_registered = True
         else:
             # this is the case where the pending setfield ends up
@@ -65,7 +65,7 @@
         if self._lazy_setfield is not None:
             op = self._lazy_setfield
             assert optheap.getvalue(op.getarg(0)) is structvalue
-            return optheap.getvalue(op.getarg(1))
+            return optheap.getvalue(op.getarglist()[-1])
         else:
             return self._cached_fields.get(structvalue, None)
 
@@ -87,7 +87,7 @@
             # back in the cache: the value of this particular structure's
             # field.
             structvalue = optheap.getvalue(op.getarg(0))
-            fieldvalue  = optheap.getvalue(op.getarg(1))
+            fieldvalue  = optheap.getvalue(op.getarglist()[-1])
             self.remember_field_value(structvalue, fieldvalue)
 
     def get_reconstructed(self, optimizer, valuemap):
@@ -100,25 +100,20 @@
         return cf
 
 
-class CachedArrayItems(object):
-    def __init__(self):
-        self.fixed_index_items = {}
-        self.var_index_item = None
-        self.var_index_indexvalue = None
-
 class BogusPureField(JitException):
     pass
 
 
 class OptHeap(Optimization):
     """Cache repeated heap accesses"""
-    
+
     def __init__(self):
         # cached fields:  {descr: CachedField}
         self.cached_fields = {}
-        self._lazy_setfields = []
-        # cached array items:  {descr: CachedArrayItems}
+        # cached array items:  {array descr: {index: CachedField}}
         self.cached_arrayitems = {}
+        #
+        self._lazy_setfields_and_arrayitems = []
         self._remove_guard_not_invalidated = False
         self._seen_guard_not_invalidated = False
 
@@ -126,34 +121,23 @@
         new = OptHeap()
 
         if True:
-            self.force_all_lazy_setfields()
+            self.force_all_lazy_setfields_and_arrayitems()
         else:
             assert 0   # was: new.lazy_setfields = self.lazy_setfields
-        
+
         for descr, d in self.cached_fields.items():
             new.cached_fields[descr] = d.get_reconstructed(optimizer, valuemap)
 
-        new.cached_arrayitems = {}
-        for descr, d in self.cached_arrayitems.items():
-            newd = {}
-            new.cached_arrayitems[descr] = newd
-            for value, cache in d.items():
-                newcache = CachedArrayItems()
-                newd[value.get_reconstructed(optimizer, valuemap)] = newcache
-                if cache.var_index_item:
-                    newcache.var_index_item = \
-                          cache.var_index_item.get_reconstructed(optimizer, valuemap)
-                if cache.var_index_indexvalue:
-                    newcache.var_index_indexvalue = \
-                          cache.var_index_indexvalue.get_reconstructed(optimizer, valuemap)
-                for index, fieldvalue in cache.fixed_index_items.items():
-                    newcache.fixed_index_items[index] = \
-                           fieldvalue.get_reconstructed(optimizer, valuemap)
+        for descr, submap in self.cached_arrayitems.items():
+            newdict = {}
+            for index, d in submap.items():
+                newdict[index] = d.get_reconstructed(optimizer, valuemap)
+            new.cached_arrayitems[descr] = newdict
 
         return new
 
     def clean_caches(self):
-        del self._lazy_setfields[:]
+        del self._lazy_setfields_and_arrayitems[:]
         self.cached_fields.clear()
         self.cached_arrayitems.clear()
 
@@ -164,50 +148,16 @@
             cf = self.cached_fields[descr] = CachedField()
         return cf
 
-    def cache_arrayitem_value(self, descr, value, indexvalue, fieldvalue, write=False):
-        d = self.cached_arrayitems.get(descr, None)
-        if d is None:
-            d = self.cached_arrayitems[descr] = {}
-        cache = d.get(value, None)
-        if cache is None:
-            cache = d[value] = CachedArrayItems()
-        indexbox = self.get_constant_box(indexvalue.box)
-        if indexbox is not None:
-            index = indexbox.getint()
-            if write:
-                for value, othercache in d.iteritems():
-                    # fixed index, clean the variable index cache, in case the
-                    # index is the same
-                    othercache.var_index_indexvalue = None
-                    othercache.var_index_item = None
-                    try:
-                        del othercache.fixed_index_items[index]
-                    except KeyError:
-                        pass
-            cache.fixed_index_items[index] = fieldvalue
-        else:
-            if write:
-                for value, othercache in d.iteritems():
-                    # variable index, clear all caches for this descr
-                    othercache.var_index_indexvalue = None
-                    othercache.var_index_item = None
-                    othercache.fixed_index_items.clear()
-            cache.var_index_indexvalue = indexvalue
-            cache.var_index_item = fieldvalue
-
-    def read_cached_arrayitem(self, descr, value, indexvalue):
-        d = self.cached_arrayitems.get(descr, None)
-        if d is None:
-            return None
-        cache = d.get(value, None)
-        if cache is None:
-            return None
-        indexbox = self.get_constant_box(indexvalue.box)
-        if indexbox is not None:
-            return cache.fixed_index_items.get(indexbox.getint(), None)
-        elif cache.var_index_indexvalue is indexvalue:
-            return cache.var_index_item
-        return None
+    def arrayitem_cache(self, descr, index):
+        try:
+            submap = self.cached_arrayitems[descr]
+        except KeyError:
+            submap = self.cached_arrayitems[descr] = {}
+        try:
+            cf = submap[index]
+        except KeyError:
+            cf = submap[index] = CachedField()
+        return cf
 
     def emit_operation(self, op):
         self.emitting_operation(op)
@@ -219,7 +169,8 @@
         if op.is_ovf():
             return
         if op.is_guard():
-            self.optimizer.pendingfields = self.force_lazy_setfields_for_guard()
+            self.optimizer.pendingfields = (
+                self.force_lazy_setfields_and_arrayitems_for_guard())
             return
         opnum = op.getopnum()
         if (opnum == rop.SETFIELD_GC or        # handled specially
@@ -235,6 +186,7 @@
         assert opnum != rop.CALL_PURE
         if (opnum == rop.CALL or
             opnum == rop.CALL_MAY_FORCE or
+            opnum == rop.CALL_RELEASE_GIL or
             opnum == rop.CALL_ASSEMBLER):
             if opnum == rop.CALL_ASSEMBLER:
                 effectinfo = None
@@ -242,11 +194,13 @@
                 effectinfo = op.getdescr().get_extra_info()
             if effectinfo is None or effectinfo.check_can_invalidate():
                 self._seen_guard_not_invalidated = False
-            if effectinfo is not None:
+            if effectinfo is not None and not effectinfo.has_random_effects():
                 # XXX we can get the wrong complexity here, if the lists
                 # XXX stored on effectinfo are large
                 for fielddescr in effectinfo.readonly_descrs_fields:
                     self.force_lazy_setfield(fielddescr)
+                for arraydescr in effectinfo.readonly_descrs_arrays:
+                    self.force_lazy_setarrayitem(arraydescr)
                 for fielddescr in effectinfo.write_descrs_fields:
                     self.force_lazy_setfield(fielddescr)
                     try:
@@ -255,8 +209,11 @@
                     except KeyError:
                         pass
                 for arraydescr in effectinfo.write_descrs_arrays:
+                    self.force_lazy_setarrayitem(arraydescr)
                     try:
-                        del self.cached_arrayitems[arraydescr]
+                        submap = self.cached_arrayitems[arraydescr]
+                        for cf in submap.itervalues():
+                            cf._cached_fields.clear()
                     except KeyError:
                         pass
                 if effectinfo.check_forces_virtual_or_virtualizable():
@@ -265,7 +222,7 @@
                     # ^^^ we only need to force this field; the other fields
                     # of virtualref_info and virtualizable_info are not gcptrs.
                 return
-        self.force_all_lazy_setfields()
+        self.force_all_lazy_setfields_and_arrayitems()
         self.clean_caches()
 
 
@@ -276,6 +233,10 @@
             for cf in self.cached_fields.itervalues():
                 if value in cf._cached_fields:
                     cf._cached_fields[newvalue] = cf._cached_fields[value]
+            for submap in self.cached_arrayitems.itervalues():
+                for cf in submap.itervalues():
+                    if value in cf._cached_fields:
+                        cf._cached_fields[newvalue] = cf._cached_fields[value]
 
     def force_lazy_setfield(self, descr):
         try:
@@ -284,6 +245,14 @@
             return
         cf.force_lazy_setfield(self)
 
+    def force_lazy_setarrayitem(self, arraydescr):
+        try:
+            submap = self.cached_arrayitems[arraydescr]
+        except KeyError:
+            return
+        for cf in submap.values():
+            cf.force_lazy_setfield(self)
+
     def fixup_guard_situation(self):
         # hackish: reverse the order of the last two operations if it makes
         # sense to avoid a situation like "int_eq/setfield_gc/guard_true",
@@ -308,30 +277,49 @@
         newoperations[-2] = lastop
         newoperations[-1] = prevop
 
-    def force_all_lazy_setfields(self):
-        for cf in self._lazy_setfields:
-            if not we_are_translated():
-                assert cf in self.cached_fields.values()
+    def _assert_valid_cf(self, cf):
+        # check that 'cf' is in cached_fields or cached_arrayitems
+        if not we_are_translated():
+            if cf not in self.cached_fields.values():
+                for submap in self.cached_arrayitems.values():
+                    if cf in submap.values():
+                        break
+                else:
+                    assert 0, "'cf' not in cached_fields/cached_arrayitems"
+
+    def force_all_lazy_setfields_and_arrayitems(self):
+        for cf in self._lazy_setfields_and_arrayitems:
+            self._assert_valid_cf(cf)
             cf.force_lazy_setfield(self)
 
-    def force_lazy_setfields_for_guard(self):
+    def force_lazy_setfields_and_arrayitems_for_guard(self):
         pendingfields = []
-        for cf in self._lazy_setfields:
-            if not we_are_translated():
-                assert cf in self.cached_fields.values()
+        for cf in self._lazy_setfields_and_arrayitems:
+            self._assert_valid_cf(cf)
             op = cf._lazy_setfield
             if op is None:
                 continue
             # the only really interesting case that we need to handle in the
             # guards' resume data is that of a virtual object that is stored
-            # into a field of a non-virtual object.
+            # into a field of a non-virtual object.  Here, 'op' is either
+            # SETFIELD_GC or SETARRAYITEM_GC.
             value = self.getvalue(op.getarg(0))
             assert not value.is_virtual()      # it must be a non-virtual
-            fieldvalue = self.getvalue(op.getarg(1))
+            fieldvalue = self.getvalue(op.getarglist()[-1])
             if fieldvalue.is_virtual():
                 # this is the case that we leave to resume.py
+                opnum = op.getopnum()
+                if opnum == rop.SETFIELD_GC:
+                    itemindex = -1
+                elif opnum == rop.SETARRAYITEM_GC:
+                    indexvalue = self.getvalue(op.getarg(1))
+                    assert indexvalue.is_constant()
+                    itemindex = indexvalue.box.getint()
+                    assert itemindex >= 0
+                else:
+                    assert 0
                 pendingfields.append((op.getdescr(), value.box,
-                                      fieldvalue.get_key_box()))
+                                      fieldvalue.get_key_box(), itemindex))
             else:
                 cf.force_lazy_setfield(self)
                 self.fixup_guard_situation()
@@ -363,24 +351,45 @@
         cf.do_setfield(self, op)
 
     def optimize_GETARRAYITEM_GC(self, op):
-        value = self.getvalue(op.getarg(0))
+        arrayvalue = self.getvalue(op.getarg(0))
         indexvalue = self.getvalue(op.getarg(1))
-        fieldvalue = self.read_cached_arrayitem(op.getdescr(), value, indexvalue)
-        if fieldvalue is not None:
-            self.make_equal_to(op.result, fieldvalue)
-            return
-        ###self.optimizer.optimize_default(op)
+        cf = None
+        if indexvalue.is_constant():
+            # use the cache on (arraydescr, index), which is a constant
+            cf = self.arrayitem_cache(op.getdescr(), indexvalue.box.getint())
+            fieldvalue = cf.getfield_from_cache(self, arrayvalue)
+            if fieldvalue is not None:
+                self.make_equal_to(op.result, fieldvalue)
+                return
+        else:
+            # variable index, so make sure the lazy setarrayitems are done
+            self.force_lazy_setarrayitem(op.getdescr())
+        # default case: produce the operation
+        arrayvalue.ensure_nonnull()
         self.emit_operation(op)
-        fieldvalue = self.getvalue(op.result)
-        self.cache_arrayitem_value(op.getdescr(), value, indexvalue, fieldvalue)
+        # then remember the result of reading the array item
+        if cf is not None:
+            fieldvalue = self.getvalue(op.result)
+            cf.remember_field_value(arrayvalue, fieldvalue)
 
     def optimize_SETARRAYITEM_GC(self, op):
-        self.emit_operation(op)
-        value = self.getvalue(op.getarg(0))
-        fieldvalue = self.getvalue(op.getarg(2))
+        if self.has_pure_result(rop.GETARRAYITEM_GC_PURE, [op.getarg(0),
+                                                           op.getarg(1)],
+                                op.getdescr()):
+            os.write(2, '[bogus immutable array declaration: %s]\n' %
+                     (op.getdescr().repr_of_descr()))
+            raise BogusPureField
+        #
         indexvalue = self.getvalue(op.getarg(1))
-        self.cache_arrayitem_value(op.getdescr(), value, indexvalue, fieldvalue,
-                                   write=True)
+        if indexvalue.is_constant():
+            # use the cache on (arraydescr, index), which is a constant
+            cf = self.arrayitem_cache(op.getdescr(), indexvalue.box.getint())
+            cf.do_setfield(self, op)
+        else:
+            # variable index, so make sure the lazy setarrayitems are done
+            self.force_lazy_setarrayitem(op.getdescr())
+            # and then emit the operation
+            self.emit_operation(op)
 
     def optimize_QUASIIMMUT_FIELD(self, op):
         # Pattern: QUASIIMMUT_FIELD(s, descr=QuasiImmutDescr)
@@ -422,13 +431,7 @@
         self._seen_guard_not_invalidated = True
         self.emit_operation(op)
 
-    def propagate_forward(self, op):
-        opnum = op.getopnum()
-        for value, func in optimize_ops:
-            if opnum == value:
-                func(self, op)
-                break
-        else:
-            self.emit_operation(op)
 
-optimize_ops = _findall(OptHeap, 'optimize_')
+dispatch_opt = make_dispatcher_method(OptHeap, 'optimize_',
+        default=OptHeap.emit_operation)
+OptHeap.propagate_forward = dispatch_opt
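
The CachedArrayItems machinery is gone: array items with a constant index are
now cached in a two-level mapping {array descr: {index: CachedField}}, so they
reuse exactly the same lazy-setfield logic as struct fields.  A minimal sketch
of the lookup-or-create shape of arrayitem_cache(), with plain dicts and a stub
cache class instead of the real CachedField:

    class Cache(object):
        # one cache object per (array descr, constant index) pair
        def __init__(self):
            self.values = {}

    class ArrayItemCaches(object):
        def __init__(self):
            self.by_descr = {}       # {descr: {index: Cache}}

        def get(self, descr, index):
            submap = self.by_descr.setdefault(descr, {})
            try:
                return submap[index]
            except KeyError:
                cache = submap[index] = Cache()
                return cache

    caches = ArrayItemCaches()
    assert caches.get("arraydescr", 0) is caches.get("arraydescr", 0)
    assert caches.get("arraydescr", 0) is not caches.get("arraydescr", 1)
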
diff --git a/pypy/jit/metainterp/optimizeopt/intbounds.py b/pypy/jit/metainterp/optimizeopt/intbounds.py
--- a/pypy/jit/metainterp/optimizeopt/intbounds.py
+++ b/pypy/jit/metainterp/optimizeopt/intbounds.py
@@ -1,7 +1,7 @@
 from pypy.jit.metainterp.optimizeopt.optimizer import Optimization, CONST_1, CONST_0
-from pypy.jit.metainterp.optimizeutil import _findall
-from pypy.jit.metainterp.optimizeopt.intutils import IntBound, IntUnbounded, \
-    IntLowerBound, IntUpperBound
+from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
+from pypy.jit.metainterp.optimizeopt.intutils import (IntBound, IntUnbounded,
+    IntLowerBound, IntUpperBound)
 from pypy.jit.metainterp.history import Const, ConstInt
 from pypy.jit.metainterp.resoperation import rop, ResOperation
 
@@ -17,6 +17,14 @@
         assert self.posponedop is None
         return self
 
+    def setup(self):
+        self.posponedop = None
+        self.nextop = None
+
+    def reconstruct_for_next_iteration(self, optimizer, valuemap):
+        assert self.posponedop is None
+        return self
+
     def propagate_forward(self, op):
         if op.is_ovf():
             self.posponedop = op
@@ -26,14 +34,11 @@
             op = self.posponedop
             self.posponedop = None
 
-        opnum = op.getopnum()
-        for value, func in optimize_ops:
-            if opnum == value:
-                func(self, op)
-                break
-        else:
-            assert not op.is_ovf()
-            self.emit_operation(op)
+        dispatch_opt(self, op)
+
+    def opt_default(self, op):
+        assert not op.is_ovf()
+        self.emit_operation(op)
 
 
     def propagate_bounds_backward(self, box):
@@ -49,11 +54,7 @@
             op = self.optimizer.producer[box]
         except KeyError:
             return
-        opnum = op.getopnum()
-        for value, func in propagate_bounds_ops:
-            if opnum == value:
-                func(self, op)
-                break
+        dispatch_bounds_ops(self, op)
 
     def optimize_GUARD_TRUE(self, op):
         self.emit_operation(op)
@@ -186,7 +187,7 @@
                 # Synthesize the reverse ops for optimize_default to reuse
                 self.pure(rop.INT_ADD, [op.result, op.getarg(1)], op.getarg(0))
                 self.pure(rop.INT_SUB, [op.getarg(0), op.result], op.getarg(1))
-                
+
 
     def optimize_INT_MUL_OVF(self, op):
         v1 = self.getvalue(op.getarg(0))
@@ -284,6 +285,11 @@
         v1.intbound.make_ge(IntLowerBound(0))
         v1.intbound.make_lt(IntUpperBound(256))
 
+    def optimize_UNICODEGETITEM(self, op):
+        self.emit_operation(op)
+        v1 = self.getvalue(op.result)
+        v1.intbound.make_ge(IntLowerBound(0))
+
     def make_int_lt(self, box1, box2):
         v1 = self.getvalue(box1)
         v2 = self.getvalue(box2)
@@ -360,6 +366,27 @@
                 if v2.intbound.intersect(v1.intbound):
                     self.propagate_bounds_backward(op.getarg(1))
 
+    def propagate_bounds_INT_IS_TRUE(self, op):
+        r = self.getvalue(op.result)
+        if r.is_constant():
+            if r.box.same_constant(CONST_1):
+                v1 = self.getvalue(op.getarg(0))
+                if v1.intbound.known_ge(IntBound(0, 0)):
+                    v1.intbound.make_gt(IntBound(0, 0))
+                    self.propagate_bounds_backward(op.getarg(0))
+
+    def propagate_bounds_INT_IS_ZERO(self, op):
+        r = self.getvalue(op.result)
+        if r.is_constant():
+            if r.box.same_constant(CONST_1):
+                v1 = self.getvalue(op.getarg(0))
+                # We can't use self.make_constant_int yet because the args
+                # aren't in the values dictionary yet and would run into an
+                # assert; narrowing the bound to [0, 0] expresses the same thing.
+                v1.intbound.make_ge(IntBound(0, 0))
+                v1.intbound.make_lt(IntBound(1, 1))
+                self.propagate_bounds_backward(op.getarg(0))
+
     def propagate_bounds_INT_ADD(self, op):
         v1 = self.getvalue(op.getarg(0))
         v2 = self.getvalue(op.getarg(1))
@@ -405,5 +432,7 @@
     propagate_bounds_INT_SUB_OVF  = propagate_bounds_INT_SUB
     propagate_bounds_INT_MUL_OVF  = propagate_bounds_INT_MUL
 
-optimize_ops = _findall(OptIntBounds, 'optimize_')
-propagate_bounds_ops = _findall(OptIntBounds, 'propagate_bounds_')
+
+dispatch_opt = make_dispatcher_method(OptIntBounds, 'optimize_',
+        default=OptIntBounds.opt_default)
+dispatch_bounds_ops = make_dispatcher_method(OptIntBounds, 'propagate_bounds_')
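
The new propagate_bounds_INT_IS_TRUE/INT_IS_ZERO hooks narrow an integer's
interval once a guard has fixed the boolean result: a true int_is_zero pins the
value to [0, 0], and a true int_is_true on a value already known to be >= 0
raises its lower bound to 1.  A tiny sketch of that interval narrowing with a
simplified bound class (not pypy's IntBound):

    class Interval(object):
        def __init__(self, lo, hi):
            self.lo, self.hi = lo, hi

        def make_ge(self, value):
            self.lo = max(self.lo, value)

        def make_le(self, value):
            self.hi = min(self.hi, value)

    x = Interval(-100, 100)      # int_is_zero(x) guarded true
    x.make_ge(0); x.make_le(0)
    assert (x.lo, x.hi) == (0, 0)

    y = Interval(0, 100)         # int_is_true(y) guarded true, y known >= 0
    y.make_ge(1)
    assert (y.lo, y.hi) == (1, 100)
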
diff --git a/pypy/jit/metainterp/optimizeopt/optimizer.py b/pypy/jit/metainterp/optimizeopt/optimizer.py
--- a/pypy/jit/metainterp/optimizeopt/optimizer.py
+++ b/pypy/jit/metainterp/optimizeopt/optimizer.py
@@ -4,9 +4,9 @@
 from pypy.jit.metainterp.resoperation import rop, ResOperation
 from pypy.jit.metainterp import jitprof
 from pypy.jit.metainterp.executor import execute_nonspec
-from pypy.jit.metainterp.optimizeutil import _findall, sort_descrs
-from pypy.jit.metainterp.optimizeutil import descrlist_dict
-from pypy.jit.metainterp.optimizeutil import InvalidLoop, args_dict
+from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method, sort_descrs
+from pypy.jit.metainterp.optimizeopt.util import descrlist_dict, args_dict
+from pypy.jit.metainterp.optimize import InvalidLoop
 from pypy.jit.metainterp import resume, compile
 from pypy.jit.metainterp.typesystem import llhelper, oohelper
 from pypy.rpython.lltypesystem import lltype
@@ -141,6 +141,9 @@
         # meaning it has been forced.
         return self.box is None
 
+    def is_forced_virtual(self):
+        return False
+
     def getfield(self, ofs, default):
         raise NotImplementedError
 
@@ -175,6 +178,14 @@
     def __init__(self):
         pass # make rpython happy
 
+    def propagate_begin_forward(self):
+        if self.next_optimization:
+            self.next_optimization.propagate_begin_forward()
+
+    def propagate_end_forward(self):
+        if self.next_optimization:
+            self.next_optimization.propagate_end_forward()
+
     def propagate_forward(self, op):
         raise NotImplementedError
 
@@ -406,11 +417,13 @@
         # ^^^ at least at the start of bridges.  For loops, we could set
         # it to False, but we probably don't care
         self.newoperations = []
+        self.first_optimization.propagate_begin_forward()
         self.i = 0
         while self.i < len(self.loop.operations):
             op = self.loop.operations[self.i]
             self.first_optimization.propagate_forward(op)
             self.i += 1
+        self.first_optimization.propagate_end_forward()
         self.loop.operations = self.newoperations
         self.loop.quasi_immutable_deps = self.quasi_immutable_deps
         # accumulate counters
@@ -421,14 +434,7 @@
 
     def propagate_forward(self, op):
         self.producer[op.result] = op
-        opnum = op.getopnum()
-        for value, func in optimize_ops:
-            if opnum == value:
-                func(self, op)
-                break
-        else:
-            self.optimize_default(op)
-        #print '\n'.join([str(o) for o in self.newoperations]) + '\n---\n'
+        dispatch_opt(self, op)
 
     def test_emittable(self, op):
         return True
@@ -556,7 +562,8 @@
     def optimize_DEBUG_MERGE_POINT(self, op):
         self.emit_operation(op)
 
-optimize_ops = _findall(Optimizer, 'optimize_')
+dispatch_opt = make_dispatcher_method(Optimizer, 'optimize_',
+        default=Optimizer.optimize_default)
 
 
 
diff --git a/pypy/jit/metainterp/optimizeopt/rewrite.py b/pypy/jit/metainterp/optimizeopt/rewrite.py
--- a/pypy/jit/metainterp/optimizeopt/rewrite.py
+++ b/pypy/jit/metainterp/optimizeopt/rewrite.py
@@ -1,7 +1,7 @@
 from pypy.jit.metainterp.optimizeopt.optimizer import *
 from pypy.jit.metainterp.resoperation import opboolinvers, opboolreflex
 from pypy.jit.metainterp.history import ConstInt
-from pypy.jit.metainterp.optimizeutil import _findall
+from pypy.jit.metainterp.optimizeopt.util import _findall, make_dispatcher_method
 from pypy.jit.metainterp.resoperation import rop, ResOperation
 from pypy.jit.codewriter.effectinfo import EffectInfo
 from pypy.jit.metainterp.optimizeopt.intutils import IntBound
@@ -21,18 +21,13 @@
         if self.find_rewritable_bool(op, args):
             return
 
-        opnum = op.getopnum()
-        for value, func in optimize_ops:
-            if opnum == value:
-                func(self, op)
-                break
-        else:
-            self.emit_operation(op)
+        dispatch_opt(self, op)
 
     def test_emittable(self, op):
         opnum = op.getopnum()
-        for value, func in optimize_guards:
+        for value, cls, func in optimize_guards:
             if opnum == value:
+                assert isinstance(op, cls)
                 try:
                     func(self, op, dryrun=True)
                     return self.is_emittable(op)
@@ -184,6 +179,32 @@
         else:
             self.emit_operation(op)
 
+    def optimize_FLOAT_MUL(self, op):
+        arg1 = op.getarg(0)
+        arg2 = op.getarg(1)
+
+        # Constant fold f0 * 1.0 and turn f0 * -1.0 into a FLOAT_NEG, these
+        # work in all cases, including NaN and inf
+        for lhs, rhs in [(arg1, arg2), (arg2, arg1)]:
+            v1 = self.getvalue(lhs)
+            v2 = self.getvalue(rhs)
+
+            if v1.is_constant():
+                if v1.box.getfloat() == 1.0:
+                    self.make_equal_to(op.result, v2)
+                    return
+                elif v1.box.getfloat() == -1.0:
+                    self.emit_operation(ResOperation(
+                        rop.FLOAT_NEG, [rhs], op.result
+                    ))
+                    return
+        self.emit_operation(op)
+
+    def optimize_FLOAT_NEG(self, op):
+        v1 = op.getarg(0)
+        self.emit_operation(op)
+        self.pure(rop.FLOAT_NEG, [op.result], v1)
+
     def optimize_CALL_PURE(self, op):
         arg_consts = []
         for i in range(op.numargs()):
@@ -193,7 +214,7 @@
                 break
             arg_consts.append(const)
         else:
-            # all constant arguments: check if we already know the reslut
+            # all constant arguments: check if we already know the result
             try:
                 result = self.optimizer.call_pure_results[arg_consts]
             except KeyError:
@@ -451,5 +472,6 @@
         self.emit_operation(op)
 
 
-optimize_ops = _findall(OptRewrite, 'optimize_')
+dispatch_opt = make_dispatcher_method(OptRewrite, 'optimize_',
+        default=OptRewrite.emit_operation)
 optimize_guards = _findall(OptRewrite, 'optimize_', 'GUARD')
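
optimize_FLOAT_MUL above only strength-reduces the two multiplications that are
safe for every float, including NaN and the infinities: x * 1.0 is x and
x * -1.0 is float_neg(x), checked in both operand orders.  Other constants
(e.g. 0.0) are left alone because x * 0.0 is NaN when x is NaN or infinite.
A quick standalone sketch of that case analysis:

    def simplify_float_mul(a, b):
        # returns ('copy', other), ('neg', other) or None, mirroring the
        # cases handled by optimize_FLOAT_MUL; anything else is not folded
        for lhs, rhs in [(a, b), (b, a)]:
            if isinstance(lhs, float):
                if lhs == 1.0:
                    return ('copy', rhs)
                if lhs == -1.0:
                    return ('neg', rhs)
        return None

    assert simplify_float_mul(1.0, 'f0') == ('copy', 'f0')
    assert simplify_float_mul('f0', -1.0) == ('neg', 'f0')
    assert simplify_float_mul('f0', 2.0) is None
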
diff --git a/pypy/jit/metainterp/optimizeopt/simplify.py b/pypy/jit/metainterp/optimizeopt/simplify.py
--- a/pypy/jit/metainterp/optimizeopt/simplify.py
+++ b/pypy/jit/metainterp/optimizeopt/simplify.py
@@ -1,7 +1,7 @@
 
 from pypy.jit.metainterp.resoperation import ResOperation, rop
 from pypy.jit.metainterp.optimizeopt.optimizer import Optimization
-from pypy.jit.metainterp.optimizeutil import _findall
+from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
 
 class OptSimplify(Optimization):
     def optimize_CALL_PURE(self, op):
@@ -25,13 +25,7 @@
         #     but it's a bit hard to implement robustly if heap.py is also run
         pass
 
-    def propagate_forward(self, op):
-        opnum = op.getopnum()
-        for value, func in optimize_ops:
-            if opnum == value:
-                func(self, op)
-                break
-        else:
-            self.emit_operation(op)
 
-optimize_ops = _findall(OptSimplify, 'optimize_')
+dispatch_opt = make_dispatcher_method(OptSimplify, 'optimize_',
+        default=OptSimplify.emit_operation)
+OptSimplify.propagate_forward = dispatch_opt
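
Throughout this changeset the old per-operation loop over _findall() results is
replaced by make_dispatcher_method() from
pypy/jit/metainterp/optimizeopt/util.py, which builds a single dispatch
function from a class's optimize_* methods plus a default fallback (the real
helper keys on op.getopnum()).  A small standalone sketch of such a factory,
dispatching on a made-up opname attribute instead:

    def make_dispatcher(cls, prefix, default=None):
        # collect the 'prefix<NAME>' methods once, at class-setup time
        table = {}
        for attr in dir(cls):
            if attr.startswith(prefix):
                table[attr[len(prefix):]] = getattr(cls, attr)

        def dispatch(self, op):
            func = table.get(op.opname)
            if func is not None:
                return func(self, op)
            if default is not None:
                return default(self, op)
        return dispatch

    class Op(object):
        def __init__(self, opname):
            self.opname = opname

    class OptExample(object):
        def __init__(self):
            self.emitted = []
        def emit_operation(self, op):
            self.emitted.append(op.opname)
        def optimize_INT_ADD(self, op):
            self.emitted.append('rewrote ' + op.opname)

    OptExample.propagate_forward = make_dispatcher(
        OptExample, 'optimize_', default=OptExample.emit_operation)

    o = OptExample()
    o.propagate_forward(Op('INT_ADD'))
    o.propagate_forward(Op('FLOAT_MUL'))
    assert o.emitted == ['rewrote INT_ADD', 'FLOAT_MUL']
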
diff --git a/pypy/jit/metainterp/optimizeopt/test/__init__.py b/pypy/jit/metainterp/optimizeopt/test/__init__.py
new file mode 100644
diff --git a/pypy/jit/metainterp/test/test_optimizebasic.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
rename from pypy/jit/metainterp/test/test_optimizebasic.py
rename to pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
--- a/pypy/jit/metainterp/test/test_optimizebasic.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
@@ -1,37 +1,15 @@
 import py
 from pypy.rlib.objectmodel import instantiate
-from pypy.jit.metainterp.test.test_optimizeutil import (LLtypeMixin,
-                                                        #OOtypeMixin,
-                                                        BaseTest)
+from pypy.jit.metainterp.optimizeopt.test.test_util import (
+    LLtypeMixin, BaseTest, FakeMetaInterpStaticData)
+from pypy.jit.metainterp.test.test_compile import FakeLogger
 import pypy.jit.metainterp.optimizeopt.optimizer as optimizeopt
 import pypy.jit.metainterp.optimizeopt.virtualize as virtualize
-from pypy.jit.metainterp.optimizeutil import InvalidLoop
+from pypy.jit.metainterp.optimize import InvalidLoop
 from pypy.jit.metainterp.history import AbstractDescr, ConstInt, BoxInt
-from pypy.jit.metainterp.jitprof import EmptyProfiler
 from pypy.jit.metainterp import executor, compile, resume, history
 from pypy.jit.metainterp.resoperation import rop, opname, ResOperation
-from pypy.jit.tool.oparser import pure_parse
-from pypy.jit.metainterp.optimizeutil import args_dict
-
-##class FakeFrame(object):
-##    parent_resumedata_snapshot = None
-##    parent_resumedata_frame_info_list = None
-
-##    def __init__(self, code="", pc=0):
-##        self.jitcode = code
-##        self.pc = pc
-
-class Fake(object):
-    failargs_limit = 1000
-    storedebug = None
-
-class FakeMetaInterpStaticData(object):
-
-    def __init__(self, cpu):
-        self.cpu = cpu
-        self.profiler = EmptyProfiler()
-        self.options = Fake()
-        self.globaldata = Fake()
+
 
 def test_store_final_boxes_in_guard():
     from pypy.jit.metainterp.compile import ResumeGuardDescr
@@ -101,7 +79,7 @@
     assert vinfo3 is vinfo4
 
 def test_descrlist_dict():
-    from pypy.jit.metainterp import optimizeutil
+    from pypy.jit.metainterp.optimizeopt import util as optimizeutil
     h1 = optimizeutil.descrlist_hash([])
     h2 = optimizeutil.descrlist_hash([LLtypeMixin.valuedescr])
     h3 = optimizeutil.descrlist_hash(
@@ -130,159 +108,55 @@
 
 # ____________________________________________________________
 
-def equaloplists(oplist1, oplist2, strict_fail_args=True, remap={},
-                 text_right=None):
-    # try to use the full width of the terminal to display the list
-    # unfortunately, does not work with the default capture method of py.test
-    # (which is fd), you need to use either -s or --capture=sys, else you
-    # get the standard 80 columns width
-    totwidth = py.io.get_terminal_width()
-    width = totwidth / 2 - 1
-    print ' Comparing lists '.center(totwidth, '-')
-    text_right = text_right or 'expected'
-    print '%s| %s' % ('optimized'.center(width), text_right.center(width))
-    for op1, op2 in zip(oplist1, oplist2):
-        txt1 = str(op1)
-        txt2 = str(op2)
-        while txt1 or txt2:
-            print '%s| %s' % (txt1[:width].ljust(width), txt2[:width])
-            txt1 = txt1[width:]
-            txt2 = txt2[width:]
-        assert op1.getopnum() == op2.getopnum()
-        assert op1.numargs() == op2.numargs()
-        for i in range(op1.numargs()):
-            x = op1.getarg(i)
-            y = op2.getarg(i)
-            assert x == remap.get(y, y)
-        if op2.result in remap:
-            assert op1.result == remap[op2.result]
-        else:
-            remap[op2.result] = op1.result
-        if op1.getopnum() != rop.JUMP:      # xxx obscure
-            assert op1.getdescr() == op2.getdescr()
-        if op1.getfailargs() or op2.getfailargs():
-            assert len(op1.getfailargs()) == len(op2.getfailargs())
-            if strict_fail_args:
-                for x, y in zip(op1.getfailargs(), op2.getfailargs()):
-                    assert x == remap.get(y, y)
-            else:
-                fail_args1 = set(op1.getfailargs())
-                fail_args2 = set([remap.get(y, y) for y in op2.getfailargs()])
-                assert fail_args1 == fail_args2
-    assert len(oplist1) == len(oplist2)
-    print '-'*totwidth
-    return True
-
-def test_equaloplists():
-    ops = """
-    [i0]
-    i1 = int_add(i0, 1)
-    i2 = int_add(i1, 1)
-    guard_true(i1) [i2]
-    jump(i1)
-    """
-    namespace = {}
-    loop1 = pure_parse(ops, namespace=namespace)
-    loop2 = pure_parse(ops, namespace=namespace)
-    loop3 = pure_parse(ops.replace("i2 = int_add", "i2 = int_sub"),
-                       namespace=namespace)
-    assert equaloplists(loop1.operations, loop2.operations)
-    py.test.raises(AssertionError,
-                   "equaloplists(loop1.operations, loop3.operations)")
-
-def test_equaloplists_fail_args():
-    ops = """
-    [i0]
-    i1 = int_add(i0, 1)
-    i2 = int_add(i1, 1)
-    guard_true(i1) [i2, i1]
-    jump(i1)
-    """
-    namespace = {}
-    loop1 = pure_parse(ops, namespace=namespace)
-    loop2 = pure_parse(ops.replace("[i2, i1]", "[i1, i2]"),
-                       namespace=namespace)
-    py.test.raises(AssertionError,
-                   "equaloplists(loop1.operations, loop2.operations)")
-    assert equaloplists(loop1.operations, loop2.operations,
-                        strict_fail_args=False)
-    loop3 = pure_parse(ops.replace("[i2, i1]", "[i2, i0]"),
-                       namespace=namespace)
-    py.test.raises(AssertionError,
-                   "equaloplists(loop1.operations, loop3.operations)")
-
-# ____________________________________________________________
-
-class Storage(compile.ResumeGuardDescr):
-    "for tests."
-    def __init__(self, metainterp_sd=None, original_greenkey=None):
-        self.metainterp_sd = metainterp_sd
-        self.original_greenkey = original_greenkey
-    def store_final_boxes(self, op, boxes):
-        op.setfailargs(boxes)
-    def __eq__(self, other):
-        return type(self) is type(other)      # xxx obscure
-
-def _sortboxes(boxes):
-    _kind2count = {history.INT: 1, history.REF: 2, history.FLOAT: 3}
-    return sorted(boxes, key=lambda box: _kind2count[box.type])
 
 class BaseTestBasic(BaseTest):
 
-    def invent_fail_descr(self, fail_args):
-        if fail_args is None:
-            return None
-        descr = Storage()
-        descr.rd_frame_info_list = resume.FrameInfo(None, "code", 11)
-        descr.rd_snapshot = resume.Snapshot(None, _sortboxes(fail_args))
-        return descr
-
-    def assert_equal(self, optimized, expected):
-        assert len(optimized.inputargs) == len(expected.inputargs)
-        remap = {}
-        for box1, box2 in zip(optimized.inputargs, expected.inputargs):
-            assert box1.__class__ == box2.__class__
-            remap[box2] = box1
-        assert equaloplists(optimized.operations,
-                            expected.operations, False, remap)
+    enable_opts = "intbounds:rewrite:virtualize:string:heap"
 
     def optimize_loop(self, ops, optops, call_pure_results=None):
+
         loop = self.parse(ops)
-        #
-        self.loop = loop
-        loop.call_pure_results = args_dict()
-        if call_pure_results is not None:
-            for k, v in call_pure_results.items():
-                loop.call_pure_results[list(k)] = v
-        metainterp_sd = FakeMetaInterpStaticData(self.cpu)
-        if hasattr(self, 'vrefinfo'):
-            metainterp_sd.virtualref_info = self.vrefinfo
-        if hasattr(self, 'callinfocollection'):
-            metainterp_sd.callinfocollection = self.callinfocollection
-        #
-        # XXX list the exact optimizations that are needed for each test
-        from pypy.jit.metainterp.optimizeopt import (OptIntBounds,
-                                                     OptRewrite,
-                                                     OptVirtualize,
-                                                     OptString,
-                                                     OptHeap,
-                                                     Optimizer)
-        from pypy.jit.metainterp.optimizeopt.fficall import OptFfiCall
-
-        optimizations = [OptIntBounds(),
-                         OptRewrite(),
-                         OptVirtualize(),
-                         OptString(),
-                         OptHeap(),
-                         OptFfiCall(),
-                         ]
-        optimizer = Optimizer(metainterp_sd, loop, optimizations)
-        optimizer.propagate_all_forward()
-        #
         expected = self.parse(optops)
+        self._do_optimize_loop(loop, call_pure_results)
         print '\n'.join([str(o) for o in loop.operations])
         self.assert_equal(loop, expected)
 
+    def setup_method(self, meth=None):
+        class FailDescr(compile.ResumeGuardDescr):
+            oparse = None
+            def _oparser_uses_descr_of_guard(self, oparse, fail_args):
+                # typically called 3 times: once when parsing 'ops',
+                # once when parsing 'preamble', once when parsing 'expected'.
+                self.oparse = oparse
+                self.rd_frame_info_list, self.rd_snapshot = snapshot(fail_args)
+            def _clone_if_mutable(self):
+                assert self is fdescr
+                return fdescr2
+            def __repr__(self):
+                if self is fdescr:
+                    return 'fdescr'
+                if self is fdescr2:
+                    return 'fdescr2'
+                return compile.ResumeGuardDescr.__repr__(self)
+        #
+        def snapshot(fail_args, got=[]):
+            if not got:    # only the first time, i.e. when parsing 'ops'
+                rd_frame_info_list = resume.FrameInfo(None, "code", 11)
+                rd_snapshot = resume.Snapshot(None, fail_args)
+                got.append(rd_frame_info_list)
+                got.append(rd_snapshot)
+            return got
+        #
+        fdescr = instantiate(FailDescr)
+        self.namespace['fdescr'] = fdescr
+        fdescr2 = instantiate(FailDescr)
+        self.namespace['fdescr2'] = fdescr2
+
+    def teardown_method(self, meth):
+        self.namespace.pop('fdescr', None)
+        self.namespace.pop('fdescr2', None)
+
+
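
The snapshot() helper in the new setup_method above leans on Python's mutable-default-argument behaviour: got=[] is created once, so only the first call (while parsing 'ops') builds the FrameInfo/Snapshot pair, and the two later calls (for 'preamble' and 'expected') hand back the very same objects. A standalone illustration of the idiom, with plain tuples standing in for the resume objects:

    def snapshot(fail_args, got=[]):
        if not got:                       # only true on the very first call
            got.append(('FrameInfo', 'code', 11))
            got.append(('Snapshot', tuple(fail_args)))
        return got

    first = snapshot(['i0', 'i1'])
    second = snapshot(['ignored'])        # later fail_args are ignored on purpose
    assert first is second                # the same pair is shared by all parses
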
 
 class BaseTestOptimizeBasic(BaseTestBasic):
 
@@ -565,6 +439,23 @@
         """
         self.optimize_loop(ops, expected)
 
+    def test_int_is_zero_int_is_true(self):
+        ops = """
+        [i0]
+        i1 = int_is_zero(i0)
+        guard_true(i1) []
+        i2 = int_is_true(i0)
+        guard_false(i2) []
+        jump(i0)
+        """
+        expected = """
+        [i0]
+        i1 = int_is_zero(i0)
+        guard_true(i1) []
+        jump(0)
+        """
+        self.optimize_loop(ops, expected)
+
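
The new test documents that guard_true(int_is_zero(i0)) pins i0 to the constant 0, after which int_is_true(i0) folds to 0, its guard_false becomes trivially true, and the jump argument turns into the literal 0. A sketch of the bound bookkeeping behind it (illustrative, not the intbounds data structures):

    class IntBound(object):
        def __init__(self, lower, upper):
            self.lower, self.upper = lower, upper
        def known_to_be(self, value):
            return self.lower == value == self.upper

    bounds = {'i0': IntBound(-2**63, 2**63 - 1)}   # nothing known initially

    # guard_true(i1) with i1 = int_is_zero(i0): on this path i0 == 0.
    bounds['i0'] = IntBound(0, 0)

    # i2 = int_is_true(i0) is now the constant 0, so guard_false(i2) holds
    # trivially and both operations can be dropped from the optimized loop.
    assert bounds['i0'].known_to_be(0)
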
     def test_ooisnull_oononnull_2(self):
         ops = """
         [p0]
@@ -1231,8 +1122,8 @@
         """
         expected = """
         [i1, p0]
+        p1 = new_array(i1, descr=arraydescr)
         setarrayitem_gc(p0, 0, i1, descr=arraydescr)
-        p1 = new_array(i1, descr=arraydescr)
         jump(i1, p1)
         """
         self.optimize_loop(ops, expected)
@@ -1597,9 +1488,9 @@
         i3 = getarrayitem_gc_pure(p3, 1, descr=arraydescr)
         i4 = getarrayitem_gc(p3, i3, descr=arraydescr)
         i5 = int_add(i3, i4)
-        setarrayitem_gc(p3, 0, i5, descr=arraydescr)
         #
         setfield_gc(p1, i2, descr=valuedescr)
+        setarrayitem_gc(p3, 0, i5, descr=arraydescr)
         setfield_gc(p1, i4, descr=nextdescr)
         jump(p1, i1, i2, p3)
         """
@@ -1773,6 +1664,7 @@
         self.optimize_loop(ops, expected)
 
     def test_duplicate_getarrayitem_after_setarrayitem_2(self):
+        py.test.skip("setarrayitem with variable index")
         ops = """
         [p1, p2, p3, i1]
         setarrayitem_gc(p1, 0, p2, descr=arraydescr2)
@@ -2035,7 +1927,6 @@
         self.optimize_loop(ops, expected)
 
     def test_merge_guard_nonnull_guard_class(self):
-        self.make_fail_descr()
         ops = """
         [p1, i0, i1, i2, p2]
         guard_nonnull(p1, descr=fdescr) [i0]
@@ -2053,7 +1944,6 @@
         self.check_expanded_fail_descr("i0", rop.GUARD_NONNULL_CLASS)
 
     def test_merge_guard_nonnull_guard_value(self):
-        self.make_fail_descr()
         ops = """
         [p1, i0, i1, i2, p2]
         guard_nonnull(p1, descr=fdescr) [i0]
@@ -2071,7 +1961,6 @@
         self.check_expanded_fail_descr("i0", rop.GUARD_VALUE)
 
     def test_merge_guard_nonnull_guard_class_guard_value(self):
-        self.make_fail_descr()
         ops = """
         [p1, i0, i1, i2, p2]
         guard_nonnull(p1, descr=fdescr) [i0]
@@ -2287,25 +2176,83 @@
         """
         self.optimize_loop(ops, expected)
 
+    def test_fold_constant_partial_ops_float(self):
+        ops = """
+        [f0]
+        f1 = float_mul(f0, 1.0)
+        f2 = escape(f1)
+        jump(f2)
+        """
+        expected = """
+        [f0]
+        f2 = escape(f0)
+        jump(f2)
+        """
+        self.optimize_loop(ops, expected)
+
+        ops = """
+        [f0]
+        f1 = float_mul(1.0, f0)
+        f2 = escape(f1)
+        jump(f2)
+        """
+        expected = """
+        [f0]
+        f2 = escape(f0)
+        jump(f2)
+        """
+        self.optimize_loop(ops, expected)
+
+
+        ops = """
+        [f0]
+        f1 = float_mul(f0, -1.0)
+        f2 = escape(f1)
+        jump(f2)
+        """
+        expected = """
+        [f0]
+        f1 = float_neg(f0)
+        f2 = escape(f1)
+        jump(f2)
+        """
+        self.optimize_loop(ops, expected)
+
+        ops = """
+        [f0]
+        f1 = float_mul(-1.0, f0)
+        f2 = escape(f1)
+        jump(f2)
+        """
+        expected = """
+        [f0]
+        f1 = float_neg(f0)
+        f2 = escape(f1)
+        jump(f2)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_fold_repeated_float_neg(self):
+        ops = """
+        [f0]
+        f1 = float_neg(f0)
+        f2 = float_neg(f1)
+        f3 = float_neg(f2)
+        f4 = float_neg(f3)
+        escape(f4)
+        jump(f4)
+        """
+        expected = """
+        [f0]
+        # The backend removes this dead op.
+        f1 = float_neg(f0)
+        escape(f0)
+        jump(f0)
+        """
+        self.optimize_loop(ops, expected)
+
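
The test_fold_constant_partial_ops_float cases and test_fold_repeated_float_neg pin down a small set of peephole rules: float_mul by the constant 1.0 (on either side) returns the other operand, float_mul by -1.0 becomes float_neg, and nested float_neg operations cancel pairwise. A compact sketch of those rules (illustrative, not the OptRewrite code):

    class Const(object):
        def __init__(self, value):
            self.value = value

    def simplify_float_mul(a, b):
        for const, other in ((a, b), (b, a)):
            if isinstance(const, Const):
                if const.value == 1.0:
                    return other                   # f * 1.0  -> f
                if const.value == -1.0:
                    return ('float_neg', other)    # f * -1.0 -> float_neg(f)
        return ('float_mul', a, b)

    def simplify_float_neg(arg):
        if isinstance(arg, tuple) and arg[0] == 'float_neg':
            return arg[1]                          # neg(neg(x)) -> x
        return ('float_neg', arg)

    f0 = object()
    f1 = simplify_float_mul(f0, Const(-1.0))       # ('float_neg', f0)
    f2 = simplify_float_neg(f1)                    # cancels: back to f0
    assert f2 is f0
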
     # ----------
 
-    def make_fail_descr(self):
-        class FailDescr(compile.ResumeGuardDescr):
-            oparse = None
-            def _oparser_uses_descr_of_guard(self, oparse, fail_args):
-                # typically called twice, before and after optimization
-                if self.oparse is None:
-                    fdescr.rd_frame_info_list = resume.FrameInfo(None,
-                                                                 "code", 11)
-                    fdescr.rd_snapshot = resume.Snapshot(None, fail_args)
-                self.oparse = oparse
-        #
-        fdescr = instantiate(FailDescr)
-        self.namespace['fdescr'] = fdescr
-
-    def teardown_method(self, meth):
-        self.namespace.pop('fdescr', None)
-
     def _verify_fail_args(self, boxes, oparse, text):
         import re
         r = re.compile(r"\bwhere\s+(\w+)\s+is a\s+(\w+)")
@@ -2414,7 +2361,6 @@
         self._verify_fail_args(boxes, fdescr.oparse, expectedtext)
 
     def test_expand_fail_1(self):
-        self.make_fail_descr()
         ops = """
         [i1, i3]
         # first rename i3 into i4
@@ -2435,7 +2381,6 @@
         self.check_expanded_fail_descr('15, i3', rop.GUARD_TRUE)
 
     def test_expand_fail_2(self):
-        self.make_fail_descr()
         ops = """
         [i1, i2]
         p1 = new_with_vtable(ConstClass(node_vtable))
@@ -2455,7 +2400,6 @@
             ''', rop.GUARD_TRUE)
 
     def test_expand_fail_3(self):
-        self.make_fail_descr()
         ops = """
         [i1, i2, i3, p3]
         p1 = new_with_vtable(ConstClass(node_vtable))
@@ -2481,7 +2425,7 @@
     def test_expand_fail_4(self):
         for arg in ['p1', 'i2,p1', 'p1,p2', 'p2,p1',
                     'i2,p1,p2', 'i2,p2,p1']:
-            self.make_fail_descr()
+            self.setup_method() # humpf
             ops = """
             [i1, i2, i3]
             p1 = new_with_vtable(ConstClass(node_vtable))
@@ -2506,7 +2450,6 @@
                                            rop.GUARD_TRUE)
 
     def test_expand_fail_5(self):
-        self.make_fail_descr()
         ops = """
         [i1, i2, i3, i4]
         p1 = new_with_vtable(ConstClass(node_vtable))
@@ -2530,7 +2473,6 @@
             ''', rop.GUARD_TRUE)
 
     def test_expand_fail_6(self):
-        self.make_fail_descr()
         ops = """
         [p0, i0, i1]
         guard_true(i0, descr=fdescr) [p0]
@@ -2551,7 +2493,6 @@
             ''', rop.GUARD_TRUE)
 
     def test_expand_fail_varray(self):
-        self.make_fail_descr()
         ops = """
         [i1]
         p1 = new_array(3, descr=arraydescr)
@@ -2572,7 +2513,6 @@
             ''', rop.GUARD_TRUE)
 
     def test_expand_fail_vstruct(self):
-        self.make_fail_descr()
         ops = """
         [i1, p1]
         p2 = new(descr=ssize)
@@ -2594,7 +2534,6 @@
             ''', rop.GUARD_TRUE)
 
     def test_expand_fail_v_all_1(self):
-        self.make_fail_descr()
         ops = """
         [i1, p1a, i2]
         p6s = getarrayitem_gc(p1a, 0, descr=arraydescr2)
@@ -2636,7 +2575,6 @@
             ''', rop.GUARD_TRUE)
 
     def test_expand_fail_lazy_setfield_1(self):
-        self.make_fail_descr()
         ops = """
         [p1, i2, i3]
         p2 = new_with_vtable(ConstClass(node_vtable))
@@ -2662,7 +2600,6 @@
             ''', rop.GUARD_TRUE)
 
     def test_expand_fail_lazy_setfield_2(self):
-        self.make_fail_descr()
         ops = """
         [i2, i3]
         p2 = new_with_vtable(ConstClass(node_vtable))
@@ -2686,9 +2623,6 @@
             where p2 is a node_vtable, valuedescr=i2
             ''', rop.GUARD_TRUE)
 
-
-class TestLLtype(BaseTestOptimizeBasic, LLtypeMixin):
-
     def test_residual_call_does_not_invalidate_caches(self):
         ops = """
         [p1, p2]
@@ -2980,7 +2914,6 @@
         self.optimize_loop(ops, expected)
 
     def test_vref_virtual_2(self):
-        self.make_fail_descr()
         ops = """
         [p0, i1]
         #
@@ -3026,7 +2959,6 @@
             ''', rop.GUARD_NOT_FORCED)
 
     def test_vref_virtual_and_lazy_setfield(self):
-        self.make_fail_descr()
         ops = """
         [p0, i1]
         #
@@ -3065,7 +2997,6 @@
             ''', rop.GUARD_NO_EXCEPTION)
 
     def test_vref_virtual_after_finish(self):
-        self.make_fail_descr()
         ops = """
         [i1]
         p1 = new_with_vtable(ConstClass(node_vtable))
@@ -3092,7 +3023,6 @@
         self.optimize_loop(ops, expected)
 
     def test_vref_nonvirtual_and_lazy_setfield(self):
-        self.make_fail_descr()
         ops = """
         [i1, p1]
         p2 = virtual_ref(p1, 23)
@@ -3986,11 +3916,8 @@
         i2 = strlen(p2)
         i3 = int_add(i1, i2)
         p3 = newstr(i3)
-        i4 = strlen(p1)
-        copystrcontent(p1, p3, 0, 0, i4)
-        i5 = strlen(p2)
-        i6 = int_add(i4, i5)      # will be killed by the backend
-        copystrcontent(p2, p3, 0, i4, i5)
+        copystrcontent(p1, p3, 0, 0, i1)
+        copystrcontent(p2, p3, 0, i1, i2)
         jump(p2, p3)
         """
         self.optimize_strunicode_loop(ops, expected)
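
The expected traces in the string-concatenation tests shrink because the string optimization now reuses the length boxes it already has: once i1 = strlen(p1) and i2 = strlen(p2) exist, filling p3 = newstr(i3) no longer re-emits strlen/int_add pairs that were previously left for the backend to kill. A minimal sketch of such a known-length cache (illustrative only, not the optimizeopt string pass itself):

    known_length = {}                    # string box -> its known length box

    def optimize_strlen(string_box, emit, new_box):
        box = known_length.get(string_box)
        if box is not None:
            return box                   # reuse i1/i2; emit nothing
        box = new_box()
        emit(('strlen', string_box, box))
        known_length[string_box] = box
        return box

    emitted = []
    assert optimize_strlen('p1', emitted.append, lambda: 'i1') == 'i1'
    assert optimize_strlen('p1', emitted.append, lambda: 'i4') == 'i1'
    assert emitted == [('strlen', 'p1', 'i1')]   # the second strlen was folded
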
@@ -4011,9 +3938,7 @@
         p3 = newstr(i3)
         strsetitem(p3, 0, i0)
         strsetitem(p3, 1, i1)
-        i4 = strlen(p2)
-        i5 = int_add(2, i4)      # will be killed by the backend
-        copystrcontent(p2, p3, 0, 2, i4)
+        copystrcontent(p2, p3, 0, 2, i2)
         jump(i1, i0, p3)
         """
         self.optimize_strunicode_loop(ops, expected)
@@ -4032,10 +3957,9 @@
         i2 = strlen(p2)
         i3 = int_add(i2, 2)
         p3 = newstr(i3)
-        i4 = strlen(p2)
-        copystrcontent(p2, p3, 0, 0, i4)
-        strsetitem(p3, i4, i0)
-        i5 = int_add(i4, 1)
+        copystrcontent(p2, p3, 0, 0, i2)
+        strsetitem(p3, i2, i0)
+        i5 = int_add(i2, 1)
         strsetitem(p3, i5, i1)
         i6 = int_add(i5, 1)      # will be killed by the backend
         jump(i1, i0, p3)
@@ -4057,14 +3981,9 @@
         i3 = strlen(p3)
         i123 = int_add(i12, i3)
         p5 = newstr(i123)
-        i1b = strlen(p1)
-        copystrcontent(p1, p5, 0, 0, i1b)
-        i2b = strlen(p2)
-        i12b = int_add(i1b, i2b)
-        copystrcontent(p2, p5, 0, i1b, i2b)
-        i3b = strlen(p3)
-        i123b = int_add(i12b, i3b)      # will be killed by the backend
-        copystrcontent(p3, p5, 0, i12b, i3b)
+        copystrcontent(p1, p5, 0, 0, i1)
+        copystrcontent(p2, p5, 0, i1, i2)
+        copystrcontent(p3, p5, 0, i12, i3)
         jump(p2, p3, p5)
         """
         self.optimize_strunicode_loop(ops, expected)
@@ -4080,10 +3999,8 @@
         i2 = strlen(p2)
         i3 = int_add(i2, 1)
         p3 = newstr(i3)
-        i4 = strlen(p2)
-        copystrcontent(p2, p3, 0, 0, i4)
-        strsetitem(p3, i4, 120)     # == ord('x')
-        i5 = int_add(i4, 1)      # will be killed by the backend
+        copystrcontent(p2, p3, 0, 0, i2)
+        strsetitem(p3, i2, 120)     # == ord('x')
         jump(p3)
         """
         self.optimize_strunicode_loop(ops, expected)
@@ -4201,16 +4118,13 @@
         i5 = int_add(i3, i4)
         p4 = newstr(i5)
         copystrcontent(p1, p4, i1, 0, i3)
-        i4b = strlen(p2)
-        i6 = int_add(i3, i4b)    # killed by the backend
-        copystrcontent(p2, p4, 0, i3, i4b)
+        copystrcontent(p2, p4, 0, i3, i4)
         jump(p4, i1, i2, p2)
         """
         self.optimize_strunicode_loop(ops, expected)
 
     # ----------
     def optimize_strunicode_loop_extradescrs(self, ops, optops):
-        from pypy.jit.metainterp.optimizeopt import string
         class FakeCallInfoCollection:
             def callinfo_for_oopspec(self, oopspecindex):
                 calldescrtype = type(LLtypeMixin.strequaldescr)
@@ -4249,11 +4163,8 @@
         i2 = strlen(p2)
         i3 = int_add(i1, i2)
         p4 = newstr(i3)
-        i4 = strlen(p1)
-        copystrcontent(p1, p4, 0, 0, i4)
-        i5 = strlen(p2)
-        i6 = int_add(i4, i5)      # will be killed by the backend
-        copystrcontent(p2, p4, 0, i4, i5)
+        copystrcontent(p1, p4, 0, 0, i1)
+        copystrcontent(p2, p4, 0, i1, i2)
         i0 = call(0, p3, p4, descr=strequaldescr)
         escape(i0)
         jump(p1, p2, p3)
@@ -4445,11 +4356,8 @@
         i2 = strlen(p2)
         i3 = int_add(i1, i2)
         p4 = newstr(i3)
-        i4 = strlen(p1)
-        copystrcontent(p1, p4, 0, 0, i4)
-        i5 = strlen(p2)
-        i6 = int_add(i4, i5)      # will be killed by the backend
-        copystrcontent(p2, p4, 0, i4, i5)
+        copystrcontent(p1, p4, 0, 0, i1)
+        copystrcontent(p2, p4, 0, i1, i2)
         i0 = call(0, s"hello world", p4, descr=streq_nonnull_descr)
         escape(i0)
         jump(p1, p2)
@@ -4566,6 +4474,66 @@
         # not obvious, because of the exception UnicodeDecodeError that
         # can be raised by ll_str2unicode()
 
+    def test_strgetitem_repeated(self):
+        ops = """
+        [p0, i0]
+        i1 = strgetitem(p0, i0)
+        i2 = strgetitem(p0, i0)
+        i3 = int_eq(i1, i2)
+        guard_true(i3) []
+        escape(i2)
+        jump(p0, i0)
+        """
+        expected = """
+        [p0, i0]
+        i1 = strgetitem(p0, i0)
+        escape(i1)
+        jump(p0, i0)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_int_is_true_bounds(self):
+        ops = """
+        [p0]
+        i0 = strlen(p0)
+        i1 = int_is_true(i0)
+        guard_true(i1) []
+        i2 = int_ge(0, i0)
+        guard_false(i2) []
+        jump(p0)
+        """
+        expected = """
+        [p0]
+        i0 = strlen(p0)
+        i1 = int_is_true(i0)
+        guard_true(i1) []
+        jump(p0)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_strslice_with_other_stuff(self):
+        ops = """
+        [p0, i0]
+        i1 = int_add(i0, 1)
+        p1 = call(0, p0, i0, i1, descr=strslicedescr)
+        escape(p1)
+        jump(p0, i1)
+        """
+        expected = """
+        [p0, i0]
+        i1 = int_add(i0, 1)
+        p1 = newstr(1)
+        i2 = strgetitem(p0, i0)
+        strsetitem(p1, 0, i2)
+        escape(p1)
+        jump(p0, i1)
+        """
+        self.optimize_strunicode_loop(ops, expected)
+
+
+class TestLLtype(BaseTestOptimizeBasic, LLtypeMixin):
+    pass
+
 
 ##class TestOOtype(BaseTestOptimizeBasic, OOtypeMixin):
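
The tests added at the end of this class rely on two further facts: strgetitem is a pure operation, so a repeated occurrence with the same arguments is deduplicated (i2 == i1 and the int_eq guard folds away), and the intbounds pass knows value ranges for string operations, e.g. that a strlen result is never negative and that guard_true(int_is_true(i0)) then excludes 0. test_strslice_with_other_stuff additionally checks that a one-character slice is materialised as newstr(1) plus a single strgetitem/strsetitem pair instead of a residual call. A small sketch of the range facts:

    import sys

    def result_bounds(opname):
        if opname == 'strlen':
            return (0, sys.maxsize)      # a string length is never negative
        if opname == 'strgetitem':
            return (0, 255)              # a byte-sized character code
        return (-sys.maxsize - 1, sys.maxsize)

    lo, hi = result_bounds('strlen')     # i0 = strlen(p0)  ->  i0 >= 0
    lo = max(lo, 1)                      # guard_true(int_is_true(i0)) -> i0 >= 1
    assert not (0 >= lo)                 # hence int_ge(0, i0) is known false
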
 
diff --git a/pypy/jit/metainterp/test/test_optimizefficall.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizefficall.py
rename from pypy/jit/metainterp/test/test_optimizefficall.py
rename to pypy/jit/metainterp/optimizeopt/test/test_optimizefficall.py
--- a/pypy/jit/metainterp/test/test_optimizefficall.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_optimizefficall.py
@@ -2,8 +2,8 @@
 from pypy.rlib.libffi import Func, types
 from pypy.jit.metainterp.history import AbstractDescr
 from pypy.jit.codewriter.effectinfo import EffectInfo
-from pypy.jit.metainterp.test.test_optimizebasic import BaseTestBasic
-from pypy.jit.metainterp.test.test_optimizebasic import LLtypeMixin
+from pypy.jit.metainterp.optimizeopt.test.test_optimizebasic import BaseTestBasic
+from pypy.jit.metainterp.optimizeopt.test.test_optimizebasic import LLtypeMixin
 
 class MyCallDescr(AbstractDescr):
     """
@@ -32,12 +32,15 @@
 
 
 class TestFfiCall(BaseTestBasic, LLtypeMixin):
-    jit_ffi = True
+
+    enable_opts = "intbounds:rewrite:virtualize:string:heap:ffi"
 
     class namespace:
         cpu = LLtypeMixin.cpu
         FUNC = LLtypeMixin.FUNC
         vable_token_descr = LLtypeMixin.valuedescr
+        valuedescr = LLtypeMixin.valuedescr
+
         int_float__int = MyCallDescr('if', 'i')
         funcptr = FakeLLObject()
         func = FakeLLObject(_fake_class=Func,
@@ -48,7 +51,7 @@
                              restype=types.sint)
         #
         def calldescr(cpu, FUNC, oopspecindex, extraeffect=None):
-            einfo = EffectInfo([], [], [], oopspecindex=oopspecindex,
+            einfo = EffectInfo([], [], [], [], oopspecindex=oopspecindex,
                                extraeffect=extraeffect)
             return cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT, einfo)
         #
@@ -76,7 +79,7 @@
         """
         expected = """
         [i0, f1]
-        i3 = call_may_force(12345, i0, f1, descr=int_float__int)
+        i3 = call_release_gil(12345, i0, f1, descr=int_float__int)
         guard_not_forced() []
         guard_no_exception() []
         jump(i3, f1)
@@ -99,7 +102,7 @@
 
     def test_handle_virtualizables(self):
         # this test needs an explanation to understand what goes on: see the
-        # coment in optimize_FORCE_TOKEN
+        # comment in optimize_FORCE_TOKEN
         ops = """
         [i0, f1, p2]
         call(0, ConstPtr(func),                       descr=libffi_prepare)
@@ -116,7 +119,7 @@
         [i0, f1, p2]
         i4 = force_token()
         setfield_gc(p2, i4, descr=vable_token_descr)
-        i3 = call_may_force(12345, i0, f1, descr=int_float__int)
+        i3 = call_release_gil(12345, i0, f1, descr=int_float__int)
         guard_not_forced() [p2]
         guard_no_exception() [p2]
         jump(i3, f1, p2)
@@ -213,7 +216,7 @@
         call(0, ConstPtr(func),                        descr=libffi_prepare)
         #
         # this "nested" call is nicely optimized
-        i4 = call_may_force(67890, i0, f1, descr=int_float__int)
+        i4 = call_release_gil(67890, i0, f1, descr=int_float__int)
         guard_not_forced() []
         guard_no_exception() []
         #
@@ -242,3 +245,25 @@
         """
         expected = ops
         loop = self.optimize_loop(ops, expected)
+
+    def test_allow_setfields_in_between(self):
+        ops = """
+        [i0, f1, p2]
+        call(0, ConstPtr(func),                       descr=libffi_prepare)
+        call(0, ConstPtr(func), i0,                   descr=libffi_push_arg)
+        call(0, ConstPtr(func), f1,                   descr=libffi_push_arg)
+        setfield_gc(p2, i0,                           descr=valuedescr)
+        i3 = call_may_force(0, ConstPtr(func), 12345, descr=libffi_call)
+        guard_not_forced() []
+        guard_no_exception() []
+        jump(i3, f1, p2)
+        """
+        expected = """
+        [i0, f1, p2]
+        setfield_gc(p2, i0, descr=valuedescr)
+        i3 = call_release_gil(12345, i0, f1, descr=int_float__int)
+        guard_not_forced() []
+        guard_no_exception() []
+        jump(i3, f1, p2)
+        """
+        loop = self.optimize_loop(ops, expected)
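
Throughout this file the expected traces now use call_release_gil where call_may_force used to appear: the ffi optimization recognises the libffi_prepare / libffi_push_arg / libffi_call sequence and collapses it into one call that can release the GIL, and the new test_allow_setfields_in_between checks that an unrelated setfield_gc between the pushes and the call does not defeat that recognition. A very rough sketch of this kind of sequence matching (names and structure are illustrative, not the fficall.py logic):

    def collapse_ffi_sequence(ops):
        out, pushed = [], []
        for op in ops:
            name = op[0]
            if name == 'libffi_prepare':
                pushed = []
            elif name == 'libffi_push_arg':
                pushed.append(op[1])
            elif name == 'setfield_gc':
                out.append(op)           # harmless in between: keep it as-is
            elif name == 'libffi_call':
                out.append(('call_release_gil', op[1]) + tuple(pushed))
            else:
                out.append(op)
        return out

    ops = [('libffi_prepare',),
           ('libffi_push_arg', 'i0'),
           ('libffi_push_arg', 'f1'),
           ('setfield_gc', 'p2', 'i0'),
           ('libffi_call', 12345)]
    assert collapse_ffi_sequence(ops) == [
        ('setfield_gc', 'p2', 'i0'),
        ('call_release_gil', 12345, 'i0', 'f1')]
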
diff --git a/pypy/jit/metainterp/test/test_optimizeopt.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
rename from pypy/jit/metainterp/test/test_optimizeopt.py
rename to pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
--- a/pypy/jit/metainterp/test/test_optimizeopt.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
@@ -1,202 +1,88 @@
 import py
 from pypy.rlib.objectmodel import instantiate
-from pypy.jit.metainterp.test.test_optimizeutil import (LLtypeMixin,
-                                                        #OOtypeMixin,
-                                                        BaseTest)
+from pypy.jit.metainterp.optimizeopt.test.test_util import (
+    LLtypeMixin, BaseTest, Storage, _sortboxes)
 import pypy.jit.metainterp.optimizeopt.optimizer as optimizeopt
 import pypy.jit.metainterp.optimizeopt.virtualize as virtualize
-from pypy.jit.metainterp.optimizeopt import optimize_loop_1, ALL_OPTS_DICT
-from pypy.jit.metainterp.optimizeutil import InvalidLoop
+from pypy.jit.metainterp.optimizeopt import optimize_loop_1, ALL_OPTS_DICT, build_opt_chain
+from pypy.jit.metainterp.optimize import InvalidLoop
 from pypy.jit.metainterp.history import AbstractDescr, ConstInt, BoxInt
 from pypy.jit.metainterp.history import TreeLoop, LoopToken
 from pypy.jit.metainterp.jitprof import EmptyProfiler
 from pypy.jit.metainterp import executor, compile, resume, history
 from pypy.jit.metainterp.resoperation import rop, opname, ResOperation
 from pypy.jit.tool.oparser import pure_parse
-from pypy.jit.metainterp.test.test_optimizebasic import equaloplists
-from pypy.jit.metainterp.optimizeutil import args_dict
-
-class Fake(object):
-    failargs_limit = 1000
-    storedebug = None
-
-class FakeMetaInterpStaticData(object):
-
-    def __init__(self, cpu, jit_ffi=False):
-        self.cpu = cpu
-        self.profiler = EmptyProfiler()
-        self.options = Fake()
-        self.globaldata = Fake()
-        self.jit_ffi = jit_ffi
-
-def test_store_final_boxes_in_guard():
-    from pypy.jit.metainterp.compile import ResumeGuardDescr
-    from pypy.jit.metainterp.resume import tag, TAGBOX
-    b0 = BoxInt()
-    b1 = BoxInt()
-    opt = optimizeopt.Optimizer(FakeMetaInterpStaticData(LLtypeMixin.cpu),
-                                None)
-    fdescr = ResumeGuardDescr()
-    op = ResOperation(rop.GUARD_TRUE, ['dummy'], None, descr=fdescr)
-    # setup rd data
-    fi0 = resume.FrameInfo(None, "code0", 11)
-    fdescr.rd_frame_info_list = resume.FrameInfo(fi0, "code1", 33)
-    snapshot0 = resume.Snapshot(None, [b0])
-    fdescr.rd_snapshot = resume.Snapshot(snapshot0, [b1])
+from pypy.jit.metainterp.optimizeopt.util import args_dict
+from pypy.jit.metainterp.optimizeopt.test.test_optimizebasic import FakeMetaInterpStaticData
+from pypy.config.pypyoption import get_pypy_config
+
+
+def test_build_opt_chain():
+    def check(chain, expected_names):
+        names = [opt.__class__.__name__ for opt in chain]
+        assert names == expected_names
     #
-    opt.store_final_boxes_in_guard(op)
-    if op.getfailargs() == [b0, b1]:
-        assert list(fdescr.rd_numb.nums)      == [tag(1, TAGBOX)]
-        assert list(fdescr.rd_numb.prev.nums) == [tag(0, TAGBOX)]
-    else:
-        assert op.getfailargs() == [b1, b0]
-        assert list(fdescr.rd_numb.nums)      == [tag(0, TAGBOX)]
-        assert list(fdescr.rd_numb.prev.nums) == [tag(1, TAGBOX)]
-    assert fdescr.rd_virtuals is None
-    assert fdescr.rd_consts == []
-
-def test_sharing_field_lists_of_virtual():
-    class FakeOptimizer(object):
-        class cpu(object):
-            pass
-    opt = FakeOptimizer()
-    virt1 = virtualize.AbstractVirtualStructValue(opt, None)
-    lst1 = virt1._get_field_descr_list()
-    assert lst1 == []
-    lst2 = virt1._get_field_descr_list()
-    assert lst1 is lst2
-    virt1.setfield(LLtypeMixin.valuedescr, optimizeopt.OptValue(None))
-    lst3 = virt1._get_field_descr_list()
-    assert lst3 == [LLtypeMixin.valuedescr]
-    lst4 = virt1._get_field_descr_list()
-    assert lst3 is lst4
-
-    virt2 = virtualize.AbstractVirtualStructValue(opt, None)
-    lst5 = virt2._get_field_descr_list()
-    assert lst5 is lst1
-    virt2.setfield(LLtypeMixin.valuedescr, optimizeopt.OptValue(None))
-    lst6 = virt1._get_field_descr_list()
-    assert lst6 is lst3
-
-def test_reuse_vinfo():
-    class FakeVInfo(object):
-        def set_content(self, fieldnums):
-            self.fieldnums = fieldnums
-        def equals(self, fieldnums):
-            return self.fieldnums == fieldnums
-    class FakeVirtualValue(virtualize.AbstractVirtualValue):
-        def _make_virtual(self, *args):
-            return FakeVInfo()
-    v1 = FakeVirtualValue(None, None, None)
-    vinfo1 = v1.make_virtual_info(None, [1, 2, 4])
-    vinfo2 = v1.make_virtual_info(None, [1, 2, 4])
-    assert vinfo1 is vinfo2
-    vinfo3 = v1.make_virtual_info(None, [1, 2, 6])
-    assert vinfo3 is not vinfo2
-    vinfo4 = v1.make_virtual_info(None, [1, 2, 6])
-    assert vinfo3 is vinfo4
-
-def test_descrlist_dict():
-    from pypy.jit.metainterp import optimizeutil
-    h1 = optimizeutil.descrlist_hash([])
-    h2 = optimizeutil.descrlist_hash([LLtypeMixin.valuedescr])
-    h3 = optimizeutil.descrlist_hash(
-            [LLtypeMixin.valuedescr, LLtypeMixin.nextdescr])
-    assert h1 != h2
-    assert h2 != h3
-    assert optimizeutil.descrlist_eq([], [])
-    assert not optimizeutil.descrlist_eq([], [LLtypeMixin.valuedescr])
-    assert optimizeutil.descrlist_eq([LLtypeMixin.valuedescr],
-                                     [LLtypeMixin.valuedescr])
-    assert not optimizeutil.descrlist_eq([LLtypeMixin.valuedescr],
-                                         [LLtypeMixin.nextdescr])
-    assert optimizeutil.descrlist_eq([LLtypeMixin.valuedescr, LLtypeMixin.nextdescr],
-                                     [LLtypeMixin.valuedescr, LLtypeMixin.nextdescr])
-    assert not optimizeutil.descrlist_eq([LLtypeMixin.nextdescr, LLtypeMixin.valuedescr],
-                                         [LLtypeMixin.valuedescr, LLtypeMixin.nextdescr])
-
-    # descrlist_eq should compare by identity of the descrs, not by the result
-    # of sort_key
-    class FakeDescr(object):
-        def sort_key(self):
-            return 1
-
-    assert not optimizeutil.descrlist_eq([FakeDescr()], [FakeDescr()])
+    metainterp_sd = FakeMetaInterpStaticData(None)
+    chain, _ = build_opt_chain(metainterp_sd, "", inline_short_preamble=False)
+    check(chain, ["OptSimplify"])
+    #
+    chain, _ = build_opt_chain(metainterp_sd, "")
+    check(chain, ["OptInlineShortPreamble", "OptSimplify"])
+    #
+    chain, _ = build_opt_chain(metainterp_sd, "")
+    check(chain, ["OptInlineShortPreamble", "OptSimplify"])
+    #
+    chain, _ = build_opt_chain(metainterp_sd, "heap:intbounds")
+    check(chain, ["OptInlineShortPreamble", "OptIntBounds", "OptHeap", "OptSimplify"])
+    #
+    chain, unroll = build_opt_chain(metainterp_sd, "unroll")
+    check(chain, ["OptInlineShortPreamble", "OptSimplify"])
+    assert unroll
+    #
+    chain, _ = build_opt_chain(metainterp_sd, "aaa:bbb", inline_short_preamble=False)
+    check(chain, ["OptSimplify"])
+    #
+    chain, _ = build_opt_chain(metainterp_sd, "ffi", inline_short_preamble=False)
+    check(chain, ["OptFfiCall", "OptSimplify"])
+    #
+    metainterp_sd.config = get_pypy_config(translating=True)
+    assert not metainterp_sd.config.translation.jit_ffi
+    chain, _ = build_opt_chain(metainterp_sd, "ffi", inline_short_preamble=False)
+    check(chain, ["OptSimplify"])
+
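
test_build_opt_chain above pins down how the new build_opt_chain helper turns a colon-separated list of optimization names into an optimizer chain: OptInlineShortPreamble is prepended unless disabled, OptSimplify is always appended, unknown names are ignored, 'unroll' is reported separately rather than becoming a pass, and 'ffi' is dropped when the config says jit_ffi is off. A simplified model of that translation (ordering by a canonical list, as the expected names above imply; the config handling is omitted):

    class OptSimplify(object): pass
    class OptHeap(object): pass
    class OptIntBounds(object): pass
    class OptInlineShortPreamble(object): pass

    ALL_OPTS = [('intbounds', OptIntBounds), ('heap', OptHeap)]  # canonical order

    def build_opt_chain(enable_opts, inline_short_preamble=True):
        names = set(filter(None, enable_opts.split(':')))
        unroll = 'unroll' in names
        chain = []
        if inline_short_preamble:
            chain.append(OptInlineShortPreamble())
        for name, cls in ALL_OPTS:       # order comes from ALL_OPTS,
            if name in names:            # not from the input string
                chain.append(cls())
        chain.append(OptSimplify())      # always last
        return chain, unroll

    chain, unroll = build_opt_chain("heap:intbounds")
    assert [o.__class__.__name__ for o in chain] == \
        ["OptInlineShortPreamble", "OptIntBounds", "OptHeap", "OptSimplify"]
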
 
 # ____________________________________________________________
-class Storage(compile.ResumeGuardDescr):
-    "for tests."
-    def __init__(self, metainterp_sd=None, original_greenkey=None):
-        self.metainterp_sd = metainterp_sd
-        self.original_greenkey = original_greenkey
-    def store_final_boxes(self, op, boxes):
-        op.setfailargs(boxes)
-    def __eq__(self, other):
-        return type(self) is type(other)      # xxx obscure
+
+
+class FakeDescr(compile.ResumeGuardDescr):
+    class rd_snapshot:
+        class prev:
+            prev = None
+            boxes = []
+        boxes = []
     def clone_if_mutable(self):
-        res = Storage(self.metainterp_sd, self.original_greenkey)
-        self.copy_all_attributes_into(res)
-        return res
-
-def _sortboxes(boxes):
-    _kind2count = {history.INT: 1, history.REF: 2, history.FLOAT: 3}
-    return sorted(boxes, key=lambda box: _kind2count[box.type])
-
-class BaseTestOptimizeOpt(BaseTest):
-    jit_ffi = False
-
-    def invent_fail_descr(self, fail_args):
-        if fail_args is None:
-            return None
-        descr = Storage()
-        descr.rd_frame_info_list = resume.FrameInfo(None, "code", 11)
-        descr.rd_snapshot = resume.Snapshot(None, _sortboxes(fail_args))
-        return descr
-
-    def assert_equal(self, optimized, expected, text_right=None):
-        assert len(optimized.inputargs) == len(expected.inputargs)
-        remap = {}
-        for box1, box2 in zip(optimized.inputargs, expected.inputargs):
-            assert box1.__class__ == box2.__class__
-            remap[box2] = box1
-        assert equaloplists(optimized.operations,
-                            expected.operations, False, remap, text_right)
-
-    def optimize_loop(self, ops, optops, expected_preamble=None,
+        return self
+
+
+class BaseTestWithUnroll(BaseTest):
+
+    enable_opts = "intbounds:rewrite:virtualize:string:heap:unroll"
+
+    def optimize_loop(self, ops, expected, expected_preamble=None,
                       call_pure_results=None):
         loop = self.parse(ops)
-        if optops != "crash!":
-            expected = self.parse(optops)
-        else:
-            expected = "crash!"
+        if expected != "crash!":
+            expected = self.parse(expected)
         if expected_preamble:
             expected_preamble = self.parse(expected_preamble)
-        #
-        self.loop = loop
-        loop.call_pure_results = args_dict()
-        if call_pure_results is not None:
-            for k, v in call_pure_results.items():
-                loop.call_pure_results[list(k)] = v
+
         loop.preamble = TreeLoop('preamble')
         loop.preamble.inputargs = loop.inputargs
         loop.preamble.token = LoopToken()
-        metainterp_sd = FakeMetaInterpStaticData(self.cpu, self.jit_ffi)
-        if hasattr(self, 'vrefinfo'):
-            metainterp_sd.virtualref_info = self.vrefinfo
-        if hasattr(self, 'callinfocollection'):
-            metainterp_sd.callinfocollection = self.callinfocollection
-        class FakeDescr(compile.ResumeGuardDescr):
-            class rd_snapshot:
-                class prev:
-                    prev = None
-                    boxes = []
-                boxes = []
-            def clone_if_mutable(self):
-                return self
         loop.preamble.start_resumedescr = FakeDescr()
-        optimize_loop_1(metainterp_sd, loop, ALL_OPTS_DICT)
         #
-
+        self._do_optimize_loop(loop, call_pure_results)
+        #
         print
         print loop.preamble.inputargs
         print '\n'.join([str(o) for o in loop.preamble.operations])
@@ -204,16 +90,14 @@
         print loop.inputargs
         print '\n'.join([str(o) for o in loop.operations])
         print
-
         assert expected != "crash!", "should have raised an exception"
         self.assert_equal(loop, expected)
         if expected_preamble:
             self.assert_equal(loop.preamble, expected_preamble,
                               text_right='expected preamble')
-
         return loop
 
-class OptimizeOptTest(BaseTestOptimizeOpt):
+class OptimizeOptTest(BaseTestWithUnroll):
 
     def setup_method(self, meth=None):
         class FailDescr(compile.ResumeGuardDescr):
@@ -1497,8 +1381,8 @@
         """
         expected = """
         [i1, p0]
+        p1 = new_array(i1, descr=arraydescr)
         setarrayitem_gc(p0, 0, i1, descr=arraydescr)
-        p1 = new_array(i1, descr=arraydescr)
         jump(i1, p1)
         """
         self.optimize_loop(ops, expected)
@@ -1922,9 +1806,9 @@
         i3 = getarrayitem_gc_pure(p3, 1, descr=arraydescr)
         i4 = getarrayitem_gc(p3, i3, descr=arraydescr)
         i5 = int_add(i3, i4)
-        setarrayitem_gc(p3, 0, i5, descr=arraydescr)
         #
         setfield_gc(p1, i2, descr=valuedescr)
+        setarrayitem_gc(p3, 0, i5, descr=arraydescr)
         setfield_gc(p1, i4, descr=nextdescr)
         escape()
         jump(p1, i1, i2, p3, i3)
@@ -1934,9 +1818,9 @@
         #
         i4 = getarrayitem_gc(p3, i3, descr=arraydescr)
         i5 = int_add(i3, i4)
-        setarrayitem_gc(p3, 0, i5, descr=arraydescr)
         #
         setfield_gc(p1, i2, descr=valuedescr)
+        setarrayitem_gc(p3, 0, i5, descr=arraydescr)
         setfield_gc(p1, i4, descr=nextdescr)
         escape()
         jump(p1, i1, i2, p3, i3)
@@ -2171,6 +2055,7 @@
         self.optimize_loop(ops, expected)
 
     def test_duplicate_getarrayitem_after_setarrayitem_2(self):
+        py.test.skip("setarrayitem with variable index")
         ops = """
         [p1, p2, p3, i1]
         setarrayitem_gc(p1, 0, p2, descr=arraydescr2)
@@ -2857,8 +2742,6 @@
 
     # ----------
 
-class TestLLtype(OptimizeOptTest, LLtypeMixin):
-
     def test_residual_call_does_not_invalidate_caches(self):
         ops = """
         [p1, p2]
@@ -5199,11 +5082,8 @@
         i2 = strlen(p2)
         i3 = int_add(i1, i2)
         p3 = newstr(i3)
-        i4 = strlen(p1)
-        copystrcontent(p1, p3, 0, 0, i4)
-        i5 = strlen(p2)
-        i6 = int_add(i4, i5)      # will be killed by the backend
-        copystrcontent(p2, p3, 0, i4, i5)
+        copystrcontent(p1, p3, 0, 0, i1)
+        copystrcontent(p2, p3, 0, i1, i2)
         jump(p2, p3)
         """
         self.optimize_strunicode_loop(ops, expected)
@@ -5224,9 +5104,7 @@
         p3 = newstr(i3)
         strsetitem(p3, 0, i0)
         strsetitem(p3, 1, i1)
-        i4 = strlen(p2)
-        i5 = int_add(2, i4)      # will be killed by the backend
-        copystrcontent(p2, p3, 0, 2, i4)
+        copystrcontent(p2, p3, 0, 2, i2)
         jump(i1, i0, p3)
         """
         self.optimize_strunicode_loop(ops, expected)
@@ -5245,10 +5123,9 @@
         i2 = strlen(p2)
         i3 = int_add(i2, 2)
         p3 = newstr(i3)
-        i4 = strlen(p2)
-        copystrcontent(p2, p3, 0, 0, i4)
-        strsetitem(p3, i4, i0)
-        i5 = int_add(i4, 1)
+        copystrcontent(p2, p3, 0, 0, i2)
+        strsetitem(p3, i2, i0)
+        i5 = int_add(i2, 1)
         strsetitem(p3, i5, i1)
         i6 = int_add(i5, 1)      # will be killed by the backend
         jump(i1, i0, p3)
@@ -5270,14 +5147,9 @@
         i3 = strlen(p3)
         i123 = int_add(i12, i3)
         p5 = newstr(i123)
-        i1b = strlen(p1)
-        copystrcontent(p1, p5, 0, 0, i1b)
-        i2b = strlen(p2)
-        i12b = int_add(i1b, i2b)
-        copystrcontent(p2, p5, 0, i1b, i2b)
-        i3b = strlen(p3)
-        i123b = int_add(i12b, i3b)      # will be killed by the backend
-        copystrcontent(p3, p5, 0, i12b, i3b)
+        copystrcontent(p1, p5, 0, 0, i1)
+        copystrcontent(p2, p5, 0, i1, i2)
+        copystrcontent(p3, p5, 0, i12, i3)
         jump(p2, p3, p5)
         """
         self.optimize_strunicode_loop(ops, expected)
@@ -5293,10 +5165,8 @@
         i2 = strlen(p2)
         i3 = int_add(i2, 1)
         p3 = newstr(i3)
-        i4 = strlen(p2)
-        copystrcontent(p2, p3, 0, 0, i4)
-        strsetitem(p3, i4, 120)     # == ord('x')
-        i5 = int_add(i4, 1)      # will be killed by the backend
+        copystrcontent(p2, p3, 0, 0, i2)
+        strsetitem(p3, i2, 120)     # == ord('x')
         jump(p3)
         """
         self.optimize_strunicode_loop(ops, expected)
@@ -5420,14 +5290,12 @@
         i5 = int_add(i3, i4)
         p4 = newstr(i5)
         copystrcontent(p1, p4, i1, 0, i3)
-        i4b = strlen(p2)
-        i6 = int_add(i3, i4b)    # killed by the backend
-        copystrcontent(p2, p4, 0, i3, i4b)
+        copystrcontent(p2, p4, 0, i3, i4)
         jump(p4, i1, i2, p2)
         """
         self.optimize_strunicode_loop(ops, expected)
 
-    def test_strgetitem_small(self):
+    def test_strgetitem_bounds(self):
         ops = """
         [p0, i0]
         i1 = strgetitem(p0, i0)
@@ -5439,7 +5307,20 @@
         """
         expected = """
         [p0, i0]
-        i1 = strgetitem(p0, i0)
+        jump(p0, i0)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_unicodegetitem_bounds(self):
+        ops = """
+        [p0, i0]
+        i1 = unicodegetitem(p0, i0)
+        i2 = int_lt(i1, 0)
+        guard_false(i2) []
+        jump(p0, i0)
+        """
+        expected = """
+        [p0, i0]
         jump(p0, i0)
         """
         self.optimize_loop(ops, expected)
@@ -5477,7 +5358,6 @@
 
     # ----------
     def optimize_strunicode_loop_extradescrs(self, ops, optops, preamble=None):
-        from pypy.jit.metainterp.optimizeopt import string
         class FakeCallInfoCollection:
             def callinfo_for_oopspec(self, oopspecindex):
                 calldescrtype = type(LLtypeMixin.strequaldescr)
@@ -5516,11 +5396,8 @@
         i2 = strlen(p2)
         i3 = int_add(i1, i2)
         p4 = newstr(i3)
-        i4 = strlen(p1)
-        copystrcontent(p1, p4, 0, 0, i4)
-        i5 = strlen(p2)
-        i6 = int_add(i4, i5)      # will be killed by the backend
-        copystrcontent(p2, p4, 0, i4, i5)
+        copystrcontent(p1, p4, 0, 0, i1)
+        copystrcontent(p2, p4, 0, i1, i2)
         i0 = call(0, p3, p4, descr=strequaldescr)
         escape(i0)
         jump(p1, p2, p3)
@@ -5714,11 +5591,8 @@
         i2 = strlen(p2)
         i3 = int_add(i1, i2)
         p4 = newstr(i3)
-        i4 = strlen(p1)
-        copystrcontent(p1, p4, 0, 0, i4)
-        i5 = strlen(p2)
-        i6 = int_add(i4, i5)      # will be killed by the backend
-        copystrcontent(p2, p4, 0, i4, i5)
+        copystrcontent(p1, p4, 0, 0, i1)
+        copystrcontent(p2, p4, 0, i1, i2)
         i0 = call(0, s"hello world", p4, descr=streq_nonnull_descr)
         escape(i0)
         jump(p1, p2)
@@ -5953,3 +5827,54 @@
         jump(i3, i4)
         """
         self.optimize_loop(ops, expected)
+
+    def test_forced_virtual_pure_getfield(self):
+        ops = """
+        [p0]
+        p1 = getfield_gc_pure(p0, descr=valuedescr)
+        jump(p1)
+        """
+        self.optimize_loop(ops, ops)
+
+        ops = """
+        [p0]
+        p1 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(p1, p0, descr=valuedescr)
+        escape(p1)
+        p2 = getfield_gc_pure(p1, descr=valuedescr)
+        escape(p2)
+        jump(p0)
+        """
+        expected = """
+        [p0]
+        p1 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(p1, p0, descr=valuedescr)
+        escape(p1)
+        escape(p0)
+        jump(p0)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_setarrayitem_lazy(self):
+        ops = """
+        [i0, i1]
+        p0 = escape()
+        i2 = escape()
+        p1 = new_with_vtable(ConstClass(node_vtable))
+        setarrayitem_gc(p0, 2, p1, descr=arraydescr)
+        guard_true(i2) []
+        setarrayitem_gc(p0, 2, p0, descr=arraydescr)
+        jump(i0, i1)
+        """
+        expected = """
+        [i0, i1]
+        p0 = escape()
+        i2 = escape()
+        guard_true(i2) [p0]
+        setarrayitem_gc(p0, 2, p0, descr=arraydescr)
+        jump(i0, i1)
+        """
+        self.optimize_loop(ops, expected)
+
+class TestLLtype(OptimizeOptTest, LLtypeMixin):
+    pass
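
The two tests appended above cover the interaction of virtuals with the heap pass: a getfield_gc_pure on a just-forced virtual is answered from the value that was stored into it (so escape(p2) becomes escape(p0)), and a setarrayitem_gc with a constant index stays pending until something forces it, so the store of the virtual p1, which never escapes, vanishes and only the final store survives, with p0 carried in the guard's fail args for resume. A minimal sketch of the pending-store bookkeeping only (illustrative, not heap.py; the resume-data side is not modelled):

    pending = {}                  # (array box, constant index) -> pending value

    def setarrayitem(array, index, value):
        # a later store to the same cell makes the earlier pending one dead
        pending[(array, index)] = value

    def emit_pending():
        return [('setarrayitem_gc', a, i, v)
                for (a, i), v in sorted(pending.items())]

    setarrayitem('p0', 2, 'p1')   # virtual p1: never emitted
    setarrayitem('p0', 2, 'p0')   # overwrites the pending store
    assert emit_pending() == [('setarrayitem_gc', 'p0', 2, 'p0')]
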
diff --git a/pypy/jit/metainterp/test/test_optimizeutil.py b/pypy/jit/metainterp/optimizeopt/test/test_util.py
rename from pypy/jit/metainterp/test/test_optimizeutil.py
rename to pypy/jit/metainterp/optimizeopt/test/test_util.py
--- a/pypy/jit/metainterp/test/test_optimizeutil.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_util.py
@@ -9,11 +9,15 @@
 from pypy.jit.metainterp.history import (BoxInt, BoxPtr, ConstInt, ConstPtr,
                                          Const, TreeLoop, BoxObj,
                                          ConstObj, AbstractDescr)
-from pypy.jit.metainterp.optimizeutil import sort_descrs, InvalidLoop
+from pypy.jit.metainterp.optimizeopt.util import sort_descrs, equaloplists
+from pypy.jit.metainterp.optimize import InvalidLoop
 from pypy.jit.codewriter.effectinfo import EffectInfo
 from pypy.jit.codewriter.heaptracker import register_known_gctype, adr2int
-from pypy.jit.tool.oparser import parse
+from pypy.jit.tool.oparser import parse, pure_parse
 from pypy.jit.metainterp.quasiimmut import QuasiImmutDescr
+from pypy.jit.metainterp import compile, resume, history
+from pypy.jit.metainterp.jitprof import EmptyProfiler
+from pypy.config.pypyoption import get_pypy_config
 
 def test_sort_descrs():
     class PseudoDescr(AbstractDescr):
@@ -28,6 +32,44 @@
         sort_descrs(lst2)
         assert lst2 == lst
 
+def test_equaloplists():
+    ops = """
+    [i0]
+    i1 = int_add(i0, 1)
+    i2 = int_add(i1, 1)
+    guard_true(i1) [i2]
+    jump(i1)
+    """
+    namespace = {}
+    loop1 = pure_parse(ops, namespace=namespace)
+    loop2 = pure_parse(ops, namespace=namespace)
+    loop3 = pure_parse(ops.replace("i2 = int_add", "i2 = int_sub"),
+                       namespace=namespace)
+    assert equaloplists(loop1.operations, loop2.operations)
+    py.test.raises(AssertionError,
+                   "equaloplists(loop1.operations, loop3.operations)")
+
+def test_equaloplists_fail_args():
+    ops = """
+    [i0]
+    i1 = int_add(i0, 1)
+    i2 = int_add(i1, 1)
+    guard_true(i1) [i2, i1]
+    jump(i1)
+    """
+    namespace = {}
+    loop1 = pure_parse(ops, namespace=namespace)
+    loop2 = pure_parse(ops.replace("[i2, i1]", "[i1, i2]"),
+                       namespace=namespace)
+    py.test.raises(AssertionError,
+                   "equaloplists(loop1.operations, loop2.operations)")
+    assert equaloplists(loop1.operations, loop2.operations,
+                        strict_fail_args=False)
+    loop3 = pure_parse(ops.replace("[i2, i1]", "[i2, i0]"),
+                       namespace=namespace)
+    py.test.raises(AssertionError,
+                   "equaloplists(loop1.operations, loop3.operations)")
+
 # ____________________________________________________________
 
 class LLtypeMixin(object):
@@ -124,19 +166,19 @@
     FUNC = lltype.FuncType([lltype.Signed], lltype.Signed)
     plaincalldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
     nonwritedescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT,
-                                    EffectInfo([], [], []))
+                                    EffectInfo([], [], [], []))
     writeadescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT,
-                                  EffectInfo([], [adescr], []))
+                                  EffectInfo([], [], [adescr], []))
     writearraydescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT,
-                                      EffectInfo([], [adescr], [arraydescr]))
+                                  EffectInfo([], [], [adescr], [arraydescr]))
     readadescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT,
-                                 EffectInfo([adescr], [], []))
+                                 EffectInfo([adescr], [], [], []))
     mayforcevirtdescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT,
-                 EffectInfo([nextdescr], [], [],
+                 EffectInfo([nextdescr], [], [], [],
                             EffectInfo.EF_FORCES_VIRTUAL_OR_VIRTUALIZABLE,
                             can_invalidate=True))
     arraycopydescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT,
-                 EffectInfo([], [], [], oopspecindex=EffectInfo.OS_ARRAYCOPY))
+             EffectInfo([], [], [], [], oopspecindex=EffectInfo.OS_ARRAYCOPY))
 
     for _name, _os in [
         ('strconcatdescr',               'OS_STR_CONCAT'),
@@ -153,15 +195,15 @@
         _oopspecindex = getattr(EffectInfo, _os)
         locals()[_name] = \
             cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT,
-                EffectInfo([], [], [], oopspecindex=_oopspecindex))
+                EffectInfo([], [], [], [], oopspecindex=_oopspecindex))
         #
         _oopspecindex = getattr(EffectInfo, _os.replace('STR', 'UNI'))
         locals()[_name.replace('str', 'unicode')] = \
             cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT,
-                EffectInfo([], [], [], oopspecindex=_oopspecindex))
+                EffectInfo([], [], [], [], oopspecindex=_oopspecindex))
 
     s2u_descr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT,
-                EffectInfo([], [], [], oopspecindex=EffectInfo.OS_STR2UNICODE))
+            EffectInfo([], [], [], [], oopspecindex=EffectInfo.OS_STR2UNICODE))
     #
 
     class LoopToken(AbstractDescr):
@@ -256,8 +298,45 @@
 ##                       u_vtable_adr: cpu.typedescrof(U)}
 ##    namespace = locals()
 
+# ____________________________________________________________
+
+
+
+class Fake(object):
+    failargs_limit = 1000
+    storedebug = None
+
+
+class FakeMetaInterpStaticData(object):
+
+    def __init__(self, cpu):
+        self.cpu = cpu
+        self.profiler = EmptyProfiler()
+        self.options = Fake()
+        self.globaldata = Fake()
+        self.config = get_pypy_config(translating=True)
+        self.config.translation.jit_ffi = True
+
+
+class Storage(compile.ResumeGuardDescr):
+    "for tests."
+    def __init__(self, metainterp_sd=None, original_greenkey=None):
+        self.metainterp_sd = metainterp_sd
+        self.original_greenkey = original_greenkey
+    def store_final_boxes(self, op, boxes):
+        op.setfailargs(boxes)
+    def __eq__(self, other):
+        return type(self) is type(other)      # xxx obscure
+    def clone_if_mutable(self):
+        res = Storage(self.metainterp_sd, self.original_greenkey)
+        self.copy_all_attributes_into(res)
+        return res
+
+def _sortboxes(boxes):
+    _kind2count = {history.INT: 1, history.REF: 2, history.FLOAT: 3}
+    return sorted(boxes, key=lambda box: _kind2count[box.type])
+
 class BaseTest(object):
-    invent_fail_descr = None
 
     def parse(self, s, boxkinds=None):
         return parse(s, self.cpu, self.namespace,
@@ -265,5 +344,40 @@
                      boxkinds=boxkinds,
                      invent_fail_descr=self.invent_fail_descr)
 
+    def invent_fail_descr(self, model, fail_args):
+        if fail_args is None:
+            return None
+        descr = Storage()
+        descr.rd_frame_info_list = resume.FrameInfo(None, "code", 11)
+        descr.rd_snapshot = resume.Snapshot(None, _sortboxes(fail_args))
+        return descr
+
+    def assert_equal(self, optimized, expected, text_right=None):
+        from pypy.jit.metainterp.optimizeopt.util import equaloplists
+        assert len(optimized.inputargs) == len(expected.inputargs)
+        remap = {}
+        for box1, box2 in zip(optimized.inputargs, expected.inputargs):
+            assert box1.__class__ == box2.__class__
+            remap[box2] = box1
+        assert equaloplists(optimized.operations,
+                            expected.operations, False, remap, text_right)
+
+    def _do_optimize_loop(self, loop, call_pure_results):
+        from pypy.jit.metainterp.optimizeopt import optimize_loop_1
+        from pypy.jit.metainterp.optimizeopt.util import args_dict
+
+        self.loop = loop
+        loop.call_pure_results = args_dict()
+        if call_pure_results is not None:
+            for k, v in call_pure_results.items():
+                loop.call_pure_results[list(k)] = v
+        metainterp_sd = FakeMetaInterpStaticData(self.cpu)
+        if hasattr(self, 'vrefinfo'):
+            metainterp_sd.virtualref_info = self.vrefinfo
+        if hasattr(self, 'callinfocollection'):
+            metainterp_sd.callinfocollection = self.callinfocollection
+        #
+        optimize_loop_1(metainterp_sd, loop, self.enable_opts)
+
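
The reorganised BaseTest now owns invent_fail_descr, assert_equal and _do_optimize_loop, so a concrete test class only has to choose its passes via enable_opts and feed parsed loops through _do_optimize_loop; roughly (module path as introduced by the rename in this diff):

    from pypy.jit.metainterp.optimizeopt.test.test_util import BaseTest, LLtypeMixin

    class TestMyOpts(BaseTest, LLtypeMixin):
        enable_opts = "intbounds:rewrite:virtualize:heap"   # picked per test class

        def optimize(self, ops):
            loop = self.parse(ops)              # namespace comes from LLtypeMixin
            self._do_optimize_loop(loop, None)  # runs optimize_loop_1 with the passes
            return loop
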
 # ____________________________________________________________
 
diff --git a/pypy/jit/metainterp/optimizeopt/unroll.py b/pypy/jit/metainterp/optimizeopt/unroll.py
--- a/pypy/jit/metainterp/optimizeopt/unroll.py
+++ b/pypy/jit/metainterp/optimizeopt/unroll.py
@@ -5,7 +5,7 @@
 from pypy.jit.metainterp.resume import Snapshot
 from pypy.jit.metainterp.history import TreeLoop, LoopToken
 from pypy.rlib.debug import debug_start, debug_stop, debug_print
-from pypy.jit.metainterp.optimizeutil import InvalidLoop, RetraceLoop
+from pypy.jit.metainterp.optimize import InvalidLoop, RetraceLoop
 from pypy.jit.metainterp.jitexc import JitException
 from pypy.jit.metainterp.history import make_hashable_int
 from pypy.jit.codewriter.effectinfo import EffectInfo
@@ -546,7 +546,7 @@
             effectinfo = descr.get_extra_info()
             if effectinfo is not None:
                 if effectinfo.extraeffect == EffectInfo.EF_LOOPINVARIANT or \
-                   effectinfo.extraeffect == EffectInfo.EF_PURE:
+                   effectinfo.extraeffect == EffectInfo.EF_ELIDABLE:
                     return True
         return False
     
@@ -676,24 +676,28 @@
                             jumpop = self.optimizer.newoperations.pop()
                             assert jumpop.getopnum() == rop.JUMP
                             for guard in extra_guards:
-                                descr = sh.start_resumedescr.clone_if_mutable()
-                                self.inliner.inline_descr_inplace(descr)
-                                guard.setdescr(descr)
+                                d = sh.start_resumedescr.clone_if_mutable()
+                                self.inliner.inline_descr_inplace(d)
+                                guard.setdescr(d)
                                 self.emit_operation(guard)
                             self.optimizer.newoperations.append(jumpop)
                         return
-                retraced_count = len(short)
-                if descr.failed_states:
-                    retraced_count += len(descr.failed_states)
+                retraced_count = descr.retraced_count
+                descr.retraced_count += 1
                 limit = self.optimizer.metainterp_sd.warmrunnerdesc.memory_manager.retrace_limit
                 if not self.retraced and retraced_count<limit:
                     if not descr.failed_states:
+                        debug_print("Retracing (%d of %d)" % (retraced_count,
+                                                              limit))
                         raise RetraceLoop
                     for failed in descr.failed_states:
                         if failed.generalization_of(virtual_state):
                             # Retracing once more will most likely fail again
                             break
                     else:
+                        debug_print("Retracing (%d of %d)" % (retraced_count,
+                                                              limit))
+                                                              
                         raise RetraceLoop
                 else:
                     if not descr.failed_states:
diff --git a/pypy/jit/metainterp/optimizeutil.py b/pypy/jit/metainterp/optimizeopt/util.py
rename from pypy/jit/metainterp/optimizeutil.py
rename to pypy/jit/metainterp/optimizeopt/util.py
--- a/pypy/jit/metainterp/optimizeutil.py
+++ b/pypy/jit/metainterp/optimizeopt/util.py
@@ -1,21 +1,10 @@
+import py
 from pypy.rlib.objectmodel import r_dict, compute_identity_hash
 from pypy.rlib.rarithmetic import intmask
 from pypy.rlib.unroll import unrolling_iterable
 from pypy.jit.metainterp import resoperation, history
-from pypy.jit.metainterp.jitexc import JitException
 from pypy.rlib.debug import make_sure_not_resized
-
-class InvalidLoop(JitException):
-    """Raised when the optimize*.py detect that the loop that
-    we are trying to build cannot possibly make sense as a
-    long-running loop (e.g. it cannot run 2 complete iterations)."""
-
-class RetraceLoop(JitException):
-    """ Raised when inlining a short preamble resulted in an
-        InvalidLoop. This means the optimized loop is too specialized
-        to be useful here, so we trace it again and produced a second
-        copy specialized in some different way.
-    """
+from pypy.jit.metainterp.resoperation import rop
 
 # ____________________________________________________________
 # Misc. utilities
@@ -31,9 +20,25 @@
         if op_prefix and not name.startswith(op_prefix):
             continue
         if hasattr(Class, name_prefix + name):
-            result.append((value, getattr(Class, name_prefix + name)))
+            opclass = resoperation.opclasses[getattr(rop, name)]
+            print value, name, opclass
+            result.append((value, opclass, getattr(Class, name_prefix + name)))
     return unrolling_iterable(result)
 
+def make_dispatcher_method(Class, name_prefix, op_prefix=None, default=None):
+    ops = _findall(Class, name_prefix, op_prefix)
+    def dispatch(self, op, *args):
+        opnum = op.getopnum()
+        for value, cls, func in ops:
+            if opnum == value:
+                assert isinstance(op, cls)
+                return func(self, op, *args)
+        if default:
+            return default(self, op, *args)
+    dispatch.func_name = "dispatch_" + name_prefix
+    return dispatch
+
+
 def partition(array, left, right):
     last_item = array[right]
     pivot = last_item.sort_key()
@@ -113,3 +118,49 @@
 
 def args_dict_box():
     return r_dict(args_eq, args_hash)
+
+
+# ____________________________________________________________
+
+def equaloplists(oplist1, oplist2, strict_fail_args=True, remap={},
+                 text_right=None):
+    # Try to use the full width of the terminal to display the lists.
+    # Unfortunately, this does not work with the default capture method of
+    # py.test (which is fd), so you need to use either -s or --capture=sys,
+    # else you get the standard 80-column width.
+    totwidth = py.io.get_terminal_width()
+    width = totwidth / 2 - 1
+    print ' Comparing lists '.center(totwidth, '-')
+    text_right = text_right or 'expected'
+    print '%s| %s' % ('optimized'.center(width), text_right.center(width))
+    for op1, op2 in zip(oplist1, oplist2):
+        txt1 = str(op1)
+        txt2 = str(op2)
+        while txt1 or txt2:
+            print '%s| %s' % (txt1[:width].ljust(width), txt2[:width])
+            txt1 = txt1[width:]
+            txt2 = txt2[width:]
+        assert op1.getopnum() == op2.getopnum()
+        assert op1.numargs() == op2.numargs()
+        for i in range(op1.numargs()):
+            x = op1.getarg(i)
+            y = op2.getarg(i)
+            assert x == remap.get(y, y)
+        if op2.result in remap:
+            assert op1.result == remap[op2.result]
+        else:
+            remap[op2.result] = op1.result
+        if op1.getopnum() != rop.JUMP:      # xxx obscure
+            assert op1.getdescr() == op2.getdescr()
+        if op1.getfailargs() or op2.getfailargs():
+            assert len(op1.getfailargs()) == len(op2.getfailargs())
+            if strict_fail_args:
+                for x, y in zip(op1.getfailargs(), op2.getfailargs()):
+                    assert x == remap.get(y, y)
+            else:
+                fail_args1 = set(op1.getfailargs())
+                fail_args2 = set([remap.get(y, y) for y in op2.getfailargs()])
+                assert fail_args1 == fail_args2
+    assert len(oplist1) == len(oplist2)
+    print '-'*totwidth
+    return True
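
For orientation, a toy illustration (not from this changeset) of the dispatcher pattern that make_dispatcher_method enables: methods named optimize_<OPNAME> are collected once into an unrolling_iterable and dispatched on op.getopnum(), with an optional default fallback. OptVirtualize and OptString are wired up exactly this way further down in the diff; the OptToy class here is invented.

    from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method

    class OptToy(object):
        def emit_operation(self, op):       # fallback for unhandled opnums
            print 'pass through', op

        def optimize_INT_ADD(self, op):     # found via the 'optimize_' prefix
            print 'special handling of', op

    dispatch_opt = make_dispatcher_method(OptToy, 'optimize_',
                                          default=OptToy.emit_operation)
    OptToy.propagate_forward = dispatch_opt
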
diff --git a/pypy/jit/metainterp/optimizeopt/virtualize.py b/pypy/jit/metainterp/optimizeopt/virtualize.py
--- a/pypy/jit/metainterp/optimizeopt/virtualize.py
+++ b/pypy/jit/metainterp/optimizeopt/virtualize.py
@@ -1,7 +1,7 @@
 from pypy.jit.metainterp.history import Const, ConstInt, BoxInt
 from pypy.jit.metainterp.resoperation import rop, ResOperation
-from pypy.jit.metainterp.optimizeutil import _findall, sort_descrs
-from pypy.jit.metainterp.optimizeutil import descrlist_dict
+from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
+from pypy.jit.metainterp.optimizeopt.util import descrlist_dict, sort_descrs
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.jit.metainterp.optimizeopt import optimizer
 from pypy.jit.metainterp.executor import execute
@@ -20,6 +20,9 @@
         self.source_op = source_op  # the NEW_WITH_VTABLE/NEW_ARRAY operation
                                     # that builds this box
 
+    def is_forced_virtual(self):
+        return self.box is not None
+
     def get_key_box(self):
         if self.box is None:
             return self.keybox
@@ -120,7 +123,6 @@
                 op = ResOperation(rop.SETFIELD_GC, [box, subbox], None,
                                   descr=ofs)
                 newoperations.append(op)
-            self._fields = None
 
     def _get_field_descr_list(self):
         _cached_sorted_fields = self._cached_sorted_fields
@@ -351,7 +353,7 @@
         if not self.optimizer.cpu.ts.CONST_NULL.same_constant(objbox):
             seo(ResOperation(rop.SETFIELD_GC, op.getarglist(), None,
                              descr = vrefinfo.descr_forced))
-        
+
         # - set 'virtual_token' to TOKEN_NONE
         args = [op.getarg(0), ConstInt(vrefinfo.TOKEN_NONE)]
         seo(ResOperation(rop.SETFIELD_GC, args, None,
@@ -365,6 +367,14 @@
 
     def optimize_GETFIELD_GC(self, op):
         value = self.getvalue(op.getarg(0))
+        # If this is an immutable field (as indicated by op.is_always_pure()),
+        # then it's safe to reuse the virtual's field, even if it has been
+        # forced, because it should never be written to again.
+        if value.is_forced_virtual() and op.is_always_pure():
+            fieldvalue = value.getfield(op.getdescr(), None)
+            if fieldvalue is not None:
+                self.make_equal_to(op.result, fieldvalue)
+                return
         if value.is_virtual():
             assert isinstance(value, AbstractVirtualValue)
             fieldvalue = value.getfield(op.getdescr(), None)
@@ -382,6 +392,7 @@
 
     def optimize_SETFIELD_GC(self, op):
         value = self.getvalue(op.getarg(0))
+
         if value.is_virtual():
             fieldvalue = self.getvalue(op.getarg(1))
             value.setfield(op.getdescr(), fieldvalue)
@@ -445,13 +456,8 @@
         ###self.heap_op_optimizer.optimize_SETARRAYITEM_GC(op, value, fieldvalue)
         self.emit_operation(op)
 
-    def propagate_forward(self, op):
-        opnum = op.getopnum()
-        for value, func in optimize_ops:
-            if opnum == value:
-                func(self, op)
-                break
-        else:
-            self.emit_operation(op)
 
-optimize_ops = _findall(OptVirtualize, 'optimize_')
+dispatch_opt = make_dispatcher_method(OptVirtualize, 'optimize_',
+        default=OptVirtualize.emit_operation)
+
+OptVirtualize.propagate_forward = dispatch_opt
diff --git a/pypy/jit/metainterp/optimizeopt/string.py b/pypy/jit/metainterp/optimizeopt/vstring.py
rename from pypy/jit/metainterp/optimizeopt/string.py
rename to pypy/jit/metainterp/optimizeopt/vstring.py
--- a/pypy/jit/metainterp/optimizeopt/string.py
+++ b/pypy/jit/metainterp/optimizeopt/vstring.py
@@ -8,7 +8,7 @@
 from pypy.jit.metainterp.optimizeopt import optimizer, virtualize
 from pypy.jit.metainterp.optimizeopt.optimizer import CONST_0, CONST_1
 from pypy.jit.metainterp.optimizeopt.optimizer import llhelper
-from pypy.jit.metainterp.optimizeutil import _findall
+from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
 from pypy.jit.codewriter.effectinfo import EffectInfo
 from pypy.jit.codewriter import heaptracker
 from pypy.rlib.unroll import unrolling_iterable
@@ -61,7 +61,7 @@
         self.ensure_nonnull()
         box = self.force_box()
         lengthbox = BoxInt()
-        optimization.emit_operation(ResOperation(mode.STRLEN, [box], lengthbox))
+        optimization.optimize_default(ResOperation(mode.STRLEN, [box], lengthbox))
         return lengthbox
 
     @specialize.arg(1)
@@ -72,13 +72,13 @@
         else:
             return None
 
-    def string_copy_parts(self, optimization, targetbox, offsetbox, mode):
+    def string_copy_parts(self, optimizer, targetbox, offsetbox, mode):
         # Copies the pointer-to-string 'self' into the target string
         # given by 'targetbox', at the specified offset.  Returns the offset
         # at the end of the copy.
-        lengthbox = self.getstrlen(optimization, mode)
+        lengthbox = self.getstrlen(optimizer, mode)
         srcbox = self.force_box()
-        return copy_str_content(optimization, srcbox, targetbox,
+        return copy_str_content(optimizer, srcbox, targetbox,
                                 CONST_0, offsetbox, lengthbox, mode)
 
 
@@ -335,7 +335,7 @@
     if optimizer is None:
         return None
     resbox = BoxInt()
-    optimizer.emit_operation(ResOperation(rop.INT_ADD, [box1, box2], resbox))
+    optimizer.optimize_default(ResOperation(rop.INT_ADD, [box1, box2], resbox))
     return resbox
 
 def _int_sub(optimizer, box1, box2):
@@ -345,7 +345,7 @@
         if isinstance(box1, ConstInt):
             return ConstInt(box1.value - box2.value)
     resbox = BoxInt()
-    optimizer.emit_operation(ResOperation(rop.INT_SUB, [box1, box2], resbox))
+    optimizer.optimize_default(ResOperation(rop.INT_SUB, [box1, box2], resbox))
     return resbox
 
 def _strgetitem(optimizer, strbox, indexbox, mode):
@@ -357,7 +357,7 @@
             s = strbox.getref(lltype.Ptr(rstr.UNICODE))
             return ConstInt(ord(s.chars[indexbox.getint()]))
     resbox = BoxInt()
-    optimizer.emit_operation(ResOperation(mode.STRGETITEM, [strbox, indexbox],
+    optimizer.optimize_default(ResOperation(mode.STRGETITEM, [strbox, indexbox],
                                       resbox))
     return resbox
 
@@ -440,8 +440,7 @@
             if vindex.is_constant():
                 return value.getitem(vindex.box.getint())
         #
-        resbox = _strgetitem(self.optimizer,
-                             value.force_box(),vindex.force_box(), mode)
+        resbox = _strgetitem(self.optimizer, value.force_box(), vindex.force_box(), mode)
         return self.getvalue(resbox)
 
     def optimize_STRLEN(self, op):
@@ -451,7 +450,7 @@
 
     def _optimize_STRLEN(self, op, mode):
         value = self.getvalue(op.getarg(0))
-        lengthbox = value.getstrlen(self, mode)
+        lengthbox = value.getstrlen(self.optimizer, mode)
         self.make_equal_to(op.result, self.getvalue(lengthbox))
 
     def optimize_CALL(self, op):
@@ -650,16 +649,11 @@
             self.emit_operation(op)
             return
 
-        opnum = op.getopnum()
-        for value, func in optimize_ops:
-            if opnum == value:
-                func(self, op)
-                break
-        else:
-            self.emit_operation(op)
+        dispatch_opt(self, op)
 
 
-optimize_ops = _findall(OptString, 'optimize_')
+dispatch_opt = make_dispatcher_method(OptString, 'optimize_',
+        default=OptString.emit_operation)
 
 def _findall_call_oopspec():
     prefix = 'opt_call_stroruni_'
diff --git a/pypy/jit/metainterp/pyjitpl.py b/pypy/jit/metainterp/pyjitpl.py
--- a/pypy/jit/metainterp/pyjitpl.py
+++ b/pypy/jit/metainterp/pyjitpl.py
@@ -1,5 +1,5 @@
-import py, os, sys
-from pypy.rpython.lltypesystem import lltype, llmemory, rclass
+import py, sys
+from pypy.rpython.lltypesystem import lltype, rclass
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.rlib.unroll import unrolling_iterable
 from pypy.rlib.debug import debug_start, debug_stop, debug_print
@@ -15,13 +15,13 @@
 from pypy.jit.metainterp.jitprof import EmptyProfiler
 from pypy.jit.metainterp.jitprof import GUARDS, RECORDED_OPS, ABORT_ESCAPE
 from pypy.jit.metainterp.jitprof import ABORT_TOO_LONG, ABORT_BRIDGE, \
-                                        ABORT_BAD_LOOP, ABORT_FORCE_QUASIIMMUT
+                                        ABORT_FORCE_QUASIIMMUT
 from pypy.jit.metainterp.jitexc import JitException, get_llexception
-from pypy.rlib.rarithmetic import intmask
 from pypy.rlib.objectmodel import specialize
-from pypy.jit.codewriter.jitcode import JitCode, SwitchDictDescr, MissingLiveness
-from pypy.jit.codewriter import heaptracker, longlong
-from pypy.jit.metainterp.optimizeutil import RetraceLoop, args_dict_box, args_dict
+from pypy.jit.codewriter.jitcode import JitCode, SwitchDictDescr
+from pypy.jit.codewriter import heaptracker
+from pypy.jit.metainterp.optimizeopt.util import args_dict_box
+from pypy.jit.metainterp.optimize import RetraceLoop
 
 # ____________________________________________________________
 
@@ -310,26 +310,27 @@
                 self.opimpl_goto_if_not(condbox, target)
         ''' % (_opimpl, _opimpl.upper())).compile()
 
+
+    def _establish_nullity(self, box, orgpc):
+        value = box.nonnull()
+        if value:
+            if box not in self.metainterp.known_class_boxes:
+                self.generate_guard(rop.GUARD_NONNULL, box, resumepc=orgpc)
+        else:
+            if not isinstance(box, Const):
+                self.generate_guard(rop.GUARD_ISNULL, box, resumepc=orgpc)
+                promoted_box = box.constbox()
+                self.metainterp.replace_box(box, promoted_box)
+        return value
+
     @arguments("orgpc", "box", "label")
     def opimpl_goto_if_not_ptr_nonzero(self, orgpc, box, target):
-        value = box.nonnull()
-        if value:
-            opnum = rop.GUARD_NONNULL
-        else:
-            opnum = rop.GUARD_ISNULL
-        self.generate_guard(opnum, box, resumepc=orgpc)
-        if not value:
+        if not self._establish_nullity(box, orgpc):
             self.pc = target
 
     @arguments("orgpc", "box", "label")
     def opimpl_goto_if_not_ptr_iszero(self, orgpc, box, target):
-        value = box.nonnull()
-        if value:
-            opnum = rop.GUARD_NONNULL
-        else:
-            opnum = rop.GUARD_ISNULL
-        self.generate_guard(opnum, box, resumepc=orgpc)
-        if value:
+        if self._establish_nullity(box, orgpc):
             self.pc = target
 
     @arguments("box", "box", "box")
@@ -364,7 +365,9 @@
     def opimpl_new_with_vtable(self, sizedescr):
         cpu = self.metainterp.cpu
         cls = heaptracker.descr2vtable(cpu, sizedescr)
-        return self.execute(rop.NEW_WITH_VTABLE, ConstInt(cls))
+        resbox = self.execute(rop.NEW_WITH_VTABLE, ConstInt(cls))
+        self.metainterp.known_class_boxes[resbox] = None
+        return resbox
 
 ##    @FixME  #arguments("box")
 ##    def opimpl_runtimenew(self, classbox):
@@ -845,7 +848,9 @@
     @arguments("orgpc", "box")
     def opimpl_guard_class(self, orgpc, box):
         clsbox = self.cls_of_box(box)
-        self.generate_guard(rop.GUARD_CLASS, box, [clsbox], resumepc=orgpc)
+        if box not in self.metainterp.known_class_boxes:
+            self.generate_guard(rop.GUARD_CLASS, box, [clsbox], resumepc=orgpc)
+            self.metainterp.known_class_boxes[box] = None
         return clsbox
 
     @arguments("int", "orgpc")
@@ -867,7 +872,7 @@
         any_operation = len(self.metainterp.history.operations) > 0
         jitdriver_sd = self.metainterp.staticdata.jitdrivers_sd[jdindex]
         self.verify_green_args(jitdriver_sd, greenboxes)
-        self.debug_merge_point(jitdriver_sd, self.metainterp.in_recursion,
+        self.debug_merge_point(jitdriver_sd, jdindex, self.metainterp.in_recursion,
                                greenboxes)
 
         if self.metainterp.seen_loop_header_for_jdindex < 0:
@@ -914,13 +919,12 @@
                                     assembler_call=True)
             raise ChangeFrame
 
-    def debug_merge_point(self, jitdriver_sd, in_recursion, greenkey):
+    def debug_merge_point(self, jitdriver_sd, jd_index, in_recursion, greenkey):
         # debugging: produce a DEBUG_MERGE_POINT operation
         loc = jitdriver_sd.warmstate.get_location_str(greenkey)
         debug_print(loc)
-        constloc = self.metainterp.cpu.ts.conststr(loc)
-        self.metainterp.history.record(rop.DEBUG_MERGE_POINT,
-                                       [constloc, ConstInt(in_recursion)], None)
+        args = [ConstInt(jd_index), ConstInt(in_recursion)] + greenkey
+        self.metainterp.history.record(rop.DEBUG_MERGE_POINT, args, None)
 
     @arguments("box", "label")
     def opimpl_goto_if_exception_mismatch(self, vtablebox, next_exc_target):
@@ -1234,7 +1238,7 @@
             effect = effectinfo.extraeffect
             if effect == effectinfo.EF_CANNOT_RAISE:
                 return self.execute_varargs(rop.CALL, allboxes, descr, False)
-            elif effect == effectinfo.EF_PURE:
+            elif effect == effectinfo.EF_ELIDABLE:
                 return self.metainterp.record_result_of_call_pure(
                     self.execute_varargs(rop.CALL, allboxes, descr, False))
             elif effect == effectinfo.EF_LOOPINVARIANT:
@@ -1265,8 +1269,7 @@
     logger_ops = None
 
     def __init__(self, cpu, options,
-                 ProfilerClass=EmptyProfiler, warmrunnerdesc=None,
-                 jit_ffi=True):
+                 ProfilerClass=EmptyProfiler, warmrunnerdesc=None):
         self.cpu = cpu
         self.stats = self.cpu.stats
         self.options = options
@@ -1276,7 +1279,11 @@
         self.profiler = ProfilerClass()
         self.profiler.cpu = cpu
         self.warmrunnerdesc = warmrunnerdesc
-        self.jit_ffi = jit_ffi
+        if warmrunnerdesc:
+            self.config = warmrunnerdesc.translator.config
+        else:
+            from pypy.config.pypyoption import get_pypy_config
+            self.config = get_pypy_config(translating=True)
 
         backendmodule = self.cpu.__module__
         backendmodule = backendmodule.split('.')[-2]
@@ -1447,6 +1454,8 @@
         self.last_exc_value_box = None
         self.retracing_loop_from = None
         self.call_pure_results = args_dict_box()
+        # contains boxes whose class is already known
+        self.known_class_boxes = {}
 
     def perform_call(self, jitcode, boxes, greenkey=None):
         # causes the metainterp to enter the given subfunction
@@ -1787,6 +1796,8 @@
                 duplicates[box] = None
 
     def reached_loop_header(self, greenboxes, redboxes, resumedescr):
+        self.known_class_boxes = {}
+
         duplicates = {}
         self.remove_consts_and_duplicates(redboxes, len(redboxes),
                                           duplicates)
@@ -1927,7 +1938,6 @@
 
         self.history.inputargs = original_inputargs
         self.history.operations.pop()     # remove the JUMP
-        # FIXME: Why is self.history.inputargs not restored?
 
     def compile_bridge(self, live_arg_boxes):
         num_green_args = self.jitdriver_sd.num_green_args
@@ -1963,6 +1973,8 @@
                                               start_resumedescr, False)
         self.history.operations.pop()     # remove the JUMP
         if loop_token is None:
+            self.history.inputargs = original_inputargs
+            self.history.operations = original_operations
             return
 
         if loop_token.short_preamble:
@@ -2117,7 +2129,6 @@
     def vrefs_after_residual_call(self):
         vrefinfo = self.staticdata.virtualref_info
         for i in range(0, len(self.virtualref_boxes), 2):
-            virtualbox = self.virtualref_boxes[i]
             vrefbox = self.virtualref_boxes[i+1]
             vref = vrefbox.getref_base()
             if vrefinfo.tracing_after_residual_call(vref):
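
As a side note, the known_class_boxes bookkeeping introduced in this file can be summed up with a small sketch (illustration only; the real dictionary lives on the MetaInterp instance): once a box's class is pinned, by new_with_vtable or by the first guard, later guard_class/guard_nonnull on the same box are not recorded again, and the set is forgotten at every loop header.

    known_class_boxes = {}                  # box -> None, "class already known"

    def guard_class_once(box, generate_guard):
        if box not in known_class_boxes:
            generate_guard(box)             # record GUARD_CLASS only once
            known_class_boxes[box] = None

    def reached_loop_header():
        known_class_boxes.clear()           # start over for the next iteration
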
diff --git a/pypy/jit/metainterp/resoperation.py b/pypy/jit/metainterp/resoperation.py
--- a/pypy/jit/metainterp/resoperation.py
+++ b/pypy/jit/metainterp/resoperation.py
@@ -191,9 +191,15 @@
         # of the operation.  It must inherit from AbstractDescr.  The
         # backend provides it with cpu.fielddescrof(), cpu.arraydescrof(),
         # cpu.calldescrof(), and cpu.typedescrof().
+        self._check_descr(descr)
+        self._descr = descr
+
+    def _check_descr(self, descr):
+        if not we_are_translated() and getattr(descr, 'I_am_a_descr', False):
+            return # needed for the mock case in oparser_model
         from pypy.jit.metainterp.history import check_descr
         check_descr(descr)
-        self._descr = descr
+
 
 class GuardResOp(ResOpWithDescr):
 
@@ -468,8 +474,9 @@
     'STRSETITEM/3',
     'UNICODESETITEM/3',
     #'RUNTIMENEW/1',     # ootype operation
-    'COND_CALL_GC_WB/2d', # [objptr, newvalue]   (for the write barrier)
-    'DEBUG_MERGE_POINT/2',      # debugging only
+    'COND_CALL_GC_WB/2d', # [objptr, newvalue] (for the write barrier)
+    'COND_CALL_GC_WB_ARRAY/3d', # [objptr, arrayindex, newvalue] (write barr.)
+    'DEBUG_MERGE_POINT/*',      # debugging only
     'JIT_DEBUG/*',              # debugging only
     'VIRTUAL_REF_FINISH/2',   # removed before it's passed to the backend
     'COPYSTRCONTENT/5',       # src, dst, srcstart, dststart, length
@@ -482,6 +489,7 @@
     'CALL_ASSEMBLER/*d',  # call already compiled assembler
     'CALL_MAY_FORCE/*d',
     'CALL_LOOPINVARIANT/*d',
+    'CALL_RELEASE_GIL/*d',  # release the GIL and "close the stack" for asmgcc
     #'OOSEND',                     # ootype operation
     #'OOSEND_PURE',                # ootype operation
     'CALL_PURE/*d',             # removed before it's passed to the backend
diff --git a/pypy/jit/metainterp/resume.py b/pypy/jit/metainterp/resume.py
--- a/pypy/jit/metainterp/resume.py
+++ b/pypy/jit/metainterp/resume.py
@@ -2,15 +2,17 @@
 from pypy.jit.metainterp.history import Box, Const, ConstInt, getkind
 from pypy.jit.metainterp.history import BoxInt, BoxPtr, BoxFloat
 from pypy.jit.metainterp.history import INT, REF, FLOAT, HOLE
+from pypy.jit.metainterp.history import AbstractDescr
 from pypy.jit.metainterp.resoperation import rop
 from pypy.jit.metainterp import jitprof
 from pypy.jit.codewriter.effectinfo import EffectInfo
 from pypy.rpython.lltypesystem import lltype, llmemory, rffi, rstr
+from pypy.rpython import annlowlevel
 from pypy.rlib import rarithmetic, rstack
 from pypy.rlib.objectmodel import we_are_translated, specialize
 from pypy.rlib.debug import have_debug_prints, ll_assert
 from pypy.rlib.debug import debug_start, debug_stop, debug_print
-from pypy.jit.metainterp.optimizeutil import InvalidLoop
+from pypy.jit.metainterp.optimize import InvalidLoop
 
 # Logic to encode the chain of frames and the state of the boxes at a
 # guard operation, and to decode it again.  This is a bit advanced,
@@ -82,6 +84,13 @@
                             ('nums', lltype.Array(rffi.SHORT)))
 NUMBERINGP.TO.become(NUMBERING)
 
+PENDINGFIELDSTRUCT = lltype.Struct('PendingField',
+                                   ('lldescr', annlowlevel.base_ptr_lltype()),
+                                   ('num', rffi.SHORT),
+                                   ('fieldnum', rffi.SHORT),
+                                   ('itemindex', rffi.INT))
+PENDINGFIELDSP = lltype.Ptr(lltype.GcArray(PENDINGFIELDSTRUCT))
+
 TAGMASK = 3
 
 def tag(value, tagbits):
@@ -329,7 +338,7 @@
                 value = values[box]
                 value.get_args_for_fail(self)
 
-        for _, box, fieldbox in pending_setfields:
+        for _, box, fieldbox, _ in pending_setfields:
             self.register_box(box)
             self.register_box(fieldbox)
             value = values[fieldbox]
@@ -405,13 +414,25 @@
         return False
 
     def _add_pending_fields(self, pending_setfields):
-        rd_pendingfields = None
+        rd_pendingfields = lltype.nullptr(PENDINGFIELDSP.TO)
         if pending_setfields:
-            rd_pendingfields = []
-            for descr, box, fieldbox in pending_setfields:
+            n = len(pending_setfields)
+            rd_pendingfields = lltype.malloc(PENDINGFIELDSP.TO, n)
+            for i in range(n):
+                descr, box, fieldbox, itemindex = pending_setfields[i]
+                lldescr = annlowlevel.cast_instance_to_base_ptr(descr)
                 num = self._gettagged(box)
                 fieldnum = self._gettagged(fieldbox)
-                rd_pendingfields.append((descr, num, fieldnum))
+                # the index must fit in a signed 32-bit int (it can only
+                # exceed that limit on 64-bit machines)
+                if itemindex > 2147483647:
+                    from pypy.jit.metainterp import compile
+                    compile.giveup()
+                itemindex = rffi.cast(rffi.INT, itemindex)
+                #
+                rd_pendingfields[i].lldescr  = lldescr
+                rd_pendingfields[i].num      = num
+                rd_pendingfields[i].fieldnum = fieldnum
+                rd_pendingfields[i].itemindex= itemindex
         self.storage.rd_pendingfields = rd_pendingfields
 
     def _gettagged(self, box):
@@ -727,10 +748,28 @@
             self.virtuals_cache = [self.virtual_default] * len(virtuals)
 
     def _prepare_pendingfields(self, pendingfields):
-        if pendingfields is not None:
-            for descr, num, fieldnum in pendingfields:
+        if pendingfields:
+            for i in range(len(pendingfields)):
+                lldescr  = pendingfields[i].lldescr
+                num      = pendingfields[i].num
+                fieldnum = pendingfields[i].fieldnum
+                itemindex= pendingfields[i].itemindex
+                descr = annlowlevel.cast_base_ptr_to_instance(AbstractDescr,
+                                                              lldescr)
                 struct = self.decode_ref(num)
-                self.setfield(descr, struct, fieldnum)
+                itemindex = rffi.cast(lltype.Signed, itemindex)
+                if itemindex < 0:
+                    self.setfield(descr, struct, fieldnum)
+                else:
+                    self.setarrayitem(descr, struct, itemindex, fieldnum)
+
+    def setarrayitem(self, arraydescr, array, index, fieldnum):
+        if arraydescr.is_array_of_pointers():
+            self.setarrayitem_ref(arraydescr, array, index, fieldnum)
+        elif arraydescr.is_array_of_floats():
+            self.setarrayitem_float(arraydescr, array, index, fieldnum)
+        else:
+            self.setarrayitem_int(arraydescr, array, index, fieldnum)
 
     def _prepare_next_section(self, info):
         # Use info.enumerate_vars(), normally dispatching to
@@ -903,15 +942,15 @@
                                            structbox, fieldbox)
 
     def setarrayitem_int(self, arraydescr, arraybox, index, fieldnum):
-        self.setarrayitem(arraydescr, arraybox, index, fieldnum, INT)
+        self._setarrayitem(arraydescr, arraybox, index, fieldnum, INT)
 
     def setarrayitem_ref(self, arraydescr, arraybox, index, fieldnum):
-        self.setarrayitem(arraydescr, arraybox, index, fieldnum, REF)
+        self._setarrayitem(arraydescr, arraybox, index, fieldnum, REF)
 
     def setarrayitem_float(self, arraydescr, arraybox, index, fieldnum):
-        self.setarrayitem(arraydescr, arraybox, index, fieldnum, FLOAT)
+        self._setarrayitem(arraydescr, arraybox, index, fieldnum, FLOAT)
 
-    def setarrayitem(self, arraydescr, arraybox, index, fieldnum, kind):
+    def _setarrayitem(self, arraydescr, arraybox, index, fieldnum, kind):
         itembox = self.decode_box(fieldnum, kind)
         self.metainterp.execute_and_record(rop.SETARRAYITEM_GC,
                                            arraydescr, arraybox,
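
For reference, a small sketch of the convention behind the new pending-fields encoding (illustration only, mirroring _add_pending_fields/_prepare_pendingfields above): every entry carries an itemindex stored as a 32-bit int; a negative value means a plain setfield, a non-negative one means a setarrayitem at that index, and an index that does not fit in 32 bits makes compilation give up.

    def replay_pending_entry(reader, descr, struct, fieldnum, itemindex):
        # decode-side convention, as in _prepare_pendingfields
        if itemindex < 0:
            reader.setfield(descr, struct, fieldnum)                 # plain field
        else:
            reader.setarrayitem(descr, struct, itemindex, fieldnum)  # array item
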
diff --git a/pypy/jit/metainterp/test/support.py b/pypy/jit/metainterp/test/support.py
--- a/pypy/jit/metainterp/test/support.py
+++ b/pypy/jit/metainterp/test/support.py
@@ -15,14 +15,14 @@
                   supports_longlong=False, **kwds):
     from pypy.jit.codewriter import support
 
-    class FakeJitCell:
+    class FakeJitCell(object):
         __compiled_merge_points = []
         def get_compiled_merge_points(self):
             return self.__compiled_merge_points[:]
         def set_compiled_merge_points(self, lst):
             self.__compiled_merge_points = lst
 
-    class FakeWarmRunnerState:
+    class FakeWarmRunnerState(object):
         def attach_unoptimized_bridge_from_interp(self, greenkey, newloop):
             pass
 
@@ -30,6 +30,9 @@
             from pypy.rpython.annlowlevel import llhelper
             return llhelper(FUNCPTR, func)
 
+        def get_location_str(self, args):
+            return 'location'
+
         def jit_cell_at_key(self, greenkey):
             assert greenkey == []
             return self._cell
diff --git a/pypy/jit/metainterp/test/test_ajit.py b/pypy/jit/metainterp/test/test_ajit.py
--- a/pypy/jit/metainterp/test/test_ajit.py
+++ b/pypy/jit/metainterp/test/test_ajit.py
@@ -1,7 +1,7 @@
 import py
 import sys
 from pypy.rlib.jit import JitDriver, we_are_jitted, hint, dont_look_inside
-from pypy.rlib.jit import loop_invariant
+from pypy.rlib.jit import loop_invariant, elidable, promote
 from pypy.rlib.jit import jit_debug, assert_green, AssertGreenFailed
 from pypy.rlib.jit import unroll_safe, current_trace_length
 from pypy.jit.metainterp import pyjitpl, history
@@ -304,12 +304,12 @@
         assert res == 42
         self.check_operations_history(int_add=1, int_mul=0, call=1, guard_no_exception=0)
 
-    def test_residual_call_pure(self):
+    def test_residual_call_elidable(self):
         def externfn(x, y):
             return x * y
-        externfn._pure_function_ = True
+        externfn._elidable_function_ = True
         def f(n):
-            n = hint(n, promote=True)
+            promote(n)
             return externfn(n, n+1)
         res = self.interp_operations(f, [6])
         assert res == 42
@@ -317,10 +317,10 @@
         self.check_operations_history(int_add=0, int_mul=0,
                                       call=0, call_pure=0)
 
-    def test_residual_call_pure_1(self):
+    def test_residual_call_elidable_1(self):
+        @elidable
         def externfn(x, y):
             return x * y
-        externfn._pure_function_ = True
         def f(n):
             return externfn(n, n+1)
         res = self.interp_operations(f, [6])
@@ -329,11 +329,11 @@
         self.check_operations_history(int_add=1, int_mul=0,
                                       call=0, call_pure=1)
 
-    def test_residual_call_pure_2(self):
+    def test_residual_call_elidable_2(self):
         myjitdriver = JitDriver(greens = [], reds = ['n'])
+        @elidable
         def externfn(x):
             return x - 1
-        externfn._pure_function_ = True
         def f(n):
             while n > 0:
                 myjitdriver.can_enter_jit(n=n)
@@ -346,11 +346,11 @@
         # by optimizeopt.py
         self.check_loops(int_sub=0, call=1, call_pure=0)
 
-    def test_constfold_call_pure(self):
+    def test_constfold_call_elidable(self):
         myjitdriver = JitDriver(greens = ['m'], reds = ['n'])
+        @elidable
         def externfn(x):
             return x - 3
-        externfn._pure_function_ = True
         def f(n, m):
             while n > 0:
                 myjitdriver.can_enter_jit(n=n, m=m)
@@ -362,11 +362,11 @@
         # the CALL_PURE is constant-folded away by optimizeopt.py
         self.check_loops(int_sub=1, call=0, call_pure=0)
 
-    def test_constfold_call_pure_2(self):
+    def test_constfold_call_elidable_2(self):
         myjitdriver = JitDriver(greens = ['m'], reds = ['n'])
+        @elidable
         def externfn(x):
             return x - 3
-        externfn._pure_function_ = True
         class V:
             def __init__(self, value):
                 self.value = value
@@ -382,19 +382,19 @@
         # the CALL_PURE is constant-folded away by optimizeopt.py
         self.check_loops(int_sub=1, call=0, call_pure=0)
 
-    def test_pure_function_returning_object(self):
+    def test_elidable_function_returning_object(self):
         myjitdriver = JitDriver(greens = ['m'], reds = ['n'])
         class V:
             def __init__(self, x):
                 self.x = x
         v1 = V(1)
         v2 = V(2)
+        @elidable
         def externfn(x):
             if x:
                 return v1
             else:
                 return v2
-        externfn._pure_function_ = True
         def f(n, m):
             while n > 0:
                 myjitdriver.can_enter_jit(n=n, m=m)
@@ -500,7 +500,7 @@
                 y -= x
             return y
         #
-        res = self.meta_interp(f, [3, 6], repeat=7)
+        res = self.meta_interp(f, [3, 6], repeat=7, function_threshold=0)
         assert res == 6 - 4 - 5
         self.check_history(call=0)   # because the trace starts in the middle
         #
@@ -984,11 +984,14 @@
             pass
         class B(A):
             pass
+        @dont_look_inside
+        def extern(n):
+            if n:
+                return A()
+            else:
+                return B()
         def fn(n):
-            if n:
-                obj = A()
-            else:
-                obj = B()
+            obj = extern(n)
             return isinstance(obj, B)
         res = self.interp_operations(fn, [0])
         assert res
@@ -1021,6 +1024,70 @@
         res = self.meta_interp(main, [])
         assert res == 55
 
+    def test_dont_record_repeated_guard_class(self):
+        class A:
+            pass
+        class B(A):
+            pass
+        @dont_look_inside
+        def extern(n):
+            if n == -7:
+                return None
+            elif n:
+                return A()
+            else:
+                return B()
+        def fn(n):
+            obj = extern(n)
+            return isinstance(obj, B) + isinstance(obj, B) + isinstance(obj, B) + isinstance(obj, B)
+        res = self.interp_operations(fn, [0])
+        assert res == 4
+        self.check_operations_history(guard_class=1, guard_nonnull=1)
+        res = self.interp_operations(fn, [1])
+        assert not res
+
+    def test_dont_record_guard_class_after_new(self):
+        class A:
+            pass
+        class B(A):
+            pass
+        def fn(n):
+            if n == -7:
+                obj = None
+            elif n:
+                obj = A()
+            else:
+                obj = B()
+            return isinstance(obj, B) + isinstance(obj, B) + isinstance(obj, B) + isinstance(obj, B)
+        res = self.interp_operations(fn, [0])
+        assert res == 4
+        self.check_operations_history(guard_class=0, guard_nonnull=0)
+        res = self.interp_operations(fn, [1])
+        assert not res
+
+    def test_guard_isnull_nullifies(self):
+        class A:
+            pass
+        a = A()
+        a.x = None
+        def fn(n):
+            if n == -7:
+                a.x = ""
+            obj = a.x
+            res = 0
+            if not obj:
+                res += 1
+            if obj:
+                res += 1
+            if obj is None:
+                res += 1
+            if obj is not None:
+                res += 1
+            return res
+        res = self.interp_operations(fn, [0])
+        assert res == 2
+        self.check_operations_history(guard_isnull=1)
+
     def test_assert_isinstance(self):
         class A:
             pass
@@ -1252,7 +1319,7 @@
                 myjitdriver.jit_merge_point(x=x, l=l)
                 a = l[x]
                 x = a.g(x)
-                hint(a, promote=True)
+                promote(a)
             return x
         res = self.meta_interp(f, [299], listops=True)
         assert res == f(299)
@@ -1312,7 +1379,7 @@
                     x -= 5
                 else:
                     x -= 7
-                hint(a, promote=True)
+                promote(a)
             return x
         res = self.meta_interp(f, [299], listops=True)
         assert res == f(299)
@@ -1343,7 +1410,7 @@
                     x -= 5
                 else:
                     x -= 7
-                hint(a, promote=True)
+                promote(a)
             return x
         res = self.meta_interp(f, [299], listops=True)
         assert res == f(299)
@@ -1377,7 +1444,7 @@
                     x = a.g(x)
                 else:
                     x -= 7
-                hint(a, promote=True)
+                promote(a)
             return x
         res = self.meta_interp(f, [399], listops=True)
         assert res == f(399)
@@ -1496,7 +1563,7 @@
                     glob.a = B()
                     const = 2
                 else:
-                    const = hint(const, promote=True)
+                    promote(const)
                     x -= const
                     res += a.x
                     a = None
@@ -1531,7 +1598,7 @@
                 myjitdriver.can_enter_jit(x=x)
                 myjitdriver.jit_merge_point(x=x)
                 a = A()
-                hint(a, promote=True)
+                promote(a)
                 x -= 1
         self.meta_interp(f, [50])
         self.check_loop_count(1)
@@ -1595,9 +1662,9 @@
         self.check_loops(jit_debug=2)
 
     def test_assert_green(self):
-        def f(x, promote):
-            if promote:
-                x = hint(x, promote=True)
+        def f(x, promote_flag):
+            if promote_flag:
+                promote(x)
             assert_green(x)
             return x
         res = self.interp_operations(f, [8, 1])
@@ -1676,7 +1743,9 @@
             return a1.val + b1.val
         res = self.meta_interp(g, [6, 14])
         assert res == g(6, 14)
-        self.check_loop_count(9)
+        self.check_loop_count(8)
+        self.check_loops(getarrayitem_gc=7, everywhere=True)
+        py.test.skip("for the following, we need setarrayitem(varindex)")
         self.check_loops(getarrayitem_gc=6, everywhere=True)
 
     def test_multiple_specialied_versions_bridge(self):
@@ -1815,7 +1884,7 @@
             while y > 0:
                 myjitdriver.can_enter_jit(y=y, x=x, res=res, const=const)
                 myjitdriver.jit_merge_point(y=y, x=x, res=res, const=const)
-                const = hint(const, promote=True)
+                const = promote(const)
                 res = res.binop(A(const))
                 if y<7:
                     res = x
@@ -2000,7 +2069,7 @@
             n = sa = 0
             while n < 10:
                 myjitdriver.jit_merge_point(a=a, b=b, n=n, sa=sa)
-                if 0 < a < hint(sys.maxint/2, promote=True): pass
+                if 0 < a < promote(sys.maxint/2): pass
                 if 0 < b < 100: pass
                 sa += (((((a << b) << b) << b) >> b) >> b) >> b                
                 n += 1
@@ -2045,7 +2114,7 @@
             n = sa = 0
             while n < 10:
                 myjitdriver.jit_merge_point(a=a, b=b, n=n, sa=sa)
-                if -hint(sys.maxint/2, promote=True) < a < 0: pass
+                if -promote(sys.maxint/2) < a < 0: pass
                 if 0 < b < 100: pass
                 sa += (((((a << b) << b) << b) >> b) >> b) >> b                
                 n += 1
@@ -2080,7 +2149,7 @@
             n = sa = 0
             while n < 10:
                 myjitdriver.jit_merge_point(a=a, b=b, n=n, sa=sa)
-                if 0 < a < hint(sys.maxint/2, promote=True): pass
+                if 0 < a < promote(sys.maxint/2): pass
                 if 0 < b < 100: pass
                 sa += (a << b) >> b
                 n += 1
@@ -2137,7 +2206,7 @@
                 if op == 'j':
                     j += 1
                 elif op == 'c':
-                    c = hint(c, promote=True)
+                    promote(c)
                     c = 1 - c
                 elif op == '2':
                     if j < 3:
@@ -2206,7 +2275,8 @@
                 self.local_names[0] = 1
 
             def retrieve(self):
-                variables = hint(self.variables, promote=True)
+                variables = self.variables
+                promote(variables)
                 result = self.local_names[0]
                 if result == 0:
                     return -1
@@ -2230,6 +2300,148 @@
         self.check_loops(getfield_gc_pure=0)
         self.check_loops(getfield_gc_pure=2, everywhere=True)
         
+    def test_frame_finished_during_retrace(self):
+        class Base(object):
+            pass
+        class A(Base):
+            def __init__(self, a):
+                self.val = a
+                self.num = 1
+            def inc(self):
+                return A(self.val + 1)
+        class B(Base):
+            def __init__(self, a):
+                self.val = a
+                self.num = 1000
+            def inc(self):
+                return B(self.val + 1)
+        myjitdriver = JitDriver(greens = [], reds = ['sa', 'a'])
+        def f():
+            myjitdriver.set_param('threshold', 3)
+            myjitdriver.set_param('trace_eagerness', 2)
+            a = A(0)
+            sa = 0
+            while a.val < 8:
+                myjitdriver.jit_merge_point(a=a, sa=sa)
+                a = a.inc()
+                if a.val > 4:
+                    a = B(a.val)
+                sa += a.num
+            return sa
+        res = self.meta_interp(f, [])
+        assert res == f()
+        
+    def test_frame_finished_during_continued_retrace(self):
+        class Base(object):
+            pass
+        class A(Base):
+            def __init__(self, a):
+                self.val = a
+                self.num = 100
+            def inc(self):
+                return A(self.val + 1)
+        class B(Base):
+            def __init__(self, a):
+                self.val = a
+                self.num = 10000
+            def inc(self):
+                return B(self.val + 1)
+        myjitdriver = JitDriver(greens = [], reds = ['sa', 'b', 'a'])
+        def f(b):
+            myjitdriver.set_param('threshold', 6)
+            myjitdriver.set_param('trace_eagerness', 4)
+            a = A(0)
+            sa = 0
+            while a.val < 15:
+                myjitdriver.jit_merge_point(a=a, b=b, sa=sa)
+                a = a.inc()
+                if a.val > 8:
+                    a = B(a.val)
+                if b == 1:
+                    b = 2
+                else:
+                    b = 1
+                sa += a.num + b
+            return sa
+        res = self.meta_interp(f, [1])
+        assert res == f(1)
+
+    def test_remove_array_operations(self):
+        myjitdriver = JitDriver(greens = [], reds = ['a'])
+        class W_Int:
+            def __init__(self, intvalue):
+                self.intvalue = intvalue
+        def f(x):
+            a = [W_Int(x)]
+            while a[0].intvalue > 0:
+                myjitdriver.jit_merge_point(a=a)
+                a[0] = W_Int(a[0].intvalue - 3)
+            return a[0].intvalue
+        res = self.meta_interp(f, [100])
+        assert res == -2
+        #self.check_loops(getarrayitem_gc=0, setarrayitem_gc=0) -- xxx?
+
+    def test_retrace_ending_up_retrazing_another_loop(self):
+
+        myjitdriver = JitDriver(greens = ['pc'], reds = ['n', 'i', 'sa'])
+        bytecode = "0+sI0+SI"
+        def f(n):
+            myjitdriver.set_param('threshold', 3)
+            myjitdriver.set_param('trace_eagerness', 1)
+            myjitdriver.set_param('retrace_limit', 5)
+            myjitdriver.set_param('function_threshold', -1)
+            pc = sa = i = 0
+            while pc < len(bytecode):
+                myjitdriver.jit_merge_point(pc=pc, n=n, sa=sa, i=i)
+                n = hint(n, promote=True)
+                op = bytecode[pc]
+                if op == '0':
+                    i = 0
+                elif op == '+':
+                    i += 1
+                elif op == 's':
+                    sa += i
+                elif op == 'S':
+                    sa += 2
+                elif op == 'I':
+                    if i < n:
+                        pc -= 2
+                        myjitdriver.can_enter_jit(pc=pc, n=n, sa=sa, i=i)
+                        continue
+                pc += 1
+            return sa
+
+        def g(n1, n2):
+            for i in range(10):
+                f(n1)
+            for i in range(10):                
+                f(n2)
+
+        nn = [10, 3]
+        assert self.meta_interp(g, nn) == g(*nn)
+        
+        # The attempts at retracing the first loop end up retracing the
+        # second and thus fail 5 times, saturating the retrace_count.
+        # Instead, a bridge back to the preamble of the first loop is
+        # produced. A guard in this bridge is later traced, resulting in a
+        # retrace of the second loop. Thus we end up with:
+        #   1 preamble and 1 specialized version of the first loop
+        #   1 preamble and 2 specialized versions of the second loop
+        self.check_tree_loop_count(2 + 3)
+
+        # FIXME: Add a global retrace counter and test that we are not trying more than 5 times.
+        
+        def g(n):
+            for i in range(n):
+                for j in range(10):
+                    f(n-i)
+
+        res = self.meta_interp(g, [10])
+        assert res == g(10)
+        # 1 preamble and 6 specialized versions of each loop
+        self.check_tree_loop_count(2*(1 + 6))
+
+
 class TestOOtype(BasicTests, OOJitMixin):
 
     def test_oohash(self):
diff --git a/pypy/jit/metainterp/test/test_compile.py b/pypy/jit/metainterp/test/test_compile.py
--- a/pypy/jit/metainterp/test/test_compile.py
+++ b/pypy/jit/metainterp/test/test_compile.py
@@ -1,3 +1,4 @@
+from pypy.config.pypyoption import get_pypy_config
 from pypy.jit.metainterp.history import LoopToken, ConstInt, History, Stats
 from pypy.jit.metainterp.history import BoxInt, INT
 from pypy.jit.metainterp.compile import insert_loop_token, compile_new_loop
@@ -5,7 +6,7 @@
 from pypy.jit.metainterp.compile import ResumeGuardCountersInt
 from pypy.jit.metainterp.compile import compile_tmp_callback
 from pypy.jit.metainterp import jitprof, typesystem, compile
-from pypy.jit.metainterp.test.test_optimizeutil import LLtypeMixin
+from pypy.jit.metainterp.optimizeopt.test.test_util import LLtypeMixin
 from pypy.jit.tool.oparser import parse
 from pypy.jit.metainterp.optimizeopt import ALL_OPTS_DICT
 
@@ -30,13 +31,16 @@
     ts = typesystem.llhelper
     def __init__(self):
         self.seen = []
-    def compile_loop(self, inputargs, operations, token):
+    def compile_loop(self, inputargs, operations, token, name=''):
         self.seen.append((inputargs, operations, token))
 
 class FakeLogger(object):
     def log_loop(self, inputargs, operations, number=0, type=None, ops_offset=None):
         pass
 
+    def repr_of_resop(self, op):
+        return repr(op)
+
 class FakeState(object):
     enable_opts = ALL_OPTS_DICT.copy()
     enable_opts.pop('unroll')
@@ -44,6 +48,9 @@
     def attach_unoptimized_bridge_from_interp(*args):
         pass
 
+    def get_location_str(self, args):
+        return 'location'
+
 class FakeGlobalData(object):
     loopnumbering = 0
 
@@ -51,11 +58,11 @@
     
     logger_noopt = FakeLogger()
     logger_ops = FakeLogger()
+    config = get_pypy_config(translating=True)
 
     stats = Stats()
     profiler = jitprof.EmptyProfiler()
     warmrunnerdesc = None
-    jit_ffi = False
     def log(self, msg, event_kind=None):
         pass
 
@@ -63,6 +70,8 @@
     call_pure_results = {}
     class jitdriver_sd:
         warmstate = FakeState()
+        on_compile = staticmethod(lambda *args: None)
+        on_compile_bridge = staticmethod(lambda *args: None)
 
 def test_compile_new_loop():
     cpu = FakeCPU()
diff --git a/pypy/jit/metainterp/test/test_dict.py b/pypy/jit/metainterp/test/test_dict.py
--- a/pypy/jit/metainterp/test/test_dict.py
+++ b/pypy/jit/metainterp/test/test_dict.py
@@ -130,6 +130,38 @@
         assert res == 50
         self.check_loops(int_mod=1)
 
+    def test_repeated_lookup(self):
+        myjitdriver = JitDriver(greens = [], reds = ['n', 'd'])
+        class Wrapper(object):
+            _immutable_fields_ = ["value"]
+            def __init__(self, value):
+                self.value = value
+        def eq_func(a, b):
+            return a.value == b.value
+        def hash_func(x):
+            return objectmodel.compute_hash(x.value)
+
+        def f(n):
+            d = None
+            while n > 0:
+                myjitdriver.jit_merge_point(n=n, d=d)
+                d = objectmodel.r_dict(eq_func, hash_func)
+                y = Wrapper(str(n))
+                d[y] = n - 1
+                n = d[y]
+            return d[Wrapper(str(n + 1))]
+
+        res = self.meta_interp(f, [100], listops=True)
+        assert res == f(50)
+        # XXX: ideally there would be 7 calls here, but repeated CALL_PURE with
+        # the same arguments is not folded because we have conflicting
+        # definitions of pure; once strhash can be appropriately folded,
+        # the call count should drop to 7.
+        self.check_loops({"call": 8, "guard_false": 1, "guard_no_exception": 5,
+                          "guard_true": 1, "int_and": 1, "int_gt": 1,
+                          "int_is_true": 1, "int_sub": 1, "jump": 1,
+                          "new_with_vtable": 1, "setfield_gc": 1})
+
 
 class TestOOtype(DictTests, OOJitMixin):
     pass
diff --git a/pypy/jit/metainterp/test/test_fficall.py b/pypy/jit/metainterp/test/test_fficall.py
--- a/pypy/jit/metainterp/test/test_fficall.py
+++ b/pypy/jit/metainterp/test/test_fficall.py
@@ -1,28 +1,46 @@
 
 import py
-from pypy.rlib.jit import JitDriver, hint
+from pypy.rlib.rarithmetic import r_singlefloat, r_longlong, r_ulonglong
+from pypy.rlib.jit import JitDriver, promote, dont_look_inside
 from pypy.rlib.unroll import unrolling_iterable
-from pypy.rlib.libffi import ArgChain
+from pypy.rlib.libffi import ArgChain, longlong2float, float2longlong
+from pypy.rlib.libffi import IS_32_BIT
 from pypy.rlib.test.test_libffi import TestLibffiCall as _TestLibffiCall
 from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.rlib.objectmodel import specialize
+from pypy.tool.sourcetools import func_with_new_name
 from pypy.jit.metainterp.test.support import LLJitMixin
 
-
 class TestFfiCall(LLJitMixin, _TestLibffiCall):
 
     # ===> ../../../rlib/test/test_libffi.py
 
-    def call(self, funcspec, args, RESULT, init_result=0):
+    def call(self, funcspec, args, RESULT, init_result=0, is_struct=False):
         """
         Call the function specified by funcspec in a loop, and let the jit
         see and optimize it.
         """
         #
         lib, name, argtypes, restype = funcspec
-        args = unrolling_iterable(args)
+        method_and_args = []
+        for argval in args:
+            if type(argval) is r_singlefloat:
+                method_name = 'arg_singlefloat'
+                argval = float(argval)
+            elif IS_32_BIT and type(argval) in [r_longlong, r_ulonglong]:
+                method_name = 'arg_longlong'
+                argval = rffi.cast(rffi.LONGLONG, argval)
+                argval = longlong2float(argval)
+            elif isinstance(argval, tuple):
+                method_name, argval = argval
+            else:
+                method_name = 'arg'
+            method_and_args.append((method_name, argval))
+        method_and_args = unrolling_iterable(method_and_args)
         #
         reds = ['n', 'res', 'func']
-        if type(init_result) is float:
+        if (RESULT in [rffi.FLOAT, rffi.DOUBLE] or
+            IS_32_BIT and RESULT in [rffi.LONGLONG, rffi.ULONGLONG]):
             reds = ['n', 'func', 'res'] # floats must be *after* refs
         driver = JitDriver(reds=reds, greens=[])
         #
@@ -31,15 +49,19 @@
             res = init_result
             while n < 10:
                 driver.jit_merge_point(n=n, res=res, func=func)
-                driver.can_enter_jit(n=n, res=res, func=func)
-                func = hint(func, promote=True)
+                promote(func)
                 argchain = ArgChain()
-                for argval in args: # this loop is unrolled
-                    argchain.arg(argval)
-                res = func.call(argchain, RESULT)
+                # this loop is unrolled
+                for method_name, argval in method_and_args:
+                    getattr(argchain, method_name)(argval)
+                res = func.call(argchain, RESULT, is_struct=is_struct)
                 n += 1
             return res
         #
-        res = self.meta_interp(f, [0])
+        res = self.meta_interp(f, [0], backendopt=True)
         return res
 
+    def test_byval_result(self):
+        _TestLibffiCall.test_byval_result(self)
+    test_byval_result.__doc__ = _TestLibffiCall.test_byval_result.__doc__
+    test_byval_result.dont_track_allocations = True
diff --git a/pypy/jit/metainterp/test/test_history.py b/pypy/jit/metainterp/test/test_history.py
--- a/pypy/jit/metainterp/test/test_history.py
+++ b/pypy/jit/metainterp/test/test_history.py
@@ -1,5 +1,5 @@
 from pypy.jit.metainterp.history import *
-from pypy.rpython.lltypesystem import lltype, llmemory
+from pypy.rpython.lltypesystem import lltype, llmemory, rffi
 
 
 def test_repr():
@@ -10,6 +10,18 @@
     const = ConstPtr(lltype.cast_opaque_ptr(llmemory.GCREF, s))
     assert const._getrepr_() == "*T"
 
+def test_repr_ll2ctypes():
+    ptr = lltype.malloc(rffi.VOIDPP.TO, 10, flavor='raw')
+    # force it to be a ll2ctypes object
+    ptr = rffi.cast(rffi.VOIDPP, rffi.cast(rffi.LONG, ptr))
+    adr = llmemory.cast_ptr_to_adr(ptr)
+    lltype.free(ptr, flavor='raw')
+    intval = llmemory.cast_adr_to_int(adr, 'symbolic')
+    box = BoxInt(intval)
+    s = box.repr_rpython()
+    assert s.startswith('12345/') # the arbitrary hash value used by
+                                  # make_hashable_int
+
 def test_same_constant():
     c1a = ConstInt(0)
     c1b = ConstInt(0)
diff --git a/pypy/jit/metainterp/test/test_jitdriver.py b/pypy/jit/metainterp/test/test_jitdriver.py
--- a/pypy/jit/metainterp/test/test_jitdriver.py
+++ b/pypy/jit/metainterp/test/test_jitdriver.py
@@ -113,6 +113,7 @@
             return n
         #
         def loop2(g, r):
+            myjitdriver1.set_param('function_threshold', 0)
             while r > 0:
                 myjitdriver2.can_enter_jit(g=g, r=r)
                 myjitdriver2.jit_merge_point(g=g, r=r)
diff --git a/pypy/jit/metainterp/test/test_jitprof.py b/pypy/jit/metainterp/test/test_jitprof.py
--- a/pypy/jit/metainterp/test/test_jitprof.py
+++ b/pypy/jit/metainterp/test/test_jitprof.py
@@ -1,6 +1,6 @@
 
 from pypy.jit.metainterp.warmspot import ll_meta_interp
-from pypy.rlib.jit import JitDriver, dont_look_inside, purefunction
+from pypy.rlib.jit import JitDriver, dont_look_inside, elidable
 from pypy.jit.metainterp.test.support import LLJitMixin
 from pypy.jit.metainterp import pyjitpl
 from pypy.jit.metainterp.jitprof import *
@@ -89,7 +89,7 @@
         assert profiler.calls == 1
 
     def test_blackhole_pure(self):
-        @purefunction
+        @elidable
         def g(n):
             return n+1
         
diff --git a/pypy/jit/metainterp/test/test_list.py b/pypy/jit/metainterp/test/test_list.py
--- a/pypy/jit/metainterp/test/test_list.py
+++ b/pypy/jit/metainterp/test/test_list.py
@@ -49,7 +49,7 @@
                 x = l[n]
                 l = [3] * 100
                 l[3] = x
-                l[3] = x + 1
+                l[4] = x + 1
                 n -= 1
             return l[0]
 
diff --git a/pypy/jit/metainterp/test/test_logger.py b/pypy/jit/metainterp/test/test_logger.py
--- a/pypy/jit/metainterp/test/test_logger.py
+++ b/pypy/jit/metainterp/test/test_logger.py
@@ -4,7 +4,7 @@
 from pypy.jit.metainterp import logger
 from pypy.jit.metainterp.typesystem import llhelper
 from StringIO import StringIO
-from pypy.jit.metainterp.test.test_optimizeopt import equaloplists
+from pypy.jit.metainterp.optimizeopt.util import equaloplists
 from pypy.jit.metainterp.history import AbstractDescr, LoopToken, BasicFailDescr
 from pypy.jit.backend.model import AbstractCPU
 
@@ -36,19 +36,29 @@
         return capturing(logger.Logger.log_loop, self,
                          loop.inputargs, loop.operations, ops_offset=ops_offset)
 
-    def repr_of_descr(self, descr):
-        for k, v in self.namespace.items():
-            if v == descr:
-                return k
-        return descr.repr_of_descr()
+    def _make_log_operations(self1):
+        class LogOperations(logger.LogOperations):
+            def repr_of_descr(self, descr):
+                for k, v in self1.namespace.items():
+                    if v == descr:
+                        return k
+                return descr.repr_of_descr()
+        logops = LogOperations(self1.metainterp_sd, self1.guard_number)
+        self1.logops = logops
+        return logops
 
 class TestLogger(object):
     ts = llhelper
 
     def make_metainterp_sd(self):
+        class FakeJitDriver(object):
+            class warmstate(object):
+                get_location_str = staticmethod(lambda args: "dupa")
+        
         class FakeMetaInterpSd:
             cpu = AbstractCPU()
             cpu.ts = self.ts
+            jitdrivers_sd = [FakeJitDriver()]
             def get_name_from_address(self, addr):
                 return 'Name'
         return FakeMetaInterpSd()
@@ -66,7 +76,7 @@
         if check_equal:
             equaloplists(loop.operations, oloop.operations)
             assert oloop.inputargs == loop.inputargs
-        return loop, oloop
+        return logger, loop, oloop
     
     def test_simple(self):
         inp = '''
@@ -106,18 +116,18 @@
     def test_debug_merge_point(self):
         inp = '''
         []
-        debug_merge_point("info", 0)
+        debug_merge_point(0, 0)
         '''
-        loop, oloop = self.reparse(inp, check_equal=False)
-        assert loop.operations[0].getarg(0)._get_str() == 'info'
-        assert oloop.operations[0].getarg(0)._get_str() == 'info'
+        _, loop, oloop = self.reparse(inp, check_equal=False)
+        assert loop.operations[0].getarg(1).getint() == 0
+        assert oloop.operations[0].getarg(1)._get_str() == "dupa"
         
     def test_floats(self):
         inp = '''
         [f0]
         f1 = float_add(3.5, f0)
         '''
-        loop, oloop = self.reparse(inp)
+        _, loop, oloop = self.reparse(inp)
         equaloplists(loop.operations, oloop.operations)
 
     def test_jump(self):
@@ -179,6 +189,17 @@
         assert output.splitlines()[0] == "# bridge out of Guard 3 with 0 ops"
         pure_parse(output)
 
+    def test_repr_single_op(self):
+        inp = '''
+        [i0, i1, i2, p3, p4, p5]
+        i6 = int_add(i1, i2)
+        i8 = int_add(i6, 3)
+        jump(i0, i8, i6, p3, p4, p5)
+        '''
+        logger, loop, _ = self.reparse(inp)
+        op = loop.operations[1]
+        assert logger.logops.repr_of_resop(op) == "i8 = int_add(i6, 3)"
+
     def test_ops_offset(self):
         inp = '''
         [i0]
diff --git a/pypy/jit/metainterp/test/test_pyjitpl.py b/pypy/jit/metainterp/test/test_pyjitpl.py
--- a/pypy/jit/metainterp/test/test_pyjitpl.py
+++ b/pypy/jit/metainterp/test/test_pyjitpl.py
@@ -6,7 +6,7 @@
 from pypy.jit.metainterp.history import BoxInt, ConstInt
 from pypy.jit.metainterp.history import History
 from pypy.jit.metainterp.resoperation import ResOperation, rop
-from pypy.jit.metainterp.test.test_optimizeopt import equaloplists
+from pypy.jit.metainterp.optimizeopt.util import equaloplists
 from pypy.jit.codewriter.jitcode import JitCode
 
 
diff --git a/pypy/jit/metainterp/test/test_recursive.py b/pypy/jit/metainterp/test/test_recursive.py
--- a/pypy/jit/metainterp/test/test_recursive.py
+++ b/pypy/jit/metainterp/test/test_recursive.py
@@ -1,6 +1,6 @@
 import py
 from pypy.rlib.jit import JitDriver, we_are_jitted, hint
-from pypy.rlib.jit import unroll_safe, dont_look_inside
+from pypy.rlib.jit import unroll_safe, dont_look_inside, promote
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.rlib.debug import fatalerror
 from pypy.jit.metainterp.test.support import LLJitMixin, OOJitMixin
@@ -483,6 +483,7 @@
  
         def main(inline):
             myjitdriver.set_param("threshold", 10)
+            myjitdriver.set_param('function_threshold', 60)
             if inline:
                 myjitdriver.set_param('inlining', True)
             else:
@@ -925,7 +926,7 @@
                 myjitdriver.can_enter_jit(codeno=codeno, frame=frame, n=n, x=x)
                 myjitdriver.jit_merge_point(codeno=codeno, frame=frame, n=n,
                                             x=x)
-                frame.s = hint(frame.s, promote=True)
+                frame.s = promote(frame.s)
                 n -= 1
                 s = frame.s
                 assert s >= 0
@@ -1193,6 +1194,51 @@
                 i -= 1
         self.meta_interp(portal, [0, 10], inline=True)
 
+    def test_trace_from_start_always(self):
+        from pypy.rlib.nonconst import NonConstant
+        
+        driver = JitDriver(greens = ['c'], reds = ['i', 'v'])
+
+        def portal(c, i, v):
+            while i > 0:
+                driver.jit_merge_point(c=c, i=i, v=v)
+                portal(c, i - 1, v)
+                if v:
+                    driver.can_enter_jit(c=c, i=i, v=v)
+                break
+
+        def main(c, i, set_param, v):
+            if set_param:
+                driver.set_param('function_threshold', 0)
+            portal(c, i, v)
+
+        self.meta_interp(main, [10, 10, False, False], inline=True)
+        self.check_tree_loop_count(1)
+        self.check_loop_count(0)
+        self.meta_interp(main, [3, 10, True, False], inline=True)
+        self.check_tree_loop_count(0)
+        self.check_loop_count(0)
+
+    def test_trace_from_start_does_not_prevent_inlining(self):
+        driver = JitDriver(greens = ['c', 'bc'], reds = ['i'])
+        
+        def portal(bc, c, i):
+            while True:
+                driver.jit_merge_point(c=c, bc=bc, i=i)
+                if bc == 0:
+                    portal(1, 8, 0)
+                    c += 1
+                else:
+                    return
+                if c == 10: # bc == 0                    
+                    c = 0
+                    if i >= 100:
+                        return
+                    driver.can_enter_jit(c=c, bc=bc, i=i)
+                i += 1
+
+        self.meta_interp(portal, [0, 0, 0], inline=True)
+        self.check_loops(call=0, call_may_force=0)
 
 class TestLLtype(RecursiveTests, LLJitMixin):
     pass
diff --git a/pypy/jit/metainterp/test/test_resume.py b/pypy/jit/metainterp/test/test_resume.py
--- a/pypy/jit/metainterp/test/test_resume.py
+++ b/pypy/jit/metainterp/test/test_resume.py
@@ -6,7 +6,7 @@
 from pypy.jit.metainterp.resume import *
 from pypy.jit.metainterp.history import BoxInt, BoxPtr, ConstInt
 from pypy.jit.metainterp.history import ConstPtr, ConstFloat
-from pypy.jit.metainterp.test.test_optimizeutil import LLtypeMixin
+from pypy.jit.metainterp.optimizeopt.test.test_util import LLtypeMixin
 from pypy.jit.metainterp import executor
 from pypy.jit.codewriter import heaptracker, longlong
 
@@ -1238,7 +1238,7 @@
     liveboxes = []
     modifier._number_virtuals(liveboxes, values, 0)
     assert liveboxes == [b2s, b4s] or liveboxes == [b4s, b2s]
-    modifier._add_pending_fields([(LLtypeMixin.nextdescr, b2s, b4s)])
+    modifier._add_pending_fields([(LLtypeMixin.nextdescr, b2s, b4s, -1)])
     storage.rd_consts = memo.consts[:]
     storage.rd_numb = None
     # resume
@@ -1259,6 +1259,106 @@
     assert len(expected) == len(trace)
     assert demo55.next == demo66
 
+def test_virtual_adder_pending_fields_and_arrayitems():
+    class Storage(object):
+        pass
+    storage = Storage()
+    modifier = ResumeDataVirtualAdder(storage, None)
+    modifier._add_pending_fields([])
+    assert not storage.rd_pendingfields
+    #
+    class FieldDescr(object):
+        pass
+    field_a = FieldDescr()
+    storage = Storage()
+    modifier = ResumeDataVirtualAdder(storage, None)
+    modifier.liveboxes_from_env = {42: rffi.cast(rffi.SHORT, 1042),
+                                   61: rffi.cast(rffi.SHORT, 1061)}
+    modifier._add_pending_fields([(field_a, 42, 61, -1)])
+    pf = storage.rd_pendingfields
+    assert len(pf) == 1
+    assert (annlowlevel.cast_base_ptr_to_instance(FieldDescr, pf[0].lldescr)
+            is field_a)
+    assert rffi.cast(lltype.Signed, pf[0].num) == 1042
+    assert rffi.cast(lltype.Signed, pf[0].fieldnum) == 1061
+    assert rffi.cast(lltype.Signed, pf[0].itemindex) == -1
+    #
+    array_a = FieldDescr()
+    storage = Storage()
+    modifier = ResumeDataVirtualAdder(storage, None)
+    modifier.liveboxes_from_env = {42: rffi.cast(rffi.SHORT, 1042),
+                                   61: rffi.cast(rffi.SHORT, 1061),
+                                   62: rffi.cast(rffi.SHORT, 1062),
+                                   63: rffi.cast(rffi.SHORT, 1063)}
+    modifier._add_pending_fields([(array_a, 42, 61, 0),
+                                  (array_a, 42, 62, 2147483647)])
+    pf = storage.rd_pendingfields
+    assert len(pf) == 2
+    assert (annlowlevel.cast_base_ptr_to_instance(FieldDescr, pf[0].lldescr)
+            is array_a)
+    assert rffi.cast(lltype.Signed, pf[0].num) == 1042
+    assert rffi.cast(lltype.Signed, pf[0].fieldnum) == 1061
+    assert rffi.cast(lltype.Signed, pf[0].itemindex) == 0
+    assert (annlowlevel.cast_base_ptr_to_instance(FieldDescr, pf[1].lldescr)
+            is array_a)
+    assert rffi.cast(lltype.Signed, pf[1].num) == 1042
+    assert rffi.cast(lltype.Signed, pf[1].fieldnum) == 1062
+    assert rffi.cast(lltype.Signed, pf[1].itemindex) == 2147483647
+    #
+    from pypy.jit.metainterp.pyjitpl import SwitchToBlackhole
+    py.test.raises(SwitchToBlackhole, modifier._add_pending_fields,
+                   [(array_a, 42, 63, 2147483648)])
+
+def test_resume_reader_fields_and_arrayitems():
+    class ResumeReader(AbstractResumeDataReader):
+        def __init__(self, got=None, got_array=None):
+            self.got = got
+            self.got_array = got_array
+        def setfield(self, descr, struct, fieldnum):
+            assert lltype.typeOf(struct) is lltype.Signed
+            assert lltype.typeOf(fieldnum) is rffi.SHORT
+            fieldnum = rffi.cast(lltype.Signed, fieldnum)
+            self.got.append((descr, struct, fieldnum))
+        def setarrayitem(self, arraydescr, array, index, fieldnum):
+            assert lltype.typeOf(array) is lltype.Signed
+            assert lltype.typeOf(index) is lltype.Signed
+            assert lltype.typeOf(fieldnum) is rffi.SHORT
+            fieldnum = rffi.cast(lltype.Signed, fieldnum)
+            self.got_array.append((arraydescr, array, index, fieldnum))
+        def decode_ref(self, num):
+            return rffi.cast(lltype.Signed, num) * 100
+    got = []
+    pf = lltype.nullptr(PENDINGFIELDSP.TO)
+    ResumeReader(got)._prepare_pendingfields(pf)
+    assert got == []
+    #
+    class FieldDescr(AbstractDescr):
+        pass
+    field_a = FieldDescr()
+    field_b = FieldDescr()
+    pf = lltype.malloc(PENDINGFIELDSP.TO, 2)
+    pf[0].lldescr = annlowlevel.cast_instance_to_base_ptr(field_a)
+    pf[0].num = rffi.cast(rffi.SHORT, 1042)
+    pf[0].fieldnum = rffi.cast(rffi.SHORT, 1061)
+    pf[0].itemindex = rffi.cast(rffi.INT, -1)
+    pf[1].lldescr = annlowlevel.cast_instance_to_base_ptr(field_b)
+    pf[1].num = rffi.cast(rffi.SHORT, 2042)
+    pf[1].fieldnum = rffi.cast(rffi.SHORT, 2061)
+    pf[1].itemindex = rffi.cast(rffi.INT, -1)
+    got = []
+    ResumeReader(got)._prepare_pendingfields(pf)
+    assert got == [(field_a, 104200, 1061), (field_b, 204200, 2061)]
+    #
+    array_a = FieldDescr()
+    pf = lltype.malloc(PENDINGFIELDSP.TO, 1)
+    pf[0].lldescr = annlowlevel.cast_instance_to_base_ptr(array_a)
+    pf[0].num = rffi.cast(rffi.SHORT, 1042)
+    pf[0].fieldnum = rffi.cast(rffi.SHORT, 1063)
+    pf[0].itemindex = rffi.cast(rffi.INT, 123)
+    got_array = []
+    ResumeReader(got_array=got_array)._prepare_pendingfields(pf)
+    assert got_array == [(array_a, 104200, 123, 1063)]
+
 
 def test_invalidation_needed():
     class options:
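
In the new pending-fields tests above, the array item index is stored in an rffi.INT, so 2147483647 (2**31 - 1) is the largest index that still round-trips and 2147483648 is the first value that takes the SwitchToBlackhole escape path. A plain-Python check of that boundary, mirroring only the bound the test exercises (the real code goes through rffi casts):

    INT32_MAX = 2 ** 31 - 1                  # == 2147483647, the value used in the test

    def fits_in_int32(itemindex):
        # illustrative only: item indexes that overflow a 32-bit int are refused
        return -2 ** 31 <= itemindex <= INT32_MAX

    assert fits_in_int32(2147483647)
    assert not fits_in_int32(2147483648)     # the case that falls back to the blackhole
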
diff --git a/pypy/jit/metainterp/test/test_send.py b/pypy/jit/metainterp/test/test_send.py
--- a/pypy/jit/metainterp/test/test_send.py
+++ b/pypy/jit/metainterp/test/test_send.py
@@ -1,5 +1,5 @@
 import py
-from pypy.rlib.jit import JitDriver, hint, purefunction
+from pypy.rlib.jit import JitDriver, promote, elidable
 from pypy.jit.codewriter.policy import StopAtXPolicy
 from pypy.jit.metainterp.test.support import LLJitMixin, OOJitMixin
 
@@ -604,7 +604,7 @@
     def test_constfold_pure_oosend(self):
         myjitdriver = JitDriver(greens=[], reds = ['i', 'obj'])
         class A:
-            @purefunction
+            @elidable
             def foo(self):
                 return 42
         def fn(n, i):
@@ -613,7 +613,7 @@
             while i > 0:
                 myjitdriver.can_enter_jit(i=i, obj=obj)
                 myjitdriver.jit_merge_point(i=i, obj=obj)
-                obj = hint(obj, promote=True)
+                promote(obj)
                 res = obj.foo()
                 i-=1
             return res
diff --git a/pypy/jit/metainterp/test/test_virtual.py b/pypy/jit/metainterp/test/test_virtual.py
--- a/pypy/jit/metainterp/test/test_virtual.py
+++ b/pypy/jit/metainterp/test/test_virtual.py
@@ -1,5 +1,5 @@
 import py
-from pypy.rlib.jit import JitDriver, hint
+from pypy.rlib.jit import JitDriver, promote
 from pypy.rlib.objectmodel import compute_unique_id
 from pypy.jit.codewriter.policy import StopAtXPolicy
 from pypy.jit.metainterp.test.support import LLJitMixin, OOJitMixin
@@ -300,7 +300,7 @@
             while n > 0:
                 myjitdriver.can_enter_jit(n=n, i=i, stufflist=stufflist)
                 myjitdriver.jit_merge_point(n=n, i=i, stufflist=stufflist)
-                i = hint(i, promote=True)
+                promote(i)
                 v = Stuff(i)
                 n -= stufflist.lst[v.x].x
             return n
diff --git a/pypy/jit/metainterp/test/test_virtualizable.py b/pypy/jit/metainterp/test/test_virtualizable.py
--- a/pypy/jit/metainterp/test/test_virtualizable.py
+++ b/pypy/jit/metainterp/test/test_virtualizable.py
@@ -5,13 +5,13 @@
 from pypy.rpython.rclass import IR_IMMUTABLE, IR_IMMUTABLE_ARRAY
 from pypy.jit.codewriter.policy import StopAtXPolicy
 from pypy.jit.codewriter import heaptracker
-from pypy.rlib.jit import JitDriver, hint, dont_look_inside
+from pypy.rlib.jit import JitDriver, hint, dont_look_inside, promote
 from pypy.rlib.rarithmetic import intmask
 from pypy.jit.metainterp.test.support import LLJitMixin, OOJitMixin
 from pypy.rpython.rclass import FieldListAccessor
 from pypy.jit.metainterp.warmspot import get_stats, get_translator
 from pypy.jit.metainterp import history
-from pypy.jit.metainterp.test.test_optimizeutil import LLtypeMixin
+from pypy.jit.metainterp.optimizeopt.test.test_util import LLtypeMixin
 
 def promote_virtualizable(*args):
     pass
@@ -480,7 +480,7 @@
             while n > 0:
                 myjitdriver.can_enter_jit(frame=frame, n=n, x=x)
                 myjitdriver.jit_merge_point(frame=frame, n=n, x=x)
-                frame.s = hint(frame.s, promote=True)
+                frame.s = promote(frame.s)
                 n -= 1
                 s = frame.s
                 assert s >= 0
diff --git a/pypy/jit/metainterp/test/test_warmspot.py b/pypy/jit/metainterp/test/test_warmspot.py
--- a/pypy/jit/metainterp/test/test_warmspot.py
+++ b/pypy/jit/metainterp/test/test_warmspot.py
@@ -80,7 +80,7 @@
         self.meta_interp(f, [123, 10])
         assert len(get_stats().locations) >= 4
         for loc in get_stats().locations:
-            assert loc == 'GREEN IS 123.'
+            assert loc == (0, 123)
 
     def test_set_param_enable_opts(self):
         from pypy.rpython.annlowlevel import llstr, hlstr
diff --git a/pypy/jit/metainterp/test/test_warmstate.py b/pypy/jit/metainterp/test/test_warmstate.py
--- a/pypy/jit/metainterp/test/test_warmstate.py
+++ b/pypy/jit/metainterp/test/test_warmstate.py
@@ -181,6 +181,7 @@
         cpu = None
         memory_manager = None
     class FakeJitDriverSD:
+        jitdriver = None
         _green_args_spec = [lltype.Signed, lltype.Float]
         _get_printable_location_ptr = None
         _confirm_enter_jit_ptr = None
@@ -207,6 +208,7 @@
         cpu = None
         memory_manager = None
     class FakeJitDriverSD:
+        jitdriver = None
         _green_args_spec = [lltype.Signed, lltype.Float]
         _get_printable_location_ptr = llhelper(GET_LOCATION, get_location)
         _confirm_enter_jit_ptr = None
@@ -230,6 +232,7 @@
         cpu = None
         memory_manager = None
     class FakeJitDriverSD:
+        jitdriver = None
         _green_args_spec = [lltype.Signed, lltype.Float]
         _get_printable_location_ptr = None
         _confirm_enter_jit_ptr = llhelper(ENTER_JIT, confirm_enter_jit)
@@ -253,6 +256,7 @@
         cpu = None
         memory_manager = None
     class FakeJitDriverSD:
+        jitdriver = None
         _green_args_spec = [lltype.Signed, lltype.Float]
         _get_printable_location_ptr = None
         _confirm_enter_jit_ptr = None
diff --git a/pypy/jit/metainterp/virtualref.py b/pypy/jit/metainterp/virtualref.py
--- a/pypy/jit/metainterp/virtualref.py
+++ b/pypy/jit/metainterp/virtualref.py
@@ -1,5 +1,5 @@
 from pypy.rpython.rmodel import inputconst, log
-from pypy.rpython.lltypesystem import lltype, llmemory, rffi, rclass
+from pypy.rpython.lltypesystem import lltype, llmemory, rclass
 from pypy.jit.metainterp import history
 from pypy.jit.codewriter import heaptracker
 from pypy.rlib.jit import InvalidVirtualRef
diff --git a/pypy/jit/metainterp/warmspot.py b/pypy/jit/metainterp/warmspot.py
--- a/pypy/jit/metainterp/warmspot.py
+++ b/pypy/jit/metainterp/warmspot.py
@@ -1,6 +1,5 @@
 import sys, py
-from pypy.rpython.lltypesystem import lltype, llmemory, rclass, rstr
-from pypy.rpython.ootypesystem import ootype
+from pypy.rpython.lltypesystem import lltype, llmemory
 from pypy.rpython.annlowlevel import llhelper, MixLevelHelperAnnotator,\
      cast_base_ptr_to_instance, hlstr
 from pypy.annotation import model as annmodel
@@ -10,16 +9,12 @@
 from pypy.objspace.flow.model import checkgraph, Link, copygraph
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.rlib.unroll import unrolling_iterable
-from pypy.rlib.rarithmetic import r_uint, intmask
-from pypy.rlib.debug import debug_print, fatalerror
-from pypy.rlib.debug import debug_start, debug_stop
-from pypy.rpython.lltypesystem.lloperation import llop
-from pypy.translator.simplify import get_funcobj, get_functype
+from pypy.rlib.debug import fatalerror
+from pypy.translator.simplify import get_functype
 from pypy.translator.unsimplify import call_final_function
 
 from pypy.jit.metainterp import history, pyjitpl, gc, memmgr
-from pypy.jit.metainterp.pyjitpl import MetaInterpStaticData, MetaInterp
-from pypy.jit.metainterp.typesystem import LLTypeHelper, OOTypeHelper
+from pypy.jit.metainterp.pyjitpl import MetaInterpStaticData
 from pypy.jit.metainterp.jitprof import Profiler, EmptyProfiler
 from pypy.jit.metainterp.jitexc import JitException
 from pypy.jit.metainterp.jitdriver import JitDriverStaticData
@@ -66,6 +61,7 @@
 def jittify_and_run(interp, graph, args, repeat=1,
                     backendopt=False, trace_limit=sys.maxint,
                     inline=False, loop_longevity=0, retrace_limit=5,
+                    function_threshold=4,
                     enable_opts=ALL_OPTS_NAMES, **kwds):
     from pypy.config.config import ConfigError
     translator = interp.typer.annotator.translator
@@ -77,9 +73,14 @@
         translator.config.translation.list_comprehension_operations = True
     except ConfigError:
         pass
+    try:
+        translator.config.translation.jit_ffi = True
+    except ConfigError:
+        pass
     warmrunnerdesc = WarmRunnerDesc(translator, backendopt=backendopt, **kwds)
     for jd in warmrunnerdesc.jitdrivers_sd:
         jd.warmstate.set_param_threshold(3)          # for tests
+        jd.warmstate.set_param_function_threshold(function_threshold)
         jd.warmstate.set_param_trace_eagerness(2)    # for tests
         jd.warmstate.set_param_trace_limit(trace_limit)
         jd.warmstate.set_param_inlining(inline)
@@ -291,9 +292,6 @@
         self.stats = stats
         if translate_support_code:
             self.annhelper = MixLevelHelperAnnotator(self.translator.rtyper)
-            annhelper = self.annhelper
-        else:
-            annhelper = None
         cpu = CPUClass(self.translator.rtyper, self.stats, self.opt,
                        translate_support_code, gcdescr=self.gcdescr)
         self.cpu = cpu
@@ -422,7 +420,7 @@
         if self.translator.rtyper.type_system.name == 'lltypesystem':
             def maybe_enter_jit(*args):
                 try:
-                    maybe_compile_and_run(*args)
+                    maybe_compile_and_run(state.increment_threshold, *args)
                 except JitException:
                     raise     # go through
                 except Exception, e:
@@ -430,15 +428,12 @@
             maybe_enter_jit._always_inline_ = True
         else:
             def maybe_enter_jit(*args):
-                maybe_compile_and_run(*args)
+                maybe_compile_and_run(state.increment_threshold, *args)
             maybe_enter_jit._always_inline_ = True
         jd._maybe_enter_jit_fn = maybe_enter_jit
 
-        can_inline = state.can_inline_greenargs
-        num_green_args = jd.num_green_args
         def maybe_enter_from_start(*args):
-            if not can_inline(*args[:num_green_args]):
-                maybe_compile_and_run(*args)
+            maybe_compile_and_run(state.increment_function_threshold, *args)
         maybe_enter_from_start._always_inline_ = True
         jd._maybe_enter_from_start_fn = maybe_enter_from_start
 
@@ -549,7 +544,6 @@
             self.rewrite_can_enter_jit(jd, sublist)
 
     def rewrite_can_enter_jit(self, jd, can_enter_jits):
-        FUNC = jd._JIT_ENTER_FUNCTYPE
         FUNCPTR = jd._PTR_JIT_ENTER_FUNCTYPE
         jit_enter_fnptr = self.helper_func(FUNCPTR, jd._maybe_enter_jit_fn)
 
diff --git a/pypy/jit/metainterp/warmstate.py b/pypy/jit/metainterp/warmstate.py
--- a/pypy/jit/metainterp/warmstate.py
+++ b/pypy/jit/metainterp/warmstate.py
@@ -1,7 +1,7 @@
 import sys, weakref
 from pypy.rpython.lltypesystem import lltype, llmemory, rstr, rffi
 from pypy.rpython.ootypesystem import ootype
-from pypy.rpython.annlowlevel import hlstr, llstr, cast_base_ptr_to_instance
+from pypy.rpython.annlowlevel import hlstr, cast_base_ptr_to_instance
 from pypy.rpython.annlowlevel import cast_object_to_ptr
 from pypy.rlib.objectmodel import specialize, we_are_translated, r_dict
 from pypy.rlib.rarithmetic import intmask
@@ -208,15 +208,20 @@
             meth = getattr(self, 'set_param_' + name)
             meth(default_value)
 
-    def set_param_threshold(self, threshold):
+    def _compute_threshold(self, threshold):
         if threshold <= 0:
-            self.increment_threshold = 0   # never reach the THRESHOLD_LIMIT
-            return
+            return 0 # never reach the THRESHOLD_LIMIT
         if threshold < 2:
             threshold = 2
-        self.increment_threshold = (self.THRESHOLD_LIMIT // threshold) + 1
+        return (self.THRESHOLD_LIMIT // threshold) + 1
         # the number is at least 1, and at most about half THRESHOLD_LIMIT
 
+    def set_param_threshold(self, threshold):
+        self.increment_threshold = self._compute_threshold(threshold)
+
+    def set_param_function_threshold(self, threshold):
+        self.increment_function_threshold = self._compute_threshold(threshold)
+
     def set_param_trace_eagerness(self, value):
         self.trace_eagerness = value
 
@@ -232,7 +237,7 @@
         d = {}
         if NonConstant(False):
             value = 'blah' # not a constant ''
-        if value is None:
+        if value is None or value == 'all':
             value = ALL_OPTS_NAMES
         for name in value.split(":"):
             if name:
@@ -291,7 +296,7 @@
         self.make_jitdriver_callbacks()
         confirm_enter_jit = self.confirm_enter_jit
 
-        def maybe_compile_and_run(*args):
+        def maybe_compile_and_run(threshold, *args):
             """Entry point to the JIT.  Called at the point with the
             can_enter_jit() hint.
             """
@@ -307,7 +312,7 @@
 
             if cell.counter >= 0:
                 # update the profiling counter
-                n = cell.counter + self.increment_threshold
+                n = cell.counter + threshold
                 if n <= self.THRESHOLD_LIMIT:       # bound not reached
                     cell.counter = n
                     return
@@ -497,7 +502,6 @@
         if hasattr(self, 'set_future_values'):
             return self.set_future_values
 
-        warmrunnerdesc = self.warmrunnerdesc
         jitdriver_sd   = self.jitdriver_sd
         cpu = self.cpu
         vinfo = jitdriver_sd.virtualizable_info
@@ -513,7 +517,6 @@
         #
         if vinfo is not None:
             i0 = len(jitdriver_sd._red_args_types)
-            num_green_args = jitdriver_sd.num_green_args
             index_of_virtualizable = jitdriver_sd.index_of_virtualizable
             vable_static_fields = unrolling_iterable(
                 zip(vinfo.static_extra_types, vinfo.static_fields))
@@ -599,12 +602,8 @@
         get_location_ptr = self.jitdriver_sd._get_printable_location_ptr
         if get_location_ptr is None:
             missing = '(no jitdriver.get_printable_location!)'
-            missingll = llstr(missing)
             def get_location_str(greenkey):
-                if we_are_translated():
-                    return missingll
-                else:
-                    return missing
+                return missing
         else:
             rtyper = self.warmrunnerdesc.rtyper
             unwrap_greenkey = self.make_unwrap_greenkey()
@@ -612,10 +611,10 @@
             def get_location_str(greenkey):
                 greenargs = unwrap_greenkey(greenkey)
                 fn = support.maybe_on_top_of_llinterp(rtyper, get_location_ptr)
-                res = fn(*greenargs)
-                if not we_are_translated() and not isinstance(res, str):
-                    res = hlstr(res)
-                return res
+                llres = fn(*greenargs)
+                if not we_are_translated() and isinstance(llres, str):
+                    return llres
+                return hlstr(llres)
         self.get_location_str = get_location_str
         #
         confirm_enter_jit_ptr = self.jitdriver_sd._confirm_enter_jit_ptr
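
The refactoring above lets the loop threshold and the new function_threshold share one piece of counter arithmetic: each can_enter_jit (or each function entry, for the start-of-function counter) adds a precomputed increment to a per-greenkey counter, and compilation is triggered once the counter would exceed THRESHOLD_LIMIT, i.e. after roughly 'threshold' visits. A standalone sketch of that arithmetic, with THRESHOLD_LIMIT assumed to be sys.maxint // 2 as a stand-in value:

    import sys

    THRESHOLD_LIMIT = sys.maxint // 2        # assumed stand-in for the warmstate constant

    def compute_threshold(threshold):
        # same shape as _compute_threshold above
        if threshold <= 0:
            return 0                         # increment 0: the limit is never reached
        if threshold < 2:
            threshold = 2
        return (THRESHOLD_LIMIT // threshold) + 1

    def visits_until_compile(threshold):
        increment = compute_threshold(threshold)
        if increment == 0:
            return None                      # never compiles
        counter = visits = 0
        while counter + increment <= THRESHOLD_LIMIT:
            counter += increment
            visits += 1
        return visits + 1                    # the visit on which compilation starts

    assert visits_until_compile(10) == 10    # compiled on roughly the 10th visit
    assert visits_until_compile(0) is None
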
diff --git a/pypy/jit/tl/pypyjit.py b/pypy/jit/tl/pypyjit.py
--- a/pypy/jit/tl/pypyjit.py
+++ b/pypy/jit/tl/pypyjit.py
@@ -30,6 +30,7 @@
     BACKEND = 'c'
 
 config = get_pypy_config(translating=True)
+config.translation.backendopt.inline_threshold = 0.1
 config.translation.gc = 'boehm'
 config.objspace.nofaking = True
 config.translating = True
diff --git a/pypy/jit/tl/spli/interpreter.py b/pypy/jit/tl/spli/interpreter.py
--- a/pypy/jit/tl/spli/interpreter.py
+++ b/pypy/jit/tl/spli/interpreter.py
@@ -2,7 +2,7 @@
 from pypy.tool import stdlib_opcode
 from pypy.jit.tl.spli import objects, pycode
 from pypy.rlib.unroll import unrolling_iterable
-from pypy.rlib.jit import JitDriver, hint, dont_look_inside
+from pypy.rlib.jit import JitDriver, promote, dont_look_inside
 from pypy.rlib.objectmodel import we_are_translated
 
 opcode_method_names = stdlib_opcode.host_bytecode_spec.method_names
@@ -78,7 +78,7 @@
         while True:
             jitdriver.jit_merge_point(code=code, instr_index=instr_index,
                                       frame=self)
-            self.stack_depth = hint(self.stack_depth, promote=True)
+            self.stack_depth = promote(self.stack_depth)
             op = ord(code[instr_index])
             instr_index += 1
             if op >= HAVE_ARGUMENT:
diff --git a/pypy/jit/tl/tiny2.py b/pypy/jit/tl/tiny2.py
--- a/pypy/jit/tl/tiny2.py
+++ b/pypy/jit/tl/tiny2.py
@@ -27,7 +27,7 @@
     { #1 #1 1 SUB ->#1 #1 }    => when called with 5, gives '5 4 3 2 1'
 
 """
-from pypy.rlib.jit import hint
+from pypy.rlib.jit import hint, promote
 
 #
 # See pypy/doc/jit.txt for a higher-level overview of the JIT techniques
@@ -75,9 +75,9 @@
     # ones.  The JIT compiler cannot look into indirect calls, but it
     # can analyze and inline the code in directly-called functions.
     y = stack.pop()
-    hint(y.__class__, promote=True)
+    promote(y.__class__)
     x = stack.pop()
-    hint(x.__class__, promote=True)
+    promote(x.__class__)
     try:
         z = IntBox(func_int(x.as_int(), y.as_int()))
     except ValueError:
@@ -108,7 +108,7 @@
     # doesn't have to worry about the 'args' list being unpredictably
     # modified.
     oldargs = args
-    argcount = hint(len(oldargs), promote=True)
+    argcount = promote(len(oldargs))
     args = []
     n = 0
     while n < argcount:
@@ -160,8 +160,7 @@
                 # read out of the 'loops' list will be a compile-time constant
                 # because it was pushed as a compile-time constant by the '{'
                 # case above into 'loops', which is a virtual list, so the
-                # promotion below is just a way to make the colors match.
-                pos = hint(pos, promote=True)
+                promote(pos)
         else:
             stack.append(StrBox(opcode))
     return stack
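
Throughout this merge, hint(x, promote=True) is rewritten as the newer promote(x) helper from pypy.rlib.jit; both spell the same promotion hint, and on plain CPython each simply returns its argument. A rough compatibility sketch of the relationship (not the real rlib implementation):

    def hint(x, **flags):
        # untranslated behaviour: JIT hints are no-ops that return their argument
        return x

    def promote(x):
        # the spelling used after this merge
        return hint(x, promote=True)

    stack_depth = 7
    assert promote(stack_depth) == hint(stack_depth, promote=True) == 7

Note that call sites which re-read the promoted value keep the assignment (frame.s = promote(frame.s), pos = promote(pos)), while sites that only need the promotion side effect drop the result (promote(obj), promote(func)).
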
diff --git a/pypy/jit/tl/tiny2_hotpath.py b/pypy/jit/tl/tiny2_hotpath.py
--- a/pypy/jit/tl/tiny2_hotpath.py
+++ b/pypy/jit/tl/tiny2_hotpath.py
@@ -27,7 +27,7 @@
     { #1 #1 1 SUB ->#1 #1 }    => when called with 5, gives '5 4 3 2 1'
 
 """
-from pypy.rlib.jit import hint, JitDriver
+from pypy.rlib.jit import hint, promote, JitDriver
 
 #
 # See pypy/doc/jit.txt for a higher-level overview of the JIT techniques
@@ -77,9 +77,9 @@
     # ones.  The JIT compiler cannot look into indirect calls, but it
     # can analyze and inline the code in directly-called functions.
     stack, y = stack.pop()
-    hint(y.__class__, promote=True)
+    promote(y.__class__)
     stack, x = stack.pop()
-    hint(x.__class__, promote=True)
+    promote(x.__class__)
     try:
         z = IntBox(func_int(x.as_int(), y.as_int()))
     except ValueError:
@@ -120,7 +120,7 @@
         # modified.
         oldloops = invariants
         oldargs = reds.args
-        argcount = hint(len(oldargs), promote=True)
+        argcount = promote(len(oldargs))
         args = []
         n = 0
         while n < argcount:
@@ -189,7 +189,7 @@
                 # because it was pushed as a compile-time constant by the '{'
                 # case above into 'loops', which is a virtual list, so the
                 # promotion below is just a way to make the colors match.
-                pos = hint(pos, promote=True)
+                pos = promote(pos)
                 tinyjitdriver.can_enter_jit(args=args, loops=loops, stack=stack,
                                             bytecode=bytecode, pos=pos)
         else:
diff --git a/pypy/jit/tl/tiny3_hotpath.py b/pypy/jit/tl/tiny3_hotpath.py
--- a/pypy/jit/tl/tiny3_hotpath.py
+++ b/pypy/jit/tl/tiny3_hotpath.py
@@ -28,7 +28,7 @@
     { #1 #1 1 SUB ->#1 #1 }    => when called with 5, gives '5 4 3 2 1'
 
 """
-from pypy.rlib.jit import hint, JitDriver
+from pypy.rlib.jit import promote, hint, JitDriver
 from pypy.rlib.objectmodel import specialize
 
 #
@@ -83,9 +83,9 @@
     # ones.  The JIT compiler cannot look into indirect calls, but it
     # can analyze and inline the code in directly-called functions.
     stack, y = stack.pop()
-    hint(y.__class__, promote=True)
+    promote(y.__class__)
     stack, x = stack.pop()
-    hint(x.__class__, promote=True)
+    promote(x.__class__)
     if isinstance(x, IntBox) and isinstance(y, IntBox):
         z = IntBox(func_int(x.as_int(), y.as_int()))
     else:
@@ -125,7 +125,7 @@
         # modified.
         oldloops = invariants
         oldargs = reds.args
-        argcount = hint(len(oldargs), promote=True)
+        argcount = promote(len(oldargs))
         args = []
         n = 0
         while n < argcount:
@@ -194,7 +194,7 @@
                 # because it was pushed as a compile-time constant by the '{'
                 # case above into 'loops', which is a virtual list, so the
                 # promotion below is just a way to make the colors match.
-                pos = hint(pos, promote=True)
+                pos = promote(pos)
                 tinyjitdriver.can_enter_jit(args=args, loops=loops, stack=stack,
                                             bytecode=bytecode, pos=pos)
         else:
diff --git a/pypy/jit/tl/tl.py b/pypy/jit/tl/tl.py
--- a/pypy/jit/tl/tl.py
+++ b/pypy/jit/tl/tl.py
@@ -2,7 +2,7 @@
 
 import py
 from pypy.jit.tl.tlopcode import *
-from pypy.rlib.jit import JitDriver, hint, dont_look_inside
+from pypy.rlib.jit import JitDriver, hint, dont_look_inside, promote
 
 def char2int(c):
     t = ord(c)
@@ -81,7 +81,7 @@
             myjitdriver.jit_merge_point(pc=pc, code=code,
                                         stack=stack, inputarg=inputarg)
             opcode = ord(code[pc])
-            stack.stackpos = hint(stack.stackpos, promote=True)
+            stack.stackpos = promote(stack.stackpos)
             pc += 1
 
             if opcode == NOP:
diff --git a/pypy/jit/tl/tlc.py b/pypy/jit/tl/tlc.py
--- a/pypy/jit/tl/tlc.py
+++ b/pypy/jit/tl/tlc.py
@@ -5,7 +5,7 @@
 from pypy.rlib.objectmodel import specialize, we_are_translated
 from pypy.jit.tl.tlopcode import *
 from pypy.jit.tl import tlopcode
-from pypy.rlib.jit import JitDriver
+from pypy.rlib.jit import JitDriver, elidable
 
 class Obj(object):
 
@@ -71,6 +71,7 @@
 
     classes = [] # [(descr, cls), ...]
 
+    @elidable
     def get(key):
         for descr, cls in Class.classes:
             if key.attributes == descr.attributes and\
@@ -79,7 +80,6 @@
         result = Class(key)
         Class.classes.append((key, result))
         return result
-    get._pure_function_ = True
     get = staticmethod(get)
 
     def __init__(self, descr):
diff --git a/pypy/jit/tool/oparser.py b/pypy/jit/tool/oparser.py
--- a/pypy/jit/tool/oparser.py
+++ b/pypy/jit/tool/oparser.py
@@ -3,24 +3,15 @@
 in a nicer fashion
 """
 
-from pypy.jit.metainterp.history import TreeLoop, BoxInt, ConstInt,\
-     ConstObj, ConstPtr, Box, BasicFailDescr, BoxFloat, ConstFloat,\
-     LoopToken, get_const_ptr_for_string, get_const_ptr_for_unicode
+from pypy.jit.tool.oparser_model import get_model
+
 from pypy.jit.metainterp.resoperation import rop, ResOperation, \
                                             ResOpWithDescr, N_aryOp, \
                                             UnaryOp, PlainResOp
-from pypy.jit.metainterp.typesystem import llhelper
-from pypy.jit.codewriter.heaptracker import adr2int
-from pypy.jit.codewriter import longlong
-from pypy.rpython.lltypesystem import lltype, llmemory
-from pypy.rpython.ootypesystem import ootype
 
 class ParseError(Exception):
     pass
 
-class Boxes(object):
-    pass
-
 class ESCAPE_OP(N_aryOp, ResOpWithDescr):
 
     OPNUM = -123
@@ -54,37 +45,15 @@
     def clone(self):
         return FORCE_SPILL(self.OPNUM, self.getarglist()[:])
 
-class ExtendedTreeLoop(TreeLoop):
 
-    def getboxes(self):
-        def opboxes(operations):
-            for op in operations:
-                yield op.result
-                for box in op.getarglist():
-                    yield box
-        def allboxes():
-            for box in self.inputargs:
-                yield box
-            for box in opboxes(self.operations):
-                yield box
-
-        boxes = Boxes()
-        for box in allboxes():
-            if isinstance(box, Box):
-                name = str(box)
-                setattr(boxes, name, box)
-        return boxes
-
-    def setvalues(self, **kwds):
-        boxes = self.getboxes()
-        for name, value in kwds.iteritems():
-            getattr(boxes, name).value = value
-
-def default_fail_descr(fail_args=None):
-    return BasicFailDescr()
+def default_fail_descr(model, fail_args=None):
+    return model.BasicFailDescr()
 
 
 class OpParser(object):
+
+    use_mock_model = False
+    
     def __init__(self, input, cpu, namespace, type_system, boxkinds,
                  invent_fail_descr=default_fail_descr,
                  nonstrict=False):
@@ -100,7 +69,8 @@
             self._cache = {}
         self.invent_fail_descr = invent_fail_descr
         self.nonstrict = nonstrict
-        self.looptoken = LoopToken()
+        self.model = get_model(self.use_mock_model)
+        self.looptoken = self.model.LoopToken()
 
     def get_const(self, name, typ):
         if self._consts is None:
@@ -108,16 +78,16 @@
         obj = self._consts[name]
         if self.type_system == 'lltype':
             if typ == 'ptr':
-                return ConstPtr(obj)
+                return self.model.ConstPtr(obj)
             else:
                 assert typ == 'class'
-                return ConstInt(adr2int(llmemory.cast_ptr_to_adr(obj)))
+                return self.model.ConstInt(self.model.ptr_to_int(obj))
         else:
             if typ == 'ptr':
-                return ConstObj(obj)
+                return self.model.ConstObj(obj)
             else:
                 assert typ == 'class'
-                return ConstObj(ootype.cast_to_object(obj))
+                return self.model.ConstObj(ootype.cast_to_object(obj))
 
     def get_descr(self, poss_descr):
         if poss_descr.startswith('<'):
@@ -132,16 +102,16 @@
             pass
         if elem.startswith('i'):
             # integer
-            box = BoxInt()
-            _box_counter_more_than(elem[1:])
+            box = self.model.BoxInt()
+            _box_counter_more_than(self.model, elem[1:])
         elif elem.startswith('f'):
-            box = BoxFloat()
-            _box_counter_more_than(elem[1:])
+            box = self.model.BoxFloat()
+            _box_counter_more_than(self.model, elem[1:])
         elif elem.startswith('p'):
             # pointer
-            ts = getattr(self.cpu, 'ts', llhelper)
+            ts = getattr(self.cpu, 'ts', self.model.llhelper)
             box = ts.BoxRef()
-            _box_counter_more_than(elem[1:])
+            _box_counter_more_than(self.model, elem[1:])
         else:
             for prefix, boxclass in self.boxkinds.iteritems():
                 if elem.startswith(prefix):
@@ -175,21 +145,21 @@
 
     def getvar(self, arg):
         if not arg:
-            return ConstInt(0)
+            return self.model.ConstInt(0)
         try:
-            return ConstInt(int(arg))
+            return self.model.ConstInt(int(arg))
         except ValueError:
             if self.is_float(arg):
-                return ConstFloat(longlong.getfloatstorage(float(arg)))
+                return self.model.ConstFloat(self.model.convert_to_floatstorage(arg))
             if (arg.startswith('"') or arg.startswith("'") or
                 arg.startswith('s"')):
                 # XXX ootype
                 info = arg[1:].strip("'\"")
-                return get_const_ptr_for_string(info)
+                return self.model.get_const_ptr_for_string(info)
             if arg.startswith('u"'):
                 # XXX ootype
                 info = arg[1:].strip("'\"")
-                return get_const_ptr_for_unicode(info)
+                return self.model.get_const_ptr_for_unicode(info)
             if arg.startswith('ConstClass('):
                 name = arg[len('ConstClass('):-1]
                 return self.get_const(name, 'class')
@@ -197,9 +167,9 @@
                 return None
             elif arg == 'NULL':
                 if self.type_system == 'lltype':
-                    return ConstPtr(ConstPtr.value)
+                    return self.model.ConstPtr(self.model.ConstPtr.value)
                 else:
-                    return ConstObj(ConstObj.value)
+                    return self.model.ConstObj(self.model.ConstObj.value)
             elif arg.startswith('ConstPtr('):
                 name = arg[len('ConstPtr('):-1]
                 return self.get_const(name, 'ptr')
@@ -211,11 +181,8 @@
         args = []
         descr = None
         if argspec.strip():
-            if opname == 'debug_merge_point':
-                allargs = argspec.rsplit(', ', 1)
-            else:
-                allargs = [arg for arg in argspec.split(",")
-                           if arg != '']
+            allargs = [arg for arg in argspec.split(",")
+                       if arg != '']
 
             poss_descr = allargs[-1].strip()
             if poss_descr.startswith('descr='):
@@ -266,14 +233,14 @@
                                 "Unknown var in fail_args: %s" % arg)
                     fail_args.append(fail_arg)
             if descr is None and self.invent_fail_descr:
-                descr = self.invent_fail_descr(fail_args)
+                descr = self.invent_fail_descr(self.model, fail_args)
             if hasattr(descr, '_oparser_uses_descr_of_guard'):
                 descr._oparser_uses_descr_of_guard(self, fail_args)
         else:
             fail_args = None
             if opnum == rop.FINISH:
                 if descr is None and self.invent_fail_descr:
-                    descr = self.invent_fail_descr()
+                    descr = self.invent_fail_descr(self.model)
             elif opnum == rop.JUMP:
                 if descr is None and self.invent_fail_descr:
                     descr = self.looptoken
@@ -338,7 +305,7 @@
         num, ops, last_offset = self.parse_ops(base_indent, newlines, 0)
         if num < len(newlines):
             raise ParseError("unexpected dedent at line: %s" % newlines[num])
-        loop = ExtendedTreeLoop("loop")
+        loop = self.model.ExtendedTreeLoop("loop")
         loop.comment = first_comment
         loop.token = self.looptoken
         loop.operations = ops
@@ -370,6 +337,11 @@
                 num += 1
         return num, ops, last_offset
 
+    def postprocess(self, loop):
+        """ A hook that can be overloaded to do some postprocessing
+        """
+        return loop
+
     def parse_offset(self, line):
         if line.startswith('+'):
             # it begins with an offset, like: "+10: i1 = int_add(...)"
@@ -394,7 +366,7 @@
 
 def parse(input, cpu=None, namespace=None, type_system='lltype',
           boxkinds=None, invent_fail_descr=default_fail_descr,
-          no_namespace=False, nonstrict=False):
+          no_namespace=False, nonstrict=False, OpParser=OpParser):
     if namespace is None and not no_namespace:
         namespace = {}
     return OpParser(input, cpu, namespace, type_system, boxkinds,
@@ -405,6 +377,6 @@
     return parse(*args, **kwds)
 
 
-def _box_counter_more_than(s):
+def _box_counter_more_than(model, s):
     if s.isdigit():
-        Box._counter = max(Box._counter, int(s)+1)
+        model.Box._counter = max(model.Box._counter, int(s)+1)
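
After this rewrite the parser no longer imports the metainterp history classes directly: every Box/Const/LoopToken it builds comes from a model object chosen by get_model(use_mock_model) (defined in the new oparser_model.py just below), and parse() now accepts an OpParser class so tests can substitute one with use_mock_model = True. A hedged usage sketch, assuming a PyPy checkout on the path and a hypothetical MockOpParser subclass like the one the updated tests use:

    from pypy.jit.tool.oparser import parse, OpParser

    class MockOpParser(OpParser):
        # hypothetical subclass: parse against the lightweight mock model
        use_mock_model = True

    loop = parse("""
    [i0]
    i1 = int_add(i0, 1)
    finish(i1)
    """, OpParser=MockOpParser)
    assert len(loop.operations) == 2
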
diff --git a/pypy/jit/tool/oparser_model.py b/pypy/jit/tool/oparser_model.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/tool/oparser_model.py
@@ -0,0 +1,148 @@
+class Boxes(object):
+    pass
+
+def get_real_model():
+    class LoopModel(object):
+        from pypy.jit.metainterp.history import TreeLoop, LoopToken
+        from pypy.jit.metainterp.history import Box, BoxInt, BoxFloat
+        from pypy.jit.metainterp.history import ConstInt, ConstObj, ConstPtr, ConstFloat
+        from pypy.jit.metainterp.history import BasicFailDescr
+        from pypy.jit.metainterp.typesystem import llhelper
+
+        from pypy.jit.metainterp.history import get_const_ptr_for_string
+        from pypy.jit.metainterp.history import get_const_ptr_for_unicode
+        get_const_ptr_for_string = staticmethod(get_const_ptr_for_string)
+        get_const_ptr_for_unicode = staticmethod(get_const_ptr_for_unicode)
+
+        @staticmethod
+        def convert_to_floatstorage(arg):
+            from pypy.jit.codewriter import longlong
+            return longlong.getfloatstorage(float(arg))
+
+        @staticmethod
+        def ptr_to_int(obj):
+            from pypy.jit.codewriter.heaptracker import adr2int
+            from pypy.rpython.lltypesystem import llmemory
+            return adr2int(llmemory.cast_ptr_to_adr(obj))
+
+        @staticmethod
+        def ootype_cast_to_object(obj):
+            from pypy.rpython.ootypesystem import ootype
+            return ootype.cast_to_object(obj)
+
+    return LoopModel
+
+def get_mock_model():
+    class LoopModel(object):
+
+        class TreeLoop(object):
+            def __init__(self, name):
+                self.name = name
+
+        class LoopToken(object):
+            I_am_a_descr = True
+
+        class BasicFailDescr(object):
+            I_am_a_descr = True
+
+        class Box(object):
+            _counter = 0
+            type = 'b'
+
+            def __init__(self, value=0):
+                self.value = value
+
+            def __repr__(self):
+                result = str(self)
+                result += '(%s)' % self.value
+                return result
+
+            def __str__(self):
+                if not hasattr(self, '_str'):
+                    self._str = '%s%d' % (self.type, Box._counter)
+                    Box._counter += 1
+                return self._str
+
+        class BoxInt(Box):
+            type = 'i'
+
+        class BoxFloat(Box):
+            type = 'f'
+
+        class BoxRef(Box):
+            type = 'p'
+
+        class Const(object):
+            def __init__(self, value=None):
+                self.value = value
+
+            def _get_str(self):
+                return str(self.value)
+
+        class ConstInt(Const):
+            pass
+
+        class ConstPtr(Const):
+            pass
+
+        class ConstFloat(Const):
+            pass
+
+        @classmethod
+        def get_const_ptr_for_string(cls, s):
+            return cls.ConstPtr(s)
+
+        @classmethod
+        def get_const_ptr_for_unicode(cls, s):
+            return cls.ConstPtr(s)
+
+        @staticmethod
+        def convert_to_floatstorage(arg):
+            return float(arg)
+
+        @staticmethod
+        def ptr_to_int(obj):
+            return id(obj)
+
+        class llhelper(object):
+            pass
+
+    LoopModel.llhelper.BoxRef = LoopModel.BoxRef
+
+    return LoopModel
+
+
+def get_model(use_mock):
+    if use_mock:
+        model = get_mock_model()
+    else:
+        model = get_real_model()
+
+    class ExtendedTreeLoop(model.TreeLoop):
+
+        def getboxes(self):
+            def opboxes(operations):
+                for op in operations:
+                    yield op.result
+                    for box in op.getarglist():
+                        yield box
+            def allboxes():
+                for box in self.inputargs:
+                    yield box
+                for box in opboxes(self.operations):
+                    yield box
+
+            boxes = Boxes()
+            for box in allboxes():
+                if isinstance(box, model.Box):
+                    name = str(box)
+                    setattr(boxes, name, box)
+            return boxes
+
+        def setvalues(self, **kwds):
+            boxes = self.getboxes()
+            for name, value in kwds.iteritems():
+                getattr(boxes, name).value = value
+
+    model.ExtendedTreeLoop = ExtendedTreeLoop
+    return model
diff --git a/pypy/jit/tool/pypytrace-mode.el b/pypy/jit/tool/pypytrace-mode.el
--- a/pypy/jit/tool/pypytrace-mode.el
+++ b/pypy/jit/tool/pypytrace-mode.el
@@ -8,10 +8,16 @@
 (defun set-truncate-lines ()
   (setq truncate-lines t))
 
+;; to generate the list of keywords:
+;; from pypy.jit.metainterp import resoperation
+;; print ' '.join(sorted('"%s"' % op.lower() for op in resoperation.opname.values() if not op.startswith('GUARD')))
+
+
+
 (define-generic-mode 
   'pypytrace-mode                   ;; name of the mode to create
   nil
-  '("jump" "finish" "int_add" "int_sub" "int_mul" "int_floordiv" "uint_floordiv" "int_mod" "int_and" "int_or" "int_xor" "int_rshift" "int_lshift" "uint_rshift" "float_add" "float_sub" "float_mul" "float_truediv" "float_neg" "float_abs" "cast_float_to_int" "cast_int_to_float" "int_lt" "int_le" "int_eq" "int_ne" "int_gt" "int_ge" "uint_lt" "uint_le" "uint_gt" "uint_ge" "float_lt" "float_le" "float_eq" "float_ne" "float_gt" "float_ge" "int_is_zero" "int_is_true" "int_neg" "int_invert" "same_as" "ptr_eq" "ptr_ne" "arraylen_gc" "strlen" "strgetitem" "getfield_gc_pure" "getfield_raw_pure" "getarrayitem_gc_pure" "unicodelen" "unicodegetitem" "getarrayitem_gc" "getarrayitem_raw" "getfield_gc" "getfield_raw" "new" "new_with_vtable" "new_array" "force_token" "virtual_ref" "setarrayitem_gc" "setarrayitem_raw" "setfield_gc" "setfield_raw" "arraycopy" "newstr" "strsetitem" "unicodesetitem" "newunicode" "cond_call_gc_wb" "virtual_ref_finish" "call" "call_assembler" "call_may_force" "call_loopinvariant" "call_pure" "int_add_ovf" "int_sub_ovf" "int_mul_ovf") ;; keywords
+  '("arraylen_gc" "call" "call_assembler" "call_loopinvariant" "call_may_force" "call_pure" "call_release_gil" "cast_float_to_int" "cast_int_to_float" "cond_call_gc_wb" "copystrcontent" "copyunicodecontent" "debug_merge_point" "finish" "float_abs" "float_add" "float_eq" "float_ge" "float_gt" "float_le" "float_lt" "float_mul" "float_ne" "float_neg" "float_sub" "float_truediv" "force_token" "getarrayitem_gc" "getarrayitem_gc_pure" "getarrayitem_raw" "getfield_gc" "getfield_gc_pure" "getfield_raw" "getfield_raw_pure" "int_add" "int_add_ovf" "int_and" "int_eq" "int_floordiv" "int_ge" "int_gt" "int_invert" "int_is_true" "int_is_zero" "int_le" "int_lshift" "int_lt" "int_mod" "int_mul" "int_mul_ovf" "int_ne" "int_neg" "int_or" "int_rshift" "int_sub" "int_sub_ovf" "int_xor" "jit_debug" "jump" "new" "new_array" "new_with_vtable" "newstr" "newunicode" "ptr_eq" "ptr_ne" "quasiimmut_field" "read_timestamp" "same_as" "setarrayitem_gc" "setarrayitem_raw" "setfield_gc" "setfield_raw" "strgetitem" "strlen" "strsetitem" "uint_floordiv" "uint_ge" "uint_gt" "uint_le" "uint_lt" "uint_rshift" "unicodegetitem" "unicodelen" "unicodesetitem" "virtual_ref" "virtual_ref_finish") ;; keywords
   '( ;; additional regexps
     ("^# Loop.*" . 'hi-blue)
     ("\\[.*\\]" . 'font-lock-comment-face) ;; comment out argument lists
@@ -26,7 +32,7 @@
     ("<.*FieldDescr \\([^ ]*\\)" (1 'font-lock-variable-name-face))
     ;; comment out debug_merge_point, but then highlight specific part of it
     ("^debug_merge_point.*" . font-lock-comment-face)
-    ("^\\(debug_merge_point\\).*code object\\(.*\\), file \\('.*'\\), \\(line .*\\)> \\(.*\\)"
+    ("^\\(debug_merge_point\\).*code object\\(.*\\). file \\('.*'\\). \\(line .*\\)> \\(.*\\)"
      (1 'compilation-warning t)
      (2 'escape-glyph t)
      (3 'font-lock-string-face t)
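
The new comment block in pypytrace-mode.el records how the keyword list above is regenerated; as a standalone script run from a PyPy checkout it is roughly:

    # regenerate the resop keyword list for pypytrace-mode.el (needs a PyPy checkout)
    from pypy.jit.metainterp import resoperation

    keywords = sorted('"%s"' % op.lower()
                      for op in resoperation.opname.values()
                      if not op.startswith('GUARD'))
    print ' '.join(keywords)
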
diff --git a/pypy/jit/tool/test/test_oparser.py b/pypy/jit/tool/test/test_oparser.py
--- a/pypy/jit/tool/test/test_oparser.py
+++ b/pypy/jit/tool/test/test_oparser.py
@@ -1,227 +1,274 @@
 import py
+import sys
 from pypy.rpython.lltypesystem import lltype, llmemory
 
-from pypy.jit.tool.oparser import parse, ParseError
+from pypy.jit.tool.oparser import parse, OpParser
 from pypy.jit.metainterp.resoperation import rop
-from pypy.jit.metainterp.history import AbstractDescr, BoxInt, LoopToken,\
-     BoxFloat
+from pypy.jit.metainterp.history import AbstractDescr, BoxInt, LoopToken
 
-def test_basic_parse():
-    x = """
-    [i0, i1]
-    # a comment
-    i2 = int_add(i0, i1)
-    i3 = int_sub(i2, 3) # another comment
-    finish() # (tricky)
-    """
-    loop = parse(x)
-    assert len(loop.operations) == 3
-    assert [op.getopnum() for op in loop.operations] == [rop.INT_ADD, rop.INT_SUB,
-                                                    rop.FINISH]
-    assert len(loop.inputargs) == 2
-    assert loop.operations[-1].getdescr()
+class BaseTestOparser(object):
 
-def test_const_ptr_subops():
-    x = """
-    [p0]
-    guard_class(p0, ConstClass(vtable)) []
-    """
-    S = lltype.Struct('S')
-    vtable = lltype.nullptr(S)
-    loop = parse(x, None, locals())
-    assert len(loop.operations) == 1
-    assert loop.operations[0].getdescr()
-    assert loop.operations[0].getfailargs() == []
+    OpParser = None
 
-def test_descr():
-    class Xyz(AbstractDescr):
-        pass
-    
-    x = """
-    [p0]
-    i1 = getfield_gc(p0, descr=stuff)
-    """
-    stuff = Xyz()
-    loop = parse(x, None, locals())
-    assert loop.operations[0].getdescr() is stuff
+    def parse(self, *args, **kwds):
+        kwds['OpParser'] = self.OpParser
+        return parse(*args, **kwds)
 
-def test_after_fail():
-    x = """
-    [i0]
-    guard_value(i0, 3) []
-    i1 = int_add(1, 2)
-    """
-    loop = parse(x, None, {})
-    assert len(loop.operations) == 2
+    def test_basic_parse(self):
+        x = """
+        [i0, i1]
+        # a comment
+        i2 = int_add(i0, i1)
+        i3 = int_sub(i2, 3) # another comment
+        finish() # (tricky)
+        """
+        loop = self.parse(x)
+        assert len(loop.operations) == 3
+        assert [op.getopnum() for op in loop.operations] == [rop.INT_ADD, rop.INT_SUB,
+                                                        rop.FINISH]
+        assert len(loop.inputargs) == 2
+        assert loop.operations[-1].getdescr()
 
-def test_descr_setfield():
-    class Xyz(AbstractDescr):
-        pass
-    
-    x = """
-    [p0]
-    setfield_gc(p0, 3, descr=stuff)
-    """
-    stuff = Xyz()
-    loop = parse(x, None, locals())
-    assert loop.operations[0].getdescr() is stuff
+    def test_const_ptr_subops(self):
+        x = """
+        [p0]
+        guard_class(p0, ConstClass(vtable)) []
+        """
+        S = lltype.Struct('S')
+        vtable = lltype.nullptr(S)
+        loop = self.parse(x, None, locals())
+        assert len(loop.operations) == 1
+        assert loop.operations[0].getdescr()
+        assert loop.operations[0].getfailargs() == []
 
-def test_boxname():
-    x = """
-    [i42]
-    i50 = int_add(i42, 1)
-    """
-    loop = parse(x, None, {})
-    assert str(loop.inputargs[0]) == 'i42'
-    assert str(loop.operations[0].result) == 'i50'
+    def test_descr(self):
+        class Xyz(AbstractDescr):
+            I_am_a_descr = True # for the mock case
 
-def test_getboxes():
-    x = """
-    [i0]
-    i1 = int_add(i0, 10)
-    """
-    loop = parse(x, None, {})
-    boxes = loop.getboxes()
-    assert boxes.i0 is loop.inputargs[0]
-    assert boxes.i1 is loop.operations[0].result
-    
-def test_setvalues():
-    x = """
-    [i0]
-    i1 = int_add(i0, 10)
-    """
-    loop = parse(x, None, {})
-    loop.setvalues(i0=32, i1=42)
-    assert loop.inputargs[0].value == 32
-    assert loop.operations[0].result.value == 42
+        x = """
+        [p0]
+        i1 = getfield_gc(p0, descr=stuff)
+        """
+        stuff = Xyz()
+        loop = self.parse(x, None, locals())
+        assert loop.operations[0].getdescr() is stuff
 
-def test_boxkind():
-    x = """
-    [sum0]
-    """
-    loop = parse(x, None, {}, boxkinds={'sum': BoxInt})
-    b = loop.getboxes()
-    assert isinstance(b.sum0, BoxInt)
-    
-def test_getvar_const_ptr():
-    x = '''
-    []
-    call(ConstPtr(func_ptr))
+    def test_after_fail(self):
+        x = """
+        [i0]
+        guard_value(i0, 3) []
+        i1 = int_add(1, 2)
+        """
+        loop = self.parse(x, None, {})
+        assert len(loop.operations) == 2
+
+    def test_descr_setfield(self):
+        class Xyz(AbstractDescr):
+            I_am_a_descr = True # for the mock case
+
+        x = """
+        [p0]
+        setfield_gc(p0, 3, descr=stuff)
+        """
+        stuff = Xyz()
+        loop = self.parse(x, None, locals())
+        assert loop.operations[0].getdescr() is stuff
+
+    def test_boxname(self):
+        x = """
+        [i42]
+        i50 = int_add(i42, 1)
+        """
+        loop = self.parse(x, None, {})
+        assert str(loop.inputargs[0]) == 'i42'
+        assert str(loop.operations[0].result) == 'i50'
+
+    def test_getboxes(self):
+        x = """
+        [i0]
+        i1 = int_add(i0, 10)
+        """
+        loop = self.parse(x, None, {})
+        boxes = loop.getboxes()
+        assert boxes.i0 is loop.inputargs[0]
+        assert boxes.i1 is loop.operations[0].result
+
+    def test_setvalues(self):
+        x = """
+        [i0]
+        i1 = int_add(i0, 10)
+        """
+        loop = self.parse(x, None, {})
+        loop.setvalues(i0=32, i1=42)
+        assert loop.inputargs[0].value == 32
+        assert loop.operations[0].result.value == 42
+
+    def test_getvar_const_ptr(self):
+        x = '''
+        []
+        call(ConstPtr(func_ptr))
+        '''
+        TP = lltype.GcArray(lltype.Signed)
+        NULL = lltype.cast_opaque_ptr(llmemory.GCREF, lltype.nullptr(TP))
+        loop = self.parse(x, None, {'func_ptr' : NULL})
+        assert loop.operations[0].getarg(0).value == NULL
+
+    def test_jump_target(self):
+        x = '''
+        []
+        jump()
+        '''
+        loop = self.parse(x)
+        assert loop.operations[0].getdescr() is loop.token
+
+    def test_jump_target_other(self):
+        looptoken = LoopToken()
+        looptoken.I_am_a_descr = True # for the mock case
+        x = '''
+        []
+        jump(descr=looptoken)
+        '''
+        loop = self.parse(x, namespace=locals())
+        assert loop.operations[0].getdescr() is looptoken
+
+    def test_floats(self):
+        x = '''
+        [f0]
+        f1 = float_add(f0, 3.5)
+        '''
+        loop = self.parse(x)
+        box = loop.operations[0].getarg(0)
+        # we cannot use isinstance, because in case of mock the class will be
+        # constructed on the fly
+        assert box.__class__.__name__ == 'BoxFloat'
+
+    def test_debug_merge_point(self):
+        x = '''
+        []
+        debug_merge_point(0, "info")
+        debug_merge_point(0, 'info')
+        debug_merge_point(1, '<some ('other.')> info')
+        debug_merge_point(0, '(stuff) #1')
+        '''
+        loop = self.parse(x)
+        assert loop.operations[0].getarg(1)._get_str() == 'info'
+        assert loop.operations[1].getarg(1)._get_str() == 'info'
+        assert loop.operations[2].getarg(1)._get_str() == "<some ('other.')> info"
+        assert loop.operations[3].getarg(1)._get_str() == "(stuff) #1"
+
+
+    def test_descr_with_obj_print(self):
+        x = '''
+        [p0]
+        setfield_gc(p0, 1, descr=<SomeDescr>)
+        '''
+        loop = self.parse(x)
+        # assert did not explode
+
+    example_loop_log = '''\
+    # bridge out of Guard12, 6 ops
+    [i0, i1, i2]
+    i4 = int_add(i0, 2)
+    i6 = int_sub(i1, 1)
+    i8 = int_gt(i6, 3)
+    guard_true(i8, descr=<Guard15>) [i4, i6]
+    debug_merge_point('(no jitdriver.get_printable_location!)', 0)
+    jump(i6, i4, descr=<Loop0>)
     '''
-    TP = lltype.GcArray(lltype.Signed)
-    NULL = lltype.cast_opaque_ptr(llmemory.GCREF, lltype.nullptr(TP))
-    loop = parse(x, None, {'func_ptr' : NULL})
-    assert loop.operations[0].getarg(0).value == NULL
 
-def test_jump_target():
-    x = '''
-    []
-    jump()
-    '''
-    loop = parse(x)
-    assert loop.operations[0].getdescr() is loop.token
+    def test_parse_no_namespace(self):
+        loop = self.parse(self.example_loop_log, no_namespace=True)
 
-def test_jump_target_other():
-    looptoken = LoopToken()
-    x = '''
-    []
-    jump(descr=looptoken)
-    '''
-    loop = parse(x, namespace=locals())
-    assert loop.operations[0].getdescr() is looptoken
+    def test_attach_comment_to_loop(self):
+        loop = self.parse(self.example_loop_log, no_namespace=True)
+        assert loop.comment == '    # bridge out of Guard12, 6 ops'
 
-def test_floats():
-    x = '''
-    [f0]
-    f1 = float_add(f0, 3.5)
-    '''
-    loop = parse(x)
-    assert isinstance(loop.operations[0].getarg(0), BoxFloat)
-    
-def test_debug_merge_point():
-    x = '''
-    []
-    debug_merge_point("info", 0)
-    debug_merge_point('info', 1)
-    debug_merge_point('<some ('other,')> info', 1)
-    debug_merge_point('(stuff) #1', 1)
-    '''
-    loop = parse(x)
-    assert loop.operations[0].getarg(0)._get_str() == 'info'
-    assert loop.operations[1].getarg(0)._get_str() == 'info'
-    assert loop.operations[2].getarg(0)._get_str() == "<some ('other,')> info"
-    assert loop.operations[3].getarg(0)._get_str() == "(stuff) #1"
-    
+    def test_parse_new_with_comma(self):
+        # this is generated by PYPYJITLOG, check that we can handle it
+        x = '''
+        []
+        p0 = new(, descr=<SizeDescr 12>)
+        '''
+        loop = self.parse(x)
+        assert loop.operations[0].getopname() == 'new'
 
-def test_descr_with_obj_print():
-    x = '''
-    [p0]
-    setfield_gc(p0, 1, descr=<SomeDescr>)
-    '''
-    loop = parse(x)
-    # assert did not explode
+    def test_no_fail_args(self):
+        x = '''
+        [i0]
+        guard_true(i0, descr=<Guard0>)
+        '''
+        loop = self.parse(x, nonstrict=True)
+        assert loop.operations[0].getfailargs() == []
 
-example_loop_log = '''\
-# bridge out of Guard12, 6 ops
-[i0, i1, i2]
-i4 = int_add(i0, 2)
-i6 = int_sub(i1, 1)
-i8 = int_gt(i6, 3)
-guard_true(i8, descr=<Guard15>) [i4, i6]
-debug_merge_point('(no jitdriver.get_printable_location!)', 0)
-jump(i6, i4, descr=<Loop0>)
-'''
+    def test_no_inputargs(self):
+        x = '''
+        i2 = int_add(i0, i1)
+        '''
+        loop = self.parse(x, nonstrict=True)
+        assert loop.inputargs == []
+        assert loop.operations[0].getopname() == 'int_add'
 
-def test_parse_no_namespace():
-    loop = parse(example_loop_log, no_namespace=True)
+    def test_offsets(self):
+        x = """
+        [i0, i1]
+        +10: i2 = int_add(i0, i1)
+        i3 = int_add(i2, 3)
+        """
+        #    +30: --end of the loop--
+        loop = self.parse(x)
+        assert loop.operations[0].offset == 10
+        assert not hasattr(loop.operations[1], 'offset')
 
-def test_attach_comment_to_loop():
-    loop = parse(example_loop_log, no_namespace=True)
-    assert loop.comment == '# bridge out of Guard12, 6 ops'
+    def test_last_offset(self):
+        x = """
+        [i0, i1]
+        +10: i2 = int_add(i0, i1)
+        i3 = int_add(i2, 3)
+        +30: --end of the loop--
+        """
+        loop = self.parse(x)
+        assert len(loop.operations) == 2
+        assert loop.last_offset == 30
 
-def test_parse_new_with_comma():
-    # this is generated by PYPYJITLOG, check that we can handle it
-    x = '''
-    []
-    p0 = new(, descr=<SizeDescr 12>)
-    '''
-    loop = parse(x)
-    assert loop.operations[0].getopname() == 'new'
 
-def test_no_fail_args():
-    x = '''
-    [i0]
-    guard_true(i0, descr=<Guard0>)
-    '''
-    loop = parse(x, nonstrict=True)
-    assert loop.operations[0].getfailargs() == []
+class TestOpParser(BaseTestOparser):
 
-def test_no_inputargs():
-    x = '''
-    i2 = int_add(i0, i1)
-    '''
-    loop = parse(x, nonstrict=True)
-    assert loop.inputargs == []
-    assert loop.operations[0].getopname() == 'int_add'
+    OpParser = OpParser
 
-def test_offsets():
-    x = """
-    [i0, i1]
-    +10: i2 = int_add(i0, i1)
-    i3 = int_add(i2, 3)
-    """
-    #    +30: --end of the loop--
-    loop = parse(x)
-    assert loop.operations[0].offset == 10
-    assert not hasattr(loop.operations[1], 'offset')
+    def test_boxkind(self):
+        x = """
+        [sum0]
+        """
+        loop = self.parse(x, None, {}, boxkinds={'sum': BoxInt})
+        b = loop.getboxes()
+        assert isinstance(b.sum0, BoxInt)
 
-def test_last_offset():
-    x = """
-    [i0, i1]
-    +10: i2 = int_add(i0, i1)
-    i3 = int_add(i2, 3)
-    +30: --end of the loop--
-    """
-    loop = parse(x)
-    assert len(loop.operations) == 2
-    assert loop.last_offset == 30
+
+class ForbiddenModule(object):
+    def __init__(self, name, old_mod):
+        self.name = name
+        self.old_mod = old_mod
+
+    def __getattr__(self, attr):
+        assert False, "You should not import module %s" % self.name
+
+
+class TestOpParserWithMock(BaseTestOparser):
+
+    class OpParser(OpParser):
+        use_mock_model = True
+
+    def setup_class(cls):
+        forbidden_mods = [
+            'pypy.jit.metainterp.history',
+            'pypy.rpython.lltypesystem.lltype',
+            ]
+        for modname in forbidden_mods:
+            if modname in sys.modules:
+                newmod = ForbiddenModule(modname, sys.modules[modname])
+                sys.modules[modname] = newmod
+
+    def teardown_class(cls):
+        for modname, mod in sys.modules.iteritems():
+            if isinstance(mod, ForbiddenModule):
+                sys.modules[modname] = mod.old_mod
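
The mock flavour above works by poisoning sys.modules, so that any attribute
access on the forbidden modules fails loudly instead of silently pulling in the
real model.  The same trick in isolation looks roughly like this (a minimal
sketch; 'pkg.heavy_module' is a made-up placeholder, and the tests above do the
save/restore in setup_class/teardown_class rather than try/finally):

    import sys

    class ForbiddenModule(object):
        def __init__(self, name, old_mod):
            self.name = name
            self.old_mod = old_mod

        def __getattr__(self, attr):
            raise AssertionError("You should not import module %s" % self.name)

    name = 'pkg.heavy_module'                   # placeholder module name
    saved = sys.modules.get(name)
    if saved is not None:
        sys.modules[name] = ForbiddenModule(name, saved)
    try:
        pass    # run the code that must not touch pkg.heavy_module here
    finally:
        if saved is not None:
            sys.modules[name] = saved           # undo the poisoning
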
diff --git a/pypy/module/__builtin__/__init__.py b/pypy/module/__builtin__/__init__.py
--- a/pypy/module/__builtin__/__init__.py
+++ b/pypy/module/__builtin__/__init__.py
@@ -5,20 +5,6 @@
 
 # put builtins here that should be optimized somehow
 
-OPTIMIZED_BUILTINS = ["len", "range", "xrange", "min", "max", "enumerate",
-        "isinstance", "type", "zip", "file", "format", "open", "abs", "chr",
-        "unichr", "ord", "pow", "repr", "hash", "oct", "hex", "round", "cmp",
-        "getattr", "setattr", "delattr", "callable", "int", "str", "float"]
-
-assert len(OPTIMIZED_BUILTINS) <= 256
-
-BUILTIN_TO_INDEX = {}
-
-for i, name in enumerate(OPTIMIZED_BUILTINS):
-    BUILTIN_TO_INDEX[name] = i
-
-assert len(OPTIMIZED_BUILTINS) == len(BUILTIN_TO_INDEX)
-
 class Module(MixedModule):
     """Built-in functions, exceptions, and other objects."""
     expose__file__attribute = False
@@ -31,6 +17,8 @@
 
         'apply'         : 'app_functional.apply',
         'sorted'        : 'app_functional.sorted',
+        'any'           : 'app_functional.any',
+        'all'           : 'app_functional.all',
         'vars'          : 'app_inspect.vars',
         'dir'           : 'app_inspect.dir',
 
@@ -95,8 +83,6 @@
         'range'         : 'functional.range_int',
         'xrange'        : 'functional.W_XRange',
         'enumerate'     : 'functional.W_Enumerate',
-        'all'           : 'functional.all',
-        'any'           : 'functional.any',
         'min'           : 'functional.min',
         'max'           : 'functional.max',
         'sum'           : 'functional.sum',
@@ -141,9 +127,6 @@
     def setup_after_space_initialization(self):
         """NOT_RPYTHON"""
         space = self.space
-        self.builtins_by_index = [None] * len(OPTIMIZED_BUILTINS)
-        for i, name in enumerate(OPTIMIZED_BUILTINS):
-            self.builtins_by_index[i] = space.getattr(self, space.wrap(name))
         # install the more general version of isinstance() & co. in the space
         from pypy.module.__builtin__ import abstractinst as ab
         space.abstract_isinstance_w = ab.abstract_isinstance_w.__get__(space)
diff --git a/pypy/module/__builtin__/app_functional.py b/pypy/module/__builtin__/app_functional.py
--- a/pypy/module/__builtin__/app_functional.py
+++ b/pypy/module/__builtin__/app_functional.py
@@ -16,3 +16,21 @@
     sorted_lst = list(lst)
     sorted_lst.sort(cmp, key, reverse)
     return sorted_lst
+
+def any(seq):
+    """any(iterable) -> bool
+
+Return True if bool(x) is True for any x in the iterable."""
+    for x in seq:
+        if x:
+            return True
+    return False
+
+def all(seq):
+    """all(iterable) -> bool
+
+Return True if bool(x) is True for all values x in the iterable."""
+    for x in seq:
+        if not x:
+            return False
+    return True
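
As a quick sanity check, the semantics of the two app-level helpers above match
CPython's builtins, including the empty-iterable corner cases:

    assert any([0, '', 3]) is True     # one truthy element is enough
    assert all([1, 'x', []]) is False  # one falsy element fails the check
    assert any([]) is False            # vacuous cases
    assert all([]) is True
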
diff --git a/pypy/module/__builtin__/functional.py b/pypy/module/__builtin__/functional.py
--- a/pypy/module/__builtin__/functional.py
+++ b/pypy/module/__builtin__/functional.py
@@ -294,7 +294,7 @@
             break
         new_frame = space.createframe(code, w_func.w_func_globals,
                                       w_func.closure)
-        new_frame.fastlocals_w[0] = w_item
+        new_frame.locals_stack_w[0] = w_item
         w_res = new_frame.run()
         result_w.append(w_res)
     return result_w
@@ -452,40 +452,6 @@
     w_empty = space.call_function(w_str_type)
     return space.call_method(w_empty, "join", space.newlist(result_w))
 
-def all(space, w_S):
-    """all(iterable) -> bool
-
-Return True if bool(x) is True for all values x in the iterable."""
-    w_iter = space.iter(w_S)
-    while True:
-        try:
-            w_next = space.next(w_iter)
-        except OperationError, e:
-            if not e.match(space, space.w_StopIteration):
-                raise       # re-raise other app-level exceptions
-            break
-        if not space.is_true(w_next):
-            return space.w_False
-    return space.w_True
-
-
-def any(space, w_S):
-    """any(iterable) -> bool
-
-Return True if bool(x) is True for any x in the iterable."""
-    w_iter = space.iter(w_S)
-    while True:
-        try:
-            w_next = space.next(w_iter)
-        except OperationError, e:
-            if not e.match(space, space.w_StopIteration):
-                raise       # re-raise other app-level exceptions
-            break
-        if space.is_true(w_next):
-            return space.w_True
-    return space.w_False
-
-
 class W_Enumerate(Wrappable):
 
     def __init__(self, w_iter, w_start):
diff --git a/pypy/module/__builtin__/interp_classobj.py b/pypy/module/__builtin__/interp_classobj.py
--- a/pypy/module/__builtin__/interp_classobj.py
+++ b/pypy/module/__builtin__/interp_classobj.py
@@ -12,7 +12,7 @@
 
 
 def raise_type_err(space, argument, expected, w_obj):
-    type_name = space.type(w_obj).getname(space, '?')
+    type_name = space.type(w_obj).getname(space)
     raise operationerrfmt(space.w_TypeError,
                           "argument %s must be %s, not %s",
                           argument, expected, type_name)
diff --git a/pypy/module/__builtin__/test/test_builtin.py b/pypy/module/__builtin__/test/test_builtin.py
--- a/pypy/module/__builtin__/test/test_builtin.py
+++ b/pypy/module/__builtin__/test/test_builtin.py
@@ -1,5 +1,6 @@
 import autopath
 import sys
+from pypy import conftest
 
 class AppTestBuiltinApp:
     def setup_class(cls):
@@ -15,6 +16,15 @@
             cls.w_sane_lookup = cls.space.wrap(True)
         except KeyError:
             cls.w_sane_lookup = cls.space.wrap(False)
+        # starting with CPython 2.6, when the stack is almost out, we
+        # can get a random error, instead of just a RuntimeError.
+        # For example if an object x has a __getattr__, we can get
+        # AttributeError if attempting to call x.__getattr__ runs out
+        # of stack.  That's annoying, so we just work around it.
+        if conftest.option.runappdirect:
+            cls.w_safe_runtimerror = cls.space.wrap(True)
+        else:
+            cls.w_safe_runtimerror = cls.space.wrap(sys.version_info < (2, 6))
 
     def test_bytes_alias(self):
         assert bytes is str
@@ -399,6 +409,8 @@
     def test_cmp_cyclic(self):
         if not self.sane_lookup:
             skip("underlying Python implementation has insane dict lookup")
+        if not self.safe_runtimerror:
+            skip("underlying Python may raise random exceptions on stack ovf")
         a = []; a.append(a)
         b = []; b.append(b)
         from UserList import UserList
@@ -619,62 +631,6 @@
         raises(TypeError, pr, end=3)
         raises(TypeError, pr, sep=42)
 
-class AppTestBuiltinOptimized(object):
-    def setup_class(cls):
-        from pypy.conftest import gettestobjspace
-        cls.space = gettestobjspace(**{"objspace.opcodes.CALL_LIKELY_BUILTIN": True})
-
-    # hum, we need to invoke the compiler explicitely
-    def test_xrange_len(self):
-        s = """def test():
-        x = xrange(33)
-        assert len(x) == 33
-        x = xrange(33.2)
-        assert len(x) == 33
-        x = xrange(33,0,-1)
-        assert len(x) == 33
-        x = xrange(33,0)
-        assert len(x) == 0
-        x = xrange(33,0.2)
-        assert len(x) == 0
-        x = xrange(0,33)
-        assert len(x) == 33
-        x = xrange(0,33,-1)
-        assert len(x) == 0
-        x = xrange(0,33,2)
-        assert len(x) == 17
-        x = xrange(0,32,2)
-        assert len(x) == 16
-        """
-        ns = {}
-        exec s in ns
-        ns["test"]()
-
-    def test_delete_from_builtins(self):
-        s = """ """
-        # XXX write this test!
-
-    def test_shadow_case_bound_method(self):
-        s = """def test(l):
-        n = len(l)
-        old_len = len
-        class A(object):
-            x = 5
-            def length(self, o):
-                return self.x*old_len(o)
-        import __builtin__
-        __builtin__.len = A().length
-        try:
-            m = len(l)
-        finally:
-            __builtin__.len = old_len
-        return n+m
-        """
-        ns = {}
-        exec s in ns
-        res = ns["test"]([2,3,4])
-        assert res == 18
-
     def test_round(self):
         assert round(11.234) == 11.0
         assert round(11.234, -1) == 10.0
diff --git a/pypy/module/__builtin__/test/test_classobj.py b/pypy/module/__builtin__/test/test_classobj.py
--- a/pypy/module/__builtin__/test/test_classobj.py
+++ b/pypy/module/__builtin__/test/test_classobj.py
@@ -987,9 +987,9 @@
         if option.runappdirect:
             py.test.skip("can only be run on py.py")
         def is_strdict(space, w_class):
-            from pypy.objspace.std.dictmultiobject import StrDictImplementation
+            from pypy.objspace.std.dictmultiobject import StringDictStrategy
             w_d = w_class.getdict(space)
-            return space.wrap(isinstance(w_d, StrDictImplementation) and w_d.r_dict_content is None)
+            return space.wrap(isinstance(w_d.strategy, StringDictStrategy))
 
         cls.w_is_strdict = cls.space.wrap(gateway.interp2app(is_strdict))
 
diff --git a/pypy/module/__pypy__/__init__.py b/pypy/module/__pypy__/__init__.py
--- a/pypy/module/__pypy__/__init__.py
+++ b/pypy/module/__pypy__/__init__.py
@@ -3,6 +3,14 @@
 from pypy.interpreter.mixedmodule import MixedModule
 from pypy.module.imp.importing import get_pyc_magic
 
+
+class BuildersModule(MixedModule):
+    appleveldefs = {}
+
+    interpleveldefs = {
+        "UnicodeBuilder": "interp_builders.W_UnicodeBuilder",
+    }
+
 class Module(MixedModule):
     appleveldefs = {
     }
@@ -19,6 +27,10 @@
         'lookup_special'            : 'interp_magic.lookup_special',
     }
 
+    submodules = {
+        "builders": BuildersModule,
+    }
+
     def setup_after_space_initialization(self):
         """NOT_RPYTHON"""
         if not self.space.config.translating:
diff --git a/pypy/module/__pypy__/interp_builders.py b/pypy/module/__pypy__/interp_builders.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/__pypy__/interp_builders.py
@@ -0,0 +1,50 @@
+from pypy.interpreter.baseobjspace import Wrappable
+from pypy.interpreter.error import OperationError
+from pypy.interpreter.gateway import interp2app, unwrap_spec
+from pypy.interpreter.typedef import TypeDef
+from pypy.rlib.rstring import UnicodeBuilder
+
+
+class W_UnicodeBuilder(Wrappable):
+    def __init__(self, space, size):
+        if size == -1:
+            self.builder = UnicodeBuilder()
+        else:
+            self.builder = UnicodeBuilder(size)
+        self.done = False
+
+    def _check_done(self, space):
+        if self.done:
+            raise OperationError(space.w_ValueError, space.wrap("Can't operate on a done builder"))
+
+    @unwrap_spec(size=int)
+    def descr__new__(space, w_subtype, size=-1):
+        return W_UnicodeBuilder(space, size)
+
+    @unwrap_spec(s=unicode)
+    def descr_append(self, space, s):
+        self._check_done(space)
+        self.builder.append(s)
+
+    @unwrap_spec(s=unicode, start=int, end=int)
+    def descr_append_slice(self, space, s, start, end):
+        self._check_done(space)
+        if not 0 <= start <= end <= len(s):
+            raise OperationError(space.w_ValueError, space.wrap("bad start/stop"))
+        self.builder.append_slice(s, start, end)
+
+    def descr_build(self, space):
+        self._check_done(space)
+        w_s = space.wrap(self.builder.build())
+        self.done = True
+        return w_s
+
+
+W_UnicodeBuilder.typedef = TypeDef("UnicodeBuilder",
+    __new__ = interp2app(W_UnicodeBuilder.descr__new__.im_func),
+
+    append = interp2app(W_UnicodeBuilder.descr_append),
+    append_slice = interp2app(W_UnicodeBuilder.descr_append_slice),
+    build = interp2app(W_UnicodeBuilder.descr_build),
+)
+W_UnicodeBuilder.typedef.acceptable_as_base_class = False
\ No newline at end of file
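
For orientation, the app-level API added by this new file is meant to be used
roughly as follows (a usage sketch; the tests added further down exercise the
same calls):

    from __pypy__.builders import UnicodeBuilder

    b = UnicodeBuilder(16)            # optional size hint
    b.append(u"hello ")
    b.append_slice(u"worldly", 0, 5)  # appends s[start:end]
    s = b.build()                     # u"hello world"; marks the builder as done
    # any further append()/append_slice()/build() now raises ValueError
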
diff --git a/pypy/module/__pypy__/interp_debug.py b/pypy/module/__pypy__/interp_debug.py
--- a/pypy/module/__pypy__/interp_debug.py
+++ b/pypy/module/__pypy__/interp_debug.py
@@ -1,15 +1,19 @@
 from pypy.interpreter.gateway import interp2app, NoneNotWrapped, unwrap_spec
 from pypy.interpreter.error import OperationError
-from pypy.rlib import debug
+from pypy.rlib import debug, jit
 
+
+ at jit.dont_look_inside
 @unwrap_spec(category=str)
 def debug_start(space, category):
     debug.debug_start(category)
 
+ at jit.dont_look_inside
 def debug_print(space, args_w):
     parts = [space.str_w(space.str(w_item)) for w_item in args_w]
     debug.debug_print(' '.join(parts))
 
+ at jit.dont_look_inside
 @unwrap_spec(category=str)
 def debug_stop(space, category):
     debug.debug_stop(category)
diff --git a/pypy/module/__pypy__/test/test_builders.py b/pypy/module/__pypy__/test/test_builders.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/__pypy__/test/test_builders.py
@@ -0,0 +1,34 @@
+from pypy.conftest import gettestobjspace
+
+
+class AppTestBuilders(object):
+    def setup_class(cls):
+        cls.space = gettestobjspace(usemodules=['__pypy__'])
+
+    def test_simple(self):
+        from __pypy__.builders import UnicodeBuilder
+        b = UnicodeBuilder()
+        b.append(u"abc")
+        b.append(u"123")
+        b.append(u"1")
+        s = b.build()
+        assert s == u"abc1231"
+        raises(ValueError, b.build)
+        raises(ValueError, b.append, u"123")
+
+    def test_preallocate(self):
+        from __pypy__.builders import UnicodeBuilder
+        b = UnicodeBuilder(10)
+        b.append(u"abc")
+        b.append(u"123")
+        s = b.build()
+        assert s == u"abc123"
+
+    def test_append_slice(self):
+        from __pypy__.builders import UnicodeBuilder
+        b = UnicodeBuilder()
+        b.append_slice(u"abcdefgh", 2, 5)
+        raises(ValueError, b.append_slice, u"1", 2, 1)
+        s = b.build()
+        assert s == "cde"
+        raises(ValueError, b.append_slice, u"abc", 1, 2)
\ No newline at end of file
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -32,15 +32,22 @@
                 space.wrap(reason))
             w_res = space.call_function(w_errorhandler, w_exc)
             if (not space.is_true(space.isinstance(w_res, space.w_tuple))
-                or space.len_w(w_res) != 2):
+                or space.len_w(w_res) != 2
+                or not space.is_true(space.isinstance(
+                                 space.getitem(w_res, space.wrap(0)),
+                                 space.w_unicode))):
+                if decode:
+                    msg = ("decoding error handler must return "
+                           "(unicode, int) tuple, not %s")
+                else:
+                    msg = ("encoding error handler must return "
+                           "(unicode, int) tuple, not %s")
                 raise operationerrfmt(
-                    space.w_TypeError,
-                    "encoding error handler must return "
-                    "(unicode, int) tuple, not %s",
+                    space.w_TypeError, msg,
                     space.str_w(space.repr(w_res)))
             w_replace, w_newpos = space.fixedview(w_res, 2)
             newpos = space.int_w(w_newpos)
-            if (newpos < 0):
+            if newpos < 0:
                 newpos = len(input) + newpos
             if newpos < 0 or newpos > len(input):
                 raise operationerrfmt(
@@ -50,7 +57,9 @@
                 replace = space.unicode_w(w_replace)
                 return replace, newpos
             else:
-                replace = space.str_w(w_replace)
+                from pypy.objspace.std.unicodetype import encode_object
+                w_str = encode_object(space, w_replace, encoding, None)
+                replace = space.str_w(w_str)
                 return replace, newpos
         return unicode_call_errorhandler
 
@@ -160,15 +169,7 @@
 def ignore_errors(space, w_exc):
     check_exception(space, w_exc)
     w_end = space.getattr(w_exc, space.wrap('end'))
-    if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
-        return space.newtuple([space.wrap(''), w_end])
-    elif (space.isinstance_w(w_exc, space.w_UnicodeDecodeError) or
-          space.isinstance_w(w_exc, space.w_UnicodeTranslateError)):
-        return space.newtuple([space.wrap(u''), w_end])
-    else:
-        typename = space.type(w_exc).getname(space, '?')
-        raise operationerrfmt(space.w_TypeError,
-            "don't know how to handle %s in error callback", typename)
+    return space.newtuple([space.wrap(u''), w_end])
 
 def replace_errors(space, w_exc):
     check_exception(space, w_exc)
@@ -176,7 +177,7 @@
     w_end = space.getattr(w_exc, space.wrap('end'))
     size = space.int_w(w_end) - space.int_w(w_start)
     if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
-        text = '?' * size
+        text = u'?' * size
         return space.newtuple([space.wrap(text), w_end])
     elif space.isinstance_w(w_exc, space.w_UnicodeDecodeError):
         text = u'\ufffd'
@@ -185,7 +186,7 @@
         text = u'\ufffd' * size
         return space.newtuple([space.wrap(text), w_end])
     else:
-        typename = space.type(w_exc).getname(space, '?')
+        typename = space.type(w_exc).getname(space)
         raise operationerrfmt(space.w_TypeError,
             "don't know how to handle %s in error callback", typename)
 
@@ -206,7 +207,7 @@
             pos += 1
         return space.newtuple([space.wrap(builder.build()), w_end])
     else:
-        typename = space.type(w_exc).getname(space, '?')
+        typename = space.type(w_exc).getname(space)
         raise operationerrfmt(space.w_TypeError,
             "don't know how to handle %s in error callback", typename)
 
@@ -239,7 +240,7 @@
             pos += 1
         return space.newtuple([space.wrap(builder.build()), w_end])
     else:
-        typename = space.type(w_exc).getname(space, '?')
+        typename = space.type(w_exc).getname(space)
         raise operationerrfmt(space.w_TypeError,
             "don't know how to handle %s in error callback", typename)
 
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -540,6 +540,17 @@
         else:
             assert res == u"\x00\x00\x01\x00\x00" # UCS2 build
 
+    def test_encode_error_bad_handler(self):
+        import codecs
+        codecs.register_error("test.bad_handler", lambda e: (repl, 1))
+        assert u"xyz".encode("latin-1", "test.bad_handler") == "xyz"
+        repl = u"\u1234"
+        raises(UnicodeEncodeError, u"\u5678".encode, "latin-1",
+               "test.bad_handler")
+        repl = u"\u00E9"
+        s = u"\u5678".encode("latin-1", "test.bad_handler")
+        assert s == '\xe9'
+
     def test_charmap_encode(self):
         assert 'xxx'.encode('charmap') == 'xxx'
 
@@ -593,3 +604,11 @@
         assert u'caf\xe9'.encode('mbcs') == 'caf\xe9'
         assert u'\u040a'.encode('mbcs') == '?' # some cyrillic letter
         assert 'cafx\e9'.decode('mbcs') == u'cafx\e9'
+
+    def test_bad_handler_string_result(self):
+        import _codecs
+        def f(exc):
+            return ('foo', exc.end)
+        _codecs.register_error("test.test_codecs_not_a_string", f)
+        raises(TypeError, u'\u1234'.encode, 'ascii',
+               'test.test_codecs_not_a_string')
diff --git a/pypy/module/_ffi/__init__.py b/pypy/module/_ffi/__init__.py
--- a/pypy/module/_ffi/__init__.py
+++ b/pypy/module/_ffi/__init__.py
@@ -4,8 +4,10 @@
 class Module(MixedModule):
 
     interpleveldefs = {
-        'CDLL'               : 'interp_ffi.W_CDLL',
-        'types':             'interp_ffi.W_types',
+        'CDLL':    'interp_ffi.W_CDLL',
+        'types':   'interp_ffi.W_types',
+        'FuncPtr': 'interp_ffi.W_FuncPtr',
+        'get_libc':'interp_ffi.get_libc',
     }
 
     appleveldefs = {}
diff --git a/pypy/module/_ffi/interp_ffi.py b/pypy/module/_ffi/interp_ffi.py
--- a/pypy/module/_ffi/interp_ffi.py
+++ b/pypy/module/_ffi/interp_ffi.py
@@ -4,63 +4,176 @@
     operationerrfmt
 from pypy.interpreter.gateway import interp2app, NoneNotWrapped, unwrap_spec
 from pypy.interpreter.typedef import TypeDef, GetSetProperty
+from pypy.module._rawffi.structure import W_StructureInstance, W_Structure
 #
 from pypy.rpython.lltypesystem import lltype, rffi
 #
 from pypy.rlib import jit
 from pypy.rlib import libffi
 from pypy.rlib.rdynload import DLOpenError
-from pypy.rlib.rarithmetic import intmask
+from pypy.rlib.rarithmetic import intmask, r_uint
 
 class W_FFIType(Wrappable):
-    def __init__(self, name, ffitype):
+
+    _immutable_fields_ = ['name', 'ffitype', 'w_datashape', 'w_pointer_to']
+    
+    def __init__(self, name, ffitype, w_datashape=None, w_pointer_to=None):
         self.name = name
         self.ffitype = ffitype
+        self.w_datashape = w_datashape
+        self.w_pointer_to = w_pointer_to
+        if self.is_struct():
+            assert w_datashape is not None
 
-    def str(self, space):
-        return space.wrap('<ffi type %s>' % self.name)
+    def descr_deref_pointer(self, space):
+        if self.w_pointer_to is None:
+            return space.w_None
+        return self.w_pointer_to
 
+    def repr(self, space):
+        return space.wrap(self.__repr__())
 
+    def __repr__(self):
+        return "<ffi type %s>" % self.name
+
+    def is_signed(self):
+        return (self is app_types.slong or
+                self is app_types.sint or
+                self is app_types.sshort or
+                self is app_types.sbyte or
+                self is app_types.slonglong)
+
+    def is_unsigned(self):
+        return (self is app_types.ulong or
+                self is app_types.uint or
+                self is app_types.ushort or
+                self is app_types.ubyte or
+                self is app_types.ulonglong)
+
+    def is_pointer(self):
+        return self.ffitype is libffi.types.pointer
+
+    def is_char(self):
+        return self is app_types.char
+
+    def is_unichar(self):
+        return self is app_types.unichar
+
+    def is_longlong(self):
+        return libffi.IS_32_BIT and (self is app_types.slonglong or
+                                     self is app_types.ulonglong)
+
+    def is_double(self):
+        return self is app_types.double
+
+    def is_singlefloat(self):
+        return self is app_types.float
+
+    def is_void(self):
+        return self is app_types.void
+
+    def is_struct(self):
+        return libffi.types.is_struct(self.ffitype)
 
 W_FFIType.typedef = TypeDef(
     'FFIType',
-    __str__ = interp2app(W_FFIType.str),
+    __repr__ = interp2app(W_FFIType.repr),
+    deref_pointer = interp2app(W_FFIType.descr_deref_pointer),
     )
 
 
+def build_ffi_types():
+    from pypy.rlib.clibffi import FFI_TYPE_P
+    types = [
+        # note: most of the type names come directly from their C
+        # equivalents, with the exception of bytes: in C, ubyte and char are
+        # equivalent, but in _ffi the first expects a number while the second
+        # expects a 1-length string
+        W_FFIType('slong',     libffi.types.slong),
+        W_FFIType('sint',      libffi.types.sint),
+        W_FFIType('sshort',    libffi.types.sshort),
+        W_FFIType('sbyte',     libffi.types.schar),
+        W_FFIType('slonglong', libffi.types.slonglong),
+        #
+        W_FFIType('ulong',     libffi.types.ulong),
+        W_FFIType('uint',      libffi.types.uint),
+        W_FFIType('ushort',    libffi.types.ushort),
+        W_FFIType('ubyte',     libffi.types.uchar),
+        W_FFIType('ulonglong', libffi.types.ulonglong),
+        #
+        W_FFIType('char',      libffi.types.uchar),
+        W_FFIType('unichar',   libffi.types.wchar_t),
+        #
+        W_FFIType('double',    libffi.types.double),
+        W_FFIType('float',     libffi.types.float),
+        W_FFIType('void',      libffi.types.void),
+        W_FFIType('void_p',    libffi.types.pointer),
+        #
+        # missing types:
+
+        ## 's' : ffi_type_pointer,
+        ## 'z' : ffi_type_pointer,
+        ## 'O' : ffi_type_pointer,
+        ## 'Z' : ffi_type_pointer,
+
+        ]
+    return dict([(t.name, t) for t in types])
+
+class app_types:
+    pass
+app_types.__dict__ = build_ffi_types()
+
+def descr_new_pointer(space, w_cls, w_pointer_to):
+    try:
+        return descr_new_pointer.cache[w_pointer_to]
+    except KeyError:
+        w_pointer_to = space.interp_w(W_FFIType, w_pointer_to)
+        name = '(pointer to %s)' % w_pointer_to.name
+        w_result = W_FFIType(name, libffi.types.pointer, w_pointer_to = w_pointer_to)
+        descr_new_pointer.cache[w_pointer_to] = w_result
+        return w_result
+descr_new_pointer.cache = {}
+
 class W_types(Wrappable):
     pass
-
-def build_ffi_types():
-    from pypy.rlib.clibffi import FFI_TYPE_P
-    tdict = {}
-    for key, value in libffi.types.__dict__.iteritems():
-        if key == 'getkind' or key.startswith('__'):
-            continue
-        assert lltype.typeOf(value) == FFI_TYPE_P
-        tdict[key] = W_FFIType(key, value)
-    return tdict
-    
 W_types.typedef = TypeDef(
     'types',
-    **build_ffi_types())
+    Pointer = interp2app(descr_new_pointer, as_classmethod=True),
+    **app_types.__dict__)
+
+
+def unwrap_ffitype(space, w_argtype, allow_void=False):
+    res = w_argtype.ffitype
+    if res is libffi.types.void and not allow_void:
+        msg = 'void is not a valid argument type'
+        raise OperationError(space.w_TypeError, space.wrap(msg))
+    return res
+
+def unwrap_truncate_int(TP, space, w_arg):
+    if space.is_true(space.isinstance(w_arg, space.w_int)):
+        return rffi.cast(TP, space.int_w(w_arg))
+    else:
+        return rffi.cast(TP, space.bigint_w(w_arg).ulonglongmask())
+unwrap_truncate_int._annspecialcase_ = 'specialize:arg(0)'
 
 # ========================================================================
 
 class W_FuncPtr(Wrappable):
 
-    _immutable_fields_ = ['func']
+    _immutable_fields_ = ['func', 'argtypes_w[*]', 'w_restype']
     
-    def __init__(self, func):
+    def __init__(self, func, argtypes_w, w_restype):
         self.func = func
+        self.argtypes_w = argtypes_w
+        self.w_restype = w_restype
 
     @jit.unroll_safe
-    def build_argchain(self, space, argtypes, args_w):
-        expected = len(argtypes)
+    def build_argchain(self, space, args_w):
+        expected = len(self.argtypes_w)
         given = len(args_w)
         if given != expected:
             arg = 'arguments'
-            if len(argtypes) == 1:
+            if len(self.argtypes_w) == 1:
                 arg = 'argument'
             raise operationerrfmt(space.w_TypeError,
                                   '%s() takes exactly %d %s (%d given)',
@@ -68,34 +181,97 @@
         #
         argchain = libffi.ArgChain()
         for i in range(expected):
-            argtype = argtypes[i]
+            w_argtype = self.argtypes_w[i]
             w_arg = args_w[i]
-            kind = libffi.types.getkind(argtype)
-            if kind == 'i':
+            if w_argtype.is_longlong():
+                # note that we must check for longlong first, because either
+                # is_signed or is_unsigned returns true anyway
+                assert libffi.IS_32_BIT
+                self.arg_longlong(space, argchain, w_arg)
+            elif w_argtype.is_signed():
+                argchain.arg(unwrap_truncate_int(rffi.LONG, space, w_arg))
+            elif w_argtype.is_pointer():
+                w_arg = self.convert_pointer_arg_maybe(space, w_arg, w_argtype)
+                argchain.arg(intmask(space.uint_w(w_arg)))
+            elif w_argtype.is_unsigned():
+                argchain.arg(unwrap_truncate_int(rffi.ULONG, space, w_arg))
+            elif w_argtype.is_char():
+                w_arg = space.ord(w_arg)
                 argchain.arg(space.int_w(w_arg))
-            elif kind == 'u':
-                argchain.arg(intmask(space.uint_w(w_arg)))
-            elif kind == 'f':
+            elif w_argtype.is_unichar():
+                w_arg = space.ord(w_arg)
+                argchain.arg(space.int_w(w_arg))
+            elif w_argtype.is_double():
                 argchain.arg(space.float_w(w_arg))
+            elif w_argtype.is_singlefloat():
+                argchain.arg_singlefloat(space.float_w(w_arg))
+            elif w_argtype.is_struct():
+                # arg_raw directly takes value to put inside ll_args
+                w_arg = space.interp_w(W_StructureInstance, w_arg)                
+                ptrval = w_arg.ll_buffer
+                argchain.arg_raw(ptrval)
             else:
-                assert False, "Argument kind '%s' not supported" % kind
+                assert False, "Argument shape '%s' not supported" % w_argtype
         return argchain
 
+    def convert_pointer_arg_maybe(self, space, w_arg, w_argtype):
+        """
+        Try to convert the argument by calling _as_ffi_pointer_()
+        """
+        meth = space.lookup(w_arg, '_as_ffi_pointer_') # this also promotes the type
+        if meth:
+            return space.call_function(meth, w_arg, w_argtype)
+        else:
+            return w_arg
+
+    @jit.dont_look_inside
+    def arg_longlong(self, space, argchain, w_arg):
+        bigarg = space.bigint_w(w_arg)
+        ullval = bigarg.ulonglongmask()
+        llval = rffi.cast(rffi.LONGLONG, ullval)
+        # this is a hack: we store the 64 bits of the long long into the
+        # 64 bits of a float (i.e., a C double)
+        floatval = libffi.longlong2float(llval)
+        argchain.arg_longlong(floatval)
+
     def call(self, space, args_w):
-        self = jit.hint(self, promote=True)
-        argchain = self.build_argchain(space, self.func.argtypes, args_w)
-        reskind = libffi.types.getkind(self.func.restype)
-        if reskind == 'i':
+        self = jit.promote(self)
+        argchain = self.build_argchain(space, args_w)
+        w_restype = self.w_restype
+        if w_restype.is_longlong():
+            # note that we must check for longlong first, because either
+            # is_signed or is_unsigned returns true anyway
+            assert libffi.IS_32_BIT
+            reskind = libffi.types.getkind(self.func.restype) # XXX: remove the kind
+            return self._call_longlong(space, argchain, reskind)
+        elif w_restype.is_signed():
             return self._call_int(space, argchain)
-        elif reskind == 'u':
+        elif w_restype.is_unsigned() or w_restype.is_pointer():
             return self._call_uint(space, argchain)
-        elif reskind == 'f':
+        elif w_restype.is_char():
+            intres = self.func.call(argchain, rffi.UCHAR)
+            return space.wrap(chr(intres))
+        elif w_restype.is_unichar():
+            intres = self.func.call(argchain, rffi.WCHAR_T)
+            return space.wrap(unichr(intres))
+        elif w_restype.is_double():
             floatres = self.func.call(argchain, rffi.DOUBLE)
             return space.wrap(floatres)
-        else:
+        elif w_restype.is_singlefloat():
+            # the result is a float, but widened to be inside a double
+            floatres = self.func.call(argchain, rffi.FLOAT)
+            return space.wrap(floatres)
+        elif w_restype.is_struct():
+            w_datashape = w_restype.w_datashape
+            assert isinstance(w_datashape, W_Structure)
+            ptrval = self.func.call(argchain, rffi.ULONG, is_struct=True)
+            return w_datashape.fromaddress(space, ptrval)
+        elif w_restype.is_void():
             voidres = self.func.call(argchain, lltype.Void)
             assert voidres is None
             return space.w_None
+        else:
+            assert False, "Return value shape '%s' not supported" % w_restype
 
     def _call_int(self, space, argchain):
         # if the declared return type of the function is smaller than LONG,
@@ -138,6 +314,10 @@
             # special case
             uintres = call(argchain, rffi.ULONG)
             return space.wrap(uintres)
+        elif restype is libffi.types.pointer:
+            ptrres = call(argchain, rffi.VOIDP)
+            uintres = rffi.cast(rffi.ULONG, ptrres)
+            return space.wrap(uintres)
         elif restype is libffi.types.uint:
             intres = rffi.cast(rffi.LONG, call(argchain, rffi.UINT))
         elif restype is libffi.types.ushort:
@@ -149,16 +329,52 @@
                                  space.wrap('Unsupported restype'))
         return space.wrap(intres)
 
+    @jit.dont_look_inside
+    def _call_longlong(self, space, argchain, reskind):
+        # this is a hack: we store the 64 bits of the long long into the 64
+        # bits of a float (i.e., a C double)
+        floatres = self.func.call(argchain, rffi.LONGLONG)
+        llres = libffi.float2longlong(floatres)
+        if reskind == 'I':
+            return space.wrap(llres)
+        elif reskind == 'U':
+            ullres = rffi.cast(rffi.ULONGLONG, llres)
+            return space.wrap(ullres)
+        else:
+            assert False
+
     def getaddr(self, space):
         """
         Return the physical address in memory of the function
         """
         return space.wrap(rffi.cast(rffi.LONG, self.func.funcsym))
 
+
+
+def unpack_argtypes(space, w_argtypes, w_restype):
+    argtypes_w = [space.interp_w(W_FFIType, w_argtype)
+                  for w_argtype in space.listview(w_argtypes)]
+    argtypes = [unwrap_ffitype(space, w_argtype) for w_argtype in
+                argtypes_w]
+    w_restype = space.interp_w(W_FFIType, w_restype)
+    restype = unwrap_ffitype(space, w_restype, allow_void=True)
+    return argtypes_w, argtypes, w_restype, restype
+
+ at unwrap_spec(addr=r_uint, name=str)
+def descr_fromaddr(space, w_cls, addr, name, w_argtypes, w_restype):
+    argtypes_w, argtypes, w_restype, restype = unpack_argtypes(space,
+                                                               w_argtypes,
+                                                               w_restype)
+    addr = rffi.cast(rffi.VOIDP, addr)
+    func = libffi.Func(name, argtypes, restype, addr)
+    return W_FuncPtr(func, argtypes_w, w_restype)
+
+
 W_FuncPtr.typedef = TypeDef(
-    'FuncPtr',
+    '_ffi.FuncPtr',
     __call__ = interp2app(W_FuncPtr.call),
     getaddr = interp2app(W_FuncPtr.getaddr),
+    fromaddr = interp2app(descr_fromaddr, as_classmethod=True)
     )
 
 
@@ -167,40 +383,57 @@
 
 class W_CDLL(Wrappable):
     def __init__(self, space, name):
+        self.space = space
+        if name is None:
+            self.name = "<None>"
+        else:
+            self.name = name
         try:
             self.cdll = libffi.CDLL(name)
         except DLOpenError, e:
-            raise operationerrfmt(space.w_OSError, '%s: %s', name,
+            raise operationerrfmt(space.w_OSError, '%s: %s', self.name,
                                   e.msg or 'unspecified error')
-        self.name = name
-        self.space = space
-
-    def ffitype(self, w_argtype, allow_void=False):
-        res = self.space.interp_w(W_FFIType, w_argtype).ffitype
-        if res is libffi.types.void and not allow_void:
-            space = self.space
-            msg = 'void is not a valid argument type'
-            raise OperationError(space.w_TypeError, space.wrap(msg))
-        return res
 
     @unwrap_spec(name=str)
     def getfunc(self, space, name, w_argtypes, w_restype):
-        argtypes = [self.ffitype(w_argtype) for w_argtype in
-                    space.listview(w_argtypes)]
-        restype = self.ffitype(w_restype, allow_void=True)
-        func = self.cdll.getpointer(name, argtypes, restype)
-        return W_FuncPtr(func)
+        argtypes_w, argtypes, w_restype, restype = unpack_argtypes(space,
+                                                                   w_argtypes,
+                                                                   w_restype)
+        try:
+            func = self.cdll.getpointer(name, argtypes, restype)
+        except KeyError:
+            raise operationerrfmt(space.w_AttributeError,
+                                  "No symbol %s found in library %s", name, self.name)
+            
+        return W_FuncPtr(func, argtypes_w, w_restype)
 
+    @unwrap_spec(name=str)
+    def getaddressindll(self, space, name):
+        try:
+            address_as_uint = rffi.cast(lltype.Unsigned,
+                                        self.cdll.getaddressindll(name))
+        except KeyError:
+            raise operationerrfmt(space.w_ValueError,
+                                  "No symbol %s found in library %s", name, self.name)
+        return space.wrap(address_as_uint)
 
- at unwrap_spec(name=str)
+ at unwrap_spec(name='str_or_None')
 def descr_new_cdll(space, w_type, name):
     return space.wrap(W_CDLL(space, name))
 
 
 W_CDLL.typedef = TypeDef(
-    'CDLL',
+    '_ffi.CDLL',
     __new__     = interp2app(descr_new_cdll),
     getfunc     = interp2app(W_CDLL.getfunc),
+    getaddressindll = interp2app(W_CDLL.getaddressindll),
     )
 
 # ========================================================================
+
+def get_libc(space):
+    from pypy.rlib.clibffi import get_libc_name
+    try:
+        return space.wrap(W_CDLL(space, get_libc_name()))
+    except OSError, e:
+        raise wrap_oserror(space, e)
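
A note on the long-long paths above: on 32-bit builds the 64-bit value is
smuggled through the argument chain by reinterpreting its bits as a C double
(libffi.longlong2float / float2longlong).  The same bit-for-bit round trip can
be sketched in plain Python with struct (an illustration of the idea, not the
rlib implementation):

    import struct

    def longlong_as_double(x):
        # pack the 64 bits of a signed long long, unpack them as a double
        return struct.unpack('d', struct.pack('q', x))[0]

    def double_as_longlong(f):
        return struct.unpack('q', struct.pack('d', f))[0]

    value = -(2 ** 40) - 7
    assert double_as_longlong(longlong_as_double(value)) == value
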
diff --git a/pypy/module/_ffi/test/test__ffi.py b/pypy/module/_ffi/test/test__ffi.py
--- a/pypy/module/_ffi/test/test__ffi.py
+++ b/pypy/module/_ffi/test/test__ffi.py
@@ -17,7 +17,13 @@
 
         c_file = udir.ensure("test__ffi", dir=1).join("foolib.c")
         # automatically collect the C source from the docstrings of the tests
-        snippets = []
+        snippets = ["""
+        #ifdef _WIN32
+        #define DLLEXPORT __declspec(dllexport)
+        #else
+        #define DLLEXPORT
+        #endif
+        """]
         for name in dir(cls):
             if name.startswith('test_'):
                 meth = getattr(cls, name)
@@ -35,8 +41,9 @@
         from pypy.rpython.lltypesystem import rffi
         from pypy.rlib.libffi import get_libc_name, CDLL, types
         from pypy.rlib.test.test_libffi import get_libm_name
-        space = gettestobjspace(usemodules=('_ffi',))
+        space = gettestobjspace(usemodules=('_ffi', '_rawffi'))
         cls.space = space
+        cls.w_iswin32 = space.wrap(sys.platform == 'win32')
         cls.w_libfoo_name = space.wrap(cls.prepare_c_example())
         cls.w_libc_name = space.wrap(get_libc_name())
         libm_name = get_libm_name(sys.platform)
@@ -45,6 +52,13 @@
         pow = libm.getpointer('pow', [], types.void)
         pow_addr = rffi.cast(rffi.LONG, pow.funcsym)
         cls.w_pow_addr = space.wrap(pow_addr)
+        #
+        # these are needed for test_single_float_args
+        from ctypes import c_float
+        f_12_34 = c_float(12.34).value
+        f_56_78 = c_float(56.78).value
+        f_result = c_float(f_12_34 + f_56_78).value
+        cls.w_f_12_34_plus_56_78 = space.wrap(f_result)
 
     def test_libload(self):
         import _ffi
@@ -54,10 +68,20 @@
         import _ffi
         raises(OSError, _ffi.CDLL, "xxxxx_this_name_does_not_exist_xxxxx")
 
+    def test_libload_None(self):
+        if self.iswin32:
+            skip("unix specific")
+        from _ffi import CDLL, types
+        # this should return *all* loaded libs, dlopen(NULL)
+        dll = CDLL(None)
+        # Assume CPython, or PyPy compiled with cpyext
+        res = dll.getfunc('Py_IsInitialized', [], types.slong)()
+        assert res == 1
+
     def test_simple_types(self):
         from _ffi import types
-        assert str(types.sint) == '<ffi type sint>'
-        assert str(types.uint) == '<ffi type uint>'
+        assert str(types.sint) == "<ffi type sint>"
+        assert str(types.uint) == "<ffi type uint>"
         
     def test_callfunc(self):
         from _ffi import CDLL, types
@@ -70,24 +94,42 @@
         libm = CDLL(self.libm_name)
         pow = libm.getfunc('pow', [types.double, types.double], types.double)
         assert pow.getaddr() == self.pow_addr
-        
+
+    def test_getaddressindll(self):
+        import sys
+        from _ffi import CDLL, types
+        libm = CDLL(self.libm_name)
+        pow_addr = libm.getaddressindll('pow')
+        assert pow_addr == self.pow_addr & (sys.maxint*2-1)
+
+    def test_func_fromaddr(self):
+        import sys
+        from _ffi import CDLL, types, FuncPtr
+        libm = CDLL(self.libm_name)
+        pow_addr = libm.getaddressindll('pow')
+        pow = FuncPtr.fromaddr(pow_addr, 'pow', [types.double, types.double],
+                               types.double)
+        assert pow(2, 3) == 8
+
     def test_int_args(self):
         """
-            int sum_xy(int x, int y)
+            DLLEXPORT int sum_xy(int x, int y)
             {
                 return x+y;
             }
         """
+        import sys
         from _ffi import CDLL, types
         libfoo = CDLL(self.libfoo_name)
         sum_xy = libfoo.getfunc('sum_xy', [types.sint, types.sint], types.sint)
         assert sum_xy(30, 12) == 42
+        assert sum_xy(sys.maxint*2, 0) == -2
 
     def test_void_result(self):
         """
             int dummy = 0;
-            void set_dummy(int val) { dummy = val; }
-            int get_dummy() { return dummy; }
+            DLLEXPORT void set_dummy(int val) { dummy = val; }
+            DLLEXPORT int get_dummy() { return dummy; }
         """
         from _ffi import CDLL, types
         libfoo = CDLL(self.libfoo_name)
@@ -96,10 +138,105 @@
         assert get_dummy() == 0
         assert set_dummy(42) is None
         assert get_dummy() == 42
+        set_dummy(0)
+
+    def test_pointer_args(self):
+        """
+            extern int dummy; // defined in test_void_result 
+            DLLEXPORT int* get_dummy_ptr() { return &dummy; }
+            DLLEXPORT void set_val_to_ptr(int* ptr, int val) { *ptr = val; }
+        """
+        from _ffi import CDLL, types
+        libfoo = CDLL(self.libfoo_name)
+        get_dummy = libfoo.getfunc('get_dummy', [], types.sint)
+        get_dummy_ptr = libfoo.getfunc('get_dummy_ptr', [], types.void_p)
+        set_val_to_ptr = libfoo.getfunc('set_val_to_ptr',
+                                        [types.void_p, types.sint],
+                                        types.void)
+        assert get_dummy() == 0
+        ptr = get_dummy_ptr()
+        set_val_to_ptr(ptr, 123)
+        assert get_dummy() == 123
+        set_val_to_ptr(ptr, 0)
+
+    def test_convert_pointer_args(self):
+        """
+            extern int dummy; // defined in test_void_result 
+            DLLEXPORT int* get_dummy_ptr(); // defined in test_pointer_args
+            DLLEXPORT void set_val_to_ptr(int* ptr, int val); // ditto
+        """
+        from _ffi import CDLL, types
+
+        class MyPointerWrapper(object):
+            def __init__(self, value):
+                self.value = value
+            def _as_ffi_pointer_(self, ffitype):
+                assert ffitype is types.void_p
+                return self.value
+        
+        libfoo = CDLL(self.libfoo_name)
+        get_dummy = libfoo.getfunc('get_dummy', [], types.sint)
+        get_dummy_ptr = libfoo.getfunc('get_dummy_ptr', [], types.void_p)
+        set_val_to_ptr = libfoo.getfunc('set_val_to_ptr',
+                                        [types.void_p, types.sint],
+                                        types.void)
+        assert get_dummy() == 0
+        ptr = get_dummy_ptr()
+        assert type(ptr) in (int, long)
+        ptr2 = MyPointerWrapper(ptr)
+        set_val_to_ptr(ptr2, 123)
+        assert get_dummy() == 123
+        set_val_to_ptr(ptr2, 0)
+
+    def test_typed_pointer(self):
+        from _ffi import types
+        intptr = types.Pointer(types.sint) # create a typed pointer to sint
+        assert intptr.deref_pointer() is types.sint
+        assert str(intptr) == '<ffi type (pointer to sint)>'
+        assert types.sint.deref_pointer() is None
+        raises(TypeError, "types.Pointer(42)")
+
+    def test_pointer_identity(self):
+        from _ffi import types
+        x = types.Pointer(types.slong)
+        y = types.Pointer(types.slong)
+        z = types.Pointer(types.char)
+        assert x is y
+        assert x is not z
+
+    def test_typed_pointer_args(self):
+        """
+            extern int dummy; // defined in test_void_result 
+            DLLEXPORT int* get_dummy_ptr(); // defined in test_pointer_args
+            DLLEXPORT void set_val_to_ptr(int* ptr, int val); // ditto
+        """
+        from _ffi import CDLL, types
+
+        libfoo = CDLL(self.libfoo_name)
+        intptr = types.Pointer(types.sint)
+        get_dummy = libfoo.getfunc('get_dummy', [], types.sint)
+        get_dummy_ptr = libfoo.getfunc('get_dummy_ptr', [], intptr)
+        set_val_to_ptr = libfoo.getfunc('set_val_to_ptr', [intptr, types.sint], types.void)
+        assert get_dummy() == 0
+        ptr = get_dummy_ptr()
+        set_val_to_ptr(ptr, 123)
+        assert get_dummy() == 123
+        set_val_to_ptr(ptr, 0)
+
+    def test_huge_pointer_args(self):
+        """
+            #include <stdlib.h>
+            DLLEXPORT long is_null_ptr(void* ptr) { return ptr == NULL; }
+        """
+        import sys
+        from _ffi import CDLL, types
+        libfoo = CDLL(self.libfoo_name)
+        is_null_ptr = libfoo.getfunc('is_null_ptr', [types.void_p], types.ulong)
+        assert not is_null_ptr(sys.maxint+1)
 
     def test_unsigned_long_args(self):
         """
-            unsigned long sum_xy_ul(unsigned long x, unsigned long y)
+            DLLEXPORT unsigned long sum_xy_ul(unsigned long x, unsigned long y)
             {
                 return x+y;
             }
@@ -111,15 +248,17 @@
                                 types.ulong)
         assert sum_xy(sys.maxint, 12) == sys.maxint+12
         assert sum_xy(sys.maxint+1, 12) == sys.maxint+13
+        #
+        res = sum_xy(sys.maxint*2+3, 0)
+        assert res == 1
 
     def test_unsigned_short_args(self):
         """
-            unsigned short sum_xy_us(unsigned short x, unsigned short y)
+            DLLEXPORT unsigned short sum_xy_us(unsigned short x, unsigned short y)
             {
                 return x+y;
             }
         """
-        import sys
         from _ffi import CDLL, types
         libfoo = CDLL(self.libfoo_name)
         sum_xy = libfoo.getfunc('sum_xy_us', [types.ushort, types.ushort],
@@ -127,6 +266,169 @@
         assert sum_xy(32000, 8000) == 40000
         assert sum_xy(60000, 30000) == 90000 % 65536
 
+    def test_unsigned_byte_args(self):
+        """
+            DLLEXPORT unsigned char sum_xy_ub(unsigned char x, unsigned char y)
+            {
+                return x+y;
+            }
+        """
+        from _ffi import CDLL, types
+        libfoo = CDLL(self.libfoo_name)
+        sum_xy = libfoo.getfunc('sum_xy_ub', [types.ubyte, types.ubyte],
+                                types.ubyte)
+        assert sum_xy(100, 40) == 140
+        assert sum_xy(200, 60) == 260 % 256
+
+    def test_signed_byte_args(self):
+        """
+            DLLEXPORT signed char sum_xy_sb(signed char x, signed char y)
+            {
+                return x+y;
+            }
+        """
+        from _ffi import CDLL, types
+        libfoo = CDLL(self.libfoo_name)
+        sum_xy = libfoo.getfunc('sum_xy_sb', [types.sbyte, types.sbyte],
+                                types.sbyte)
+        assert sum_xy(10, 20) == 30
+        assert sum_xy(100, 28) == -128
+
+    def test_char_args(self):
+        """
+            DLLEXPORT char my_toupper(char x)
+            {
+                return x - ('a'-'A');
+            }
+        """
+        from _ffi import CDLL, types
+        libfoo = CDLL(self.libfoo_name)
+        my_toupper = libfoo.getfunc('my_toupper', [types.char],
+                                    types.char)
+        assert my_toupper('c') == 'C'
+
+    def test_unichar_args(self):
+        """
+            #include <stddef.h>
+            DLLEXPORT wchar_t sum_xy_wc(wchar_t x, wchar_t y)
+            {
+                return x + y;
+            }
+        """
+        from _ffi import CDLL, types
+        libfoo = CDLL(self.libfoo_name)
+        sum_xy = libfoo.getfunc('sum_xy_wc', [types.unichar, types.unichar],
+                                types.unichar)
+        res = sum_xy(unichr(1000), unichr(2000))
+        assert type(res) is unicode
+        assert ord(res) == 3000
+
+    def test_single_float_args(self):
+        """
+            DLLEXPORT float sum_xy_float(float x, float y)
+            {
+                return x+y;
+            }
+        """
+        from _ffi import CDLL, types
+        libfoo = CDLL(self.libfoo_name)
+        sum_xy = libfoo.getfunc('sum_xy_float', [types.float, types.float],
+                                types.float)
+        res = sum_xy(12.34, 56.78)
+        assert res == self.f_12_34_plus_56_78
+
+
+    def test_slonglong_args(self):
+        """
+            DLLEXPORT long long sum_xy_longlong(long long x, long long y)
+            {
+                return x+y;
+            }
+        """
+        from _ffi import CDLL, types
+        maxint32 = 2147483647 # we cannot really go above maxint on 64 bits
+                              # (and we would not test anything, since on
+                              # 64 bits long is the same as long long)
+
+        libfoo = CDLL(self.libfoo_name)
+        sum_xy = libfoo.getfunc('sum_xy_longlong', [types.slonglong, types.slonglong],
+                                types.slonglong)
+        x = maxint32+1
+        y = maxint32+2
+        res = sum_xy(x, y)
+        expected = maxint32*2 + 3
+        assert res == expected
+
+    def test_ulonglong_args(self):
+        """
+            DLLEXPORT unsigned long long sum_xy_ulonglong(unsigned long long x,
+                                                unsigned long long y)
+            {
+                return x+y;
+            }
+        """
+        from _ffi import CDLL, types
+        maxint64 = 9223372036854775807 # maxint64+1 does not fit into a
+                                       # longlong, but it does into a
+                                       # ulonglong
+        libfoo = CDLL(self.libfoo_name)
+        sum_xy = libfoo.getfunc('sum_xy_ulonglong', [types.ulonglong, types.ulonglong],
+                                types.ulonglong)
+        x = maxint64+1
+        y = 2
+        res = sum_xy(x, y)
+        expected = maxint64 + 3
+        assert res == expected
+        #
+        res = sum_xy(maxint64*2+3, 0)
+        assert res == 1
+
+    def test_byval_argument(self):
+        """
+            struct Point {
+                long x;
+                long y;
+            };
+
+            DLLEXPORT long sum_point(struct Point p) {
+                return p.x + p.y;
+            }
+        """
+        import _rawffi
+        from _ffi import CDLL, types
+        POINT = _rawffi.Structure([('x', 'l'), ('y', 'l')])
+        ffi_point = POINT.get_ffi_type()
+        libfoo = CDLL(self.libfoo_name)
+        sum_point = libfoo.getfunc('sum_point', [ffi_point], types.slong)
+        #
+        p = POINT()
+        p.x = 30
+        p.y = 12
+        res = sum_point(p)
+        assert res == 42
+        p.free()
+
+    def test_byval_result(self):
+        """
+            DLLEXPORT struct Point make_point(long x, long y) {
+                struct Point p;
+                p.x = x;
+                p.y = y;
+                return p;
+            }
+        """
+        import _rawffi
+        from _ffi import CDLL, types
+        POINT = _rawffi.Structure([('x', 'l'), ('y', 'l')])
+        ffi_point = POINT.get_ffi_type()
+        libfoo = CDLL(self.libfoo_name)
+        make_point = libfoo.getfunc('make_point', [types.slong, types.slong], ffi_point)
+        #
+        p = make_point(12, 34)
+        assert p.x == 12
+        assert p.y == 34
+        p.free()
+
     def test_TypeError_numargs(self):
         from _ffi import CDLL, types
         libfoo = CDLL(self.libfoo_name)
@@ -142,3 +444,10 @@
     def test_OSError_loading(self):
         from _ffi import CDLL, types
         raises(OSError, "CDLL('I do not exist')")
+
+    def test_AttributeError_missing_function(self):
+        from _ffi import CDLL, types
+        libfoo = CDLL(self.libfoo_name)
+        raises(AttributeError, "libfoo.getfunc('I_do_not_exist', [], types.void)")
+        libnone = CDLL(None)
+        raises(AttributeError, "libnone.getfunc('I_do_not_exist', [], types.void)")
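
For illustration, the typed-pointer API exercised by the new tests above can be
used roughly as follows. This is a sketch only: the library name is hypothetical,
and the get_dummy_ptr/set_val_to_ptr symbols are assumed to come from the C
snippets embedded in the test docstrings.

    from _ffi import CDLL, types

    lib = CDLL('libfoo.so')                    # hypothetical library name
    intptr = types.Pointer(types.sint)         # typed pointer to a C int
    assert types.Pointer(types.sint) is intptr # pointer types are cached
    assert intptr.deref_pointer() is types.sint

    get_dummy_ptr = lib.getfunc('get_dummy_ptr', [], intptr)
    set_val_to_ptr = lib.getfunc('set_val_to_ptr', [intptr, types.sint],
                                 types.void)
    set_val_to_ptr(get_dummy_ptr(), 123)       # write through the pointer
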
diff --git a/pypy/module/_io/interp_bufferedio.py b/pypy/module/_io/interp_bufferedio.py
--- a/pypy/module/_io/interp_bufferedio.py
+++ b/pypy/module/_io/interp_bufferedio.py
@@ -175,7 +175,7 @@
         return space.call_method(self.w_raw, "isatty")
 
     def repr_w(self, space):
-        typename = space.type(self).getname(space, '?')
+        typename = space.type(self).getname(space)
         module = space.str_w(space.type(self).get_module())
         try:
             w_name = space.getattr(self, space.wrap("name"))
diff --git a/pypy/module/_io/interp_io.py b/pypy/module/_io/interp_io.py
--- a/pypy/module/_io/interp_io.py
+++ b/pypy/module/_io/interp_io.py
@@ -119,7 +119,7 @@
     if buffering < 0:
         buffering = DEFAULT_BUFFER_SIZE
 
-        if "st_blksize" in STAT_FIELD_TYPES:
+        if space.config.translation.type_system == 'lltype' and 'st_blksize' in STAT_FIELD_TYPES:
             fileno = space.int_w(space.call_method(w_raw, "fileno"))
             try:
                 st = os.fstat(fileno)
diff --git a/pypy/module/_io/interp_iobase.py b/pypy/module/_io/interp_iobase.py
--- a/pypy/module/_io/interp_iobase.py
+++ b/pypy/module/_io/interp_iobase.py
@@ -155,7 +155,7 @@
                     raise operationerrfmt(
                         space.w_IOError,
                         "peek() should have returned a bytes object, "
-                        "not '%s'", space.type(w_readahead).getname(space, '?'))
+                        "not '%s'", space.type(w_readahead).getname(space))
                 length = space.len_w(w_readahead)
                 if length > 0:
                     n = 0
@@ -181,7 +181,7 @@
                 raise operationerrfmt(
                     space.w_IOError,
                     "peek() should have returned a bytes object, "
-                    "not '%s'", space.type(w_read).getname(space, '?'))
+                    "not '%s'", space.type(w_read).getname(space))
             read = space.str_w(w_read)
             if not read:
                 break
diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py
--- a/pypy/module/_io/interp_stringio.py
+++ b/pypy/module/_io/interp_stringio.py
@@ -129,7 +129,7 @@
         if not space.isinstance_w(w_obj, space.w_unicode):
             raise operationerrfmt(space.w_TypeError,
                                   "string argument expected, got '%s'",
-                                  space.type(w_obj).getname(space, '?'))
+                                  space.type(w_obj).getname(space))
         self._check_closed(space)
 
         orig_size = space.len_w(w_obj)
diff --git a/pypy/module/_lsprof/interp_lsprof.py b/pypy/module/_lsprof/interp_lsprof.py
--- a/pypy/module/_lsprof/interp_lsprof.py
+++ b/pypy/module/_lsprof/interp_lsprof.py
@@ -149,7 +149,7 @@
                             factor * float(self.ll_it), w_sublist)
         return space.wrap(w_se)
 
-    @jit.purefunction
+    @jit.elidable
     def _get_or_make_subentry(self, entry, make=True):
         try:
             return self.calls[entry]
@@ -167,7 +167,7 @@
         self.previous = profobj.current_context
         entry.recursionLevel += 1
         if profobj.subcalls and self.previous:
-            caller = jit.hint(self.previous.entry, promote=True)
+            caller = jit.promote(self.previous.entry)
             subentry = caller._get_or_make_subentry(entry)
             subentry.recursionLevel += 1
         self.ll_t0 = profobj.ll_timer()
@@ -179,7 +179,7 @@
             self.previous.ll_subt += tt
         entry._stop(tt, it)
         if profobj.subcalls and self.previous:
-            caller = jit.hint(self.previous.entry, promote=True)
+            caller = jit.promote(self.previous.entry)
             subentry = caller._get_or_make_subentry(entry, False)
             if subentry is not None:
                 subentry._stop(tt, it)
@@ -212,7 +212,7 @@
                 module += '.'
         return '{%s%s}' % (module, w_arg.name)
     else:
-        class_name = space.type(w_arg).getname(space, '?')
+        class_name = space.type(w_arg).getname(space)
         return "{'%s' object}" % (class_name,)
 
 def lsprof_call(space, w_self, frame, event, w_arg):
@@ -282,7 +282,7 @@
         c_setup_profiling()
         space.getexecutioncontext().setllprofile(lsprof_call, space.wrap(self))
 
-    @jit.purefunction
+    @jit.elidable
     def _get_or_make_entry(self, f_code, make=True):
         try:
             return self.data[f_code]
@@ -293,7 +293,7 @@
                 return entry
             return None
 
-    @jit.purefunction
+    @jit.elidable
     def _get_or_make_builtin_entry(self, key, make=True):
         try:
             return self.builtin_data[key]
@@ -306,7 +306,7 @@
 
     def _enter_call(self, f_code):
         # we have a superb gc, no point in freelist :)
-        self = jit.hint(self, promote=True)
+        self = jit.promote(self)
         entry = self._get_or_make_entry(f_code)
         self.current_context = ProfilerContext(self, entry)
 
@@ -314,14 +314,14 @@
         context = self.current_context
         if context is None:
             return
-        self = jit.hint(self, promote=True)
+        self = jit.promote(self)
         entry = self._get_or_make_entry(f_code, False)
         if entry is not None:
             context._stop(self, entry)
         self.current_context = context.previous
 
     def _enter_builtin_call(self, key):
-        self = jit.hint(self, promote=True)
+        self = jit.promote(self)
         entry = self._get_or_make_builtin_entry(key)
         self.current_context = ProfilerContext(self, entry)
 
@@ -329,7 +329,7 @@
         context = self.current_context
         if context is None:
             return
-        self = jit.hint(self, promote=True)
+        self = jit.promote(self)
         entry = self._get_or_make_builtin_entry(key, False)
         if entry is not None:
             context._stop(self, entry)
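
The hunks above are part of a rename that appears in several files of this merge:
jit.hint(x, promote=True) becomes jit.promote(x), and @jit.purefunction becomes
@jit.elidable. A minimal sketch of the resulting pattern (the Cache class below is
invented for illustration, not taken from the source):

    from pypy.rlib import jit

    class Cache(object):
        def __init__(self):
            self.data = {}

        @jit.elidable                  # was @jit.purefunction
        def get(self, key):
            return self.data.get(key)

    def lookup(cache, key):
        cache = jit.promote(cache)     # was jit.hint(cache, promote=True)
        return cache.get(key)
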
diff --git a/pypy/module/_lsprof/test/test_cprofile.py b/pypy/module/_lsprof/test/test_cprofile.py
--- a/pypy/module/_lsprof/test/test_cprofile.py
+++ b/pypy/module/_lsprof/test/test_cprofile.py
@@ -181,8 +181,7 @@
 
 
 class AppTestWithDifferentBytecodes(AppTestCProfile):
-    keywords = {'objspace.opcodes.CALL_LIKELY_BUILTIN': True,
-                'objspace.opcodes.CALL_METHOD': True}
+    keywords = {'objspace.opcodes.CALL_METHOD': True}
 
 
 expected_output = {}
diff --git a/pypy/module/_multibytecodec/c_codecs.py b/pypy/module/_multibytecodec/c_codecs.py
--- a/pypy/module/_multibytecodec/c_codecs.py
+++ b/pypy/module/_multibytecodec/c_codecs.py
@@ -3,6 +3,8 @@
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
 from pypy.tool.autopath import pypydir
 
+UNICODE_REPLACEMENT_CHARACTER = u'\uFFFD'
+
 
 class EncodeDecodeError(Exception):
     def __init__(self, start, end, reason):
@@ -103,8 +105,12 @@
                                           [DECODEBUF_P], rffi.SSIZE_T)
 pypy_cjk_dec_inbuf_consumed = llexternal('pypy_cjk_dec_inbuf_consumed',
                                          [DECODEBUF_P], rffi.SSIZE_T)
+pypy_cjk_dec_replace_on_error = llexternal('pypy_cjk_dec_replace_on_error',
+                                           [DECODEBUF_P, rffi.CWCHARP,
+                                            rffi.SSIZE_T, rffi.SSIZE_T],
+                                           rffi.SSIZE_T)
 
-def decode(codec, stringdata):
+def decode(codec, stringdata, errors="strict", errorcb=None, namecb=None):
     inleft = len(stringdata)
     inbuf = rffi.get_nonmovingbuffer(stringdata)
     try:
@@ -112,10 +118,12 @@
         if not decodebuf:
             raise MemoryError
         try:
-            r = pypy_cjk_dec_chunk(decodebuf)
-            if r != 0:
-                multibytecodec_decerror(decodebuf, r)
-                assert False
+            while True:
+                r = pypy_cjk_dec_chunk(decodebuf)
+                if r == 0:
+                    break
+                multibytecodec_decerror(decodebuf, r, errors,
+                                        errorcb, namecb, stringdata)
             src = pypy_cjk_dec_outbuf(decodebuf)
             length = pypy_cjk_dec_outlen(decodebuf)
             return rffi.wcharpsize2unicode(src, length)
@@ -126,7 +134,8 @@
     finally:
         rffi.free_nonmovingbuffer(stringdata, inbuf)
 
-def multibytecodec_decerror(decodebuf, e):
+def multibytecodec_decerror(decodebuf, e, errors,
+                            errorcb, namecb, stringdata):
     if e > 0:
         reason = "illegal multibyte sequence"
         esize = e
@@ -138,12 +147,27 @@
     else:
         raise RuntimeError
     #
-    # if errors == ERROR_REPLACE:...
-    # if errors == ERROR_IGNORE or errors == ERROR_REPLACE:...
+    # compute the unicode to use as a replacement -> 'replace', and
+    # the current position in the input 'stringdata' -> 'end'
     start = pypy_cjk_dec_inbuf_consumed(decodebuf)
     end = start + esize
-    if 1:  # errors == ERROR_STRICT:
+    if errors == "strict":
         raise EncodeDecodeError(start, end, reason)
+    elif errors == "ignore":
+        replace = u""
+    elif errors == "replace":
+        replace = UNICODE_REPLACEMENT_CHARACTER
+    else:
+        assert errorcb
+        replace, end = errorcb(errors, namecb, reason,
+                               stringdata, start, end)
+    inbuf = rffi.get_nonmoving_unicodebuffer(replace)
+    try:
+        r = pypy_cjk_dec_replace_on_error(decodebuf, inbuf, len(replace), end)
+    finally:
+        rffi.free_nonmoving_unicodebuffer(replace, inbuf)
+    if r == MBERR_NOMEMORY:
+        raise MemoryError
 
 # ____________________________________________________________
 # Encoding
@@ -165,8 +189,12 @@
                                           [ENCODEBUF_P], rffi.SSIZE_T)
 pypy_cjk_enc_inbuf_consumed = llexternal('pypy_cjk_enc_inbuf_consumed',
                                          [ENCODEBUF_P], rffi.SSIZE_T)
+pypy_cjk_enc_replace_on_error = llexternal('pypy_cjk_enc_replace_on_error',
+                                           [ENCODEBUF_P, rffi.CCHARP,
+                                            rffi.SSIZE_T, rffi.SSIZE_T],
+                                           rffi.SSIZE_T)
 
-def encode(codec, unicodedata):
+def encode(codec, unicodedata, errors="strict", errorcb=None, namecb=None):
     inleft = len(unicodedata)
     inbuf = rffi.get_nonmoving_unicodebuffer(unicodedata)
     try:
@@ -174,14 +202,18 @@
         if not encodebuf:
             raise MemoryError
         try:
-            r = pypy_cjk_enc_chunk(encodebuf)
-            if r != 0:
-                multibytecodec_encerror(encodebuf, r)
-                assert False
-            r = pypy_cjk_enc_reset(encodebuf)
-            if r != 0:
-                multibytecodec_encerror(encodebuf, r)
-                assert False
+            while True:
+                r = pypy_cjk_enc_chunk(encodebuf)
+                if r == 0:
+                    break
+                multibytecodec_encerror(encodebuf, r, errors,
+                                        codec, errorcb, namecb, unicodedata)
+            while True:
+                r = pypy_cjk_enc_reset(encodebuf)
+                if r == 0:
+                    break
+                multibytecodec_encerror(encodebuf, r, errors,
+                                        codec, errorcb, namecb, unicodedata)
             src = pypy_cjk_enc_outbuf(encodebuf)
             length = pypy_cjk_enc_outlen(encodebuf)
             return rffi.charpsize2str(src, length)
@@ -192,7 +224,8 @@
     finally:
         rffi.free_nonmoving_unicodebuffer(unicodedata, inbuf)
 
-def multibytecodec_encerror(encodebuf, e):
+def multibytecodec_encerror(encodebuf, e, errors,
+                            codec, errorcb, namecb, unicodedata):
     if e > 0:
         reason = "illegal multibyte sequence"
         esize = e
@@ -204,9 +237,27 @@
     else:
         raise RuntimeError
     #
-    # if errors == ERROR_REPLACE:...
-    # if errors == ERROR_IGNORE or errors == ERROR_REPLACE:...
+    # compute the string to use as a replacement -> 'replace', and
+    # the current position in the input 'unicodedata' -> 'end'
     start = pypy_cjk_enc_inbuf_consumed(encodebuf)
     end = start + esize
-    if 1:  # errors == ERROR_STRICT:
+    if errors == "strict":
         raise EncodeDecodeError(start, end, reason)
+    elif errors == "ignore":
+        replace = ""
+    elif errors == "replace":
+        try:
+            replace = encode(codec, u"?")
+        except EncodeDecodeError:
+            replace = "?"
+    else:
+        assert errorcb
+        replace, end = errorcb(errors, namecb, reason,
+                               unicodedata, start, end)
+    inbuf = rffi.get_nonmovingbuffer(replace)
+    try:
+        r = pypy_cjk_enc_replace_on_error(encodebuf, inbuf, len(replace), end)
+    finally:
+        rffi.free_nonmovingbuffer(replace, inbuf)
+    if r == MBERR_NOMEMORY:
+        raise MemoryError
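
With the changes above, decode() and encode() loop until the C chunk functions
succeed, handling 'ignore' and 'replace' inline and dispatching any other error
name to the supplied callback. The observable effect, mirroring the assertions in
test_c_codecs.py further down in this diff:

    from pypy.module._multibytecodec.c_codecs import getcodec, decode, encode

    c = getcodec("hz")
    assert decode(c, 'def~{}abc', 'ignore')  == u'def\u5fcf'
    assert decode(c, 'def~{}abc', 'replace') == u'def\ufffd\u5fcf'
    assert encode(c, u'abc\u1234def', 'ignore')  == 'abcdef'
    assert encode(c, u'abc\u1234def', 'replace') == 'abc?def'
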
diff --git a/pypy/module/_multibytecodec/interp_multibytecodec.py b/pypy/module/_multibytecodec/interp_multibytecodec.py
--- a/pypy/module/_multibytecodec/interp_multibytecodec.py
+++ b/pypy/module/_multibytecodec/interp_multibytecodec.py
@@ -3,6 +3,7 @@
 from pypy.interpreter.typedef import TypeDef
 from pypy.interpreter.error import OperationError
 from pypy.module._multibytecodec import c_codecs
+from pypy.module._codecs.interp_codecs import CodecState
 
 
 class MultibyteCodec(Wrappable):
@@ -13,13 +14,13 @@
 
     @unwrap_spec(input=str, errors="str_or_None")
     def decode(self, space, input, errors=None):
-        if errors is not None and errors != 'strict':
-            raise OperationError(space.w_NotImplementedError,    # XXX
-                                 space.wrap("errors='%s' in _multibytecodec"
-                                            % errors))
+        if errors is None:
+            errors = 'strict'
+        state = space.fromcache(CodecState)
         #
         try:
-            output = c_codecs.decode(self.codec, input)
+            output = c_codecs.decode(self.codec, input, errors,
+                                     state.decode_error_handler, self.name)
         except c_codecs.EncodeDecodeError, e:
             raise OperationError(
                 space.w_UnicodeDecodeError,
@@ -37,13 +38,13 @@
 
     @unwrap_spec(input=unicode, errors="str_or_None")
     def encode(self, space, input, errors=None):
-        if errors is not None and errors != 'strict':
-            raise OperationError(space.w_NotImplementedError,    # XXX
-                                 space.wrap("errors='%s' in _multibytecodec"
-                                            % errors))
+        if errors is None:
+            errors = 'strict'
+        state = space.fromcache(CodecState)
         #
         try:
-            output = c_codecs.encode(self.codec, input)
+            output = c_codecs.encode(self.codec, input, errors,
+                                     state.encode_error_handler, self.name)
         except c_codecs.EncodeDecodeError, e:
             raise OperationError(
                 space.w_UnicodeEncodeError,
diff --git a/pypy/module/_multibytecodec/test/test_app_codecs.py b/pypy/module/_multibytecodec/test/test_app_codecs.py
--- a/pypy/module/_multibytecodec/test/test_app_codecs.py
+++ b/pypy/module/_multibytecodec/test/test_app_codecs.py
@@ -36,6 +36,37 @@
         e = raises(UnicodeDecodeError, codec.decode, "~{xyz}").value
         assert e.args == ('hz', '~{xyz}', 2, 4, 'illegal multibyte sequence')
 
+    def test_decode_hz_ignore(self):
+        import _codecs_cn
+        codec = _codecs_cn.getcodec("hz")
+        r = codec.decode("def~{}abc", errors='ignore')
+        assert r == (u'def\u5fcf', 9)
+        r = codec.decode("def~{}abc", 'ignore')
+        assert r == (u'def\u5fcf', 9)
+
+    def test_decode_hz_replace(self):
+        import _codecs_cn
+        codec = _codecs_cn.getcodec("hz")
+        r = codec.decode("def~{}abc", errors='replace')
+        assert r == (u'def\ufffd\u5fcf', 9)
+        r = codec.decode("def~{}abc", 'replace')
+        assert r == (u'def\ufffd\u5fcf', 9)
+
+    def test_decode_custom_error_handler(self):
+        import codecs
+        codecs.register_error("test.decode_custom_error_handler",
+                              lambda e: (u'\u1234\u5678', e.end))
+        u = "abc\xDD".decode("hz", "test.decode_custom_error_handler")
+        assert u == u'abc\u1234\u5678'
+
+    def test_decode_custom_error_handler_overflow(self):
+        import codecs
+        import sys
+        codecs.register_error("test.test_decode_custom_error_handler_overflow",
+                              lambda e: (u'', sys.maxint + 1))
+        raises((IndexError, OverflowError), "abc\xDD".decode, "hz",
+               "test.test_decode_custom_error_handler_overflow")
+
     def test_encode_hz(self):
         import _codecs_cn
         codec = _codecs_cn.getcodec("hz")
@@ -54,3 +85,24 @@
         assert e.start == 3
         assert e.end == 4
         assert e.reason == 'illegal multibyte sequence'
+
+    def test_encode_hz_ignore(self):
+        import _codecs_cn
+        codec = _codecs_cn.getcodec("hz")
+        r = codec.encode(u'abc\u1234def', 'ignore')
+        assert r == ('abcdef', 7)
+        assert type(r[0]) is str
+
+    def test_encode_hz_replace(self):
+        import _codecs_cn
+        codec = _codecs_cn.getcodec("hz")
+        r = codec.encode(u'abc\u1234def', 'replace')
+        assert r == ('abc?def', 7)
+        assert type(r[0]) is str
+
+    def test_encode_custom_error_handler(self):
+        import codecs
+        codecs.register_error("test.multi_bad_handler", lambda e: (repl, 1))
+        repl = u"\u2014"
+        s = u"\uDDA1".encode("gbk", "test.multi_bad_handler")
+        assert s == '\xA1\xAA'
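
Beyond the built-in 'ignore' and 'replace' modes, the plumbing above also lets
app-level code register its own error handler through the codecs module, as the
custom-handler tests above do. A small sketch (the handler name and the
replacement character are invented for illustration):

    import codecs

    # Resume decoding right after the offending byte, inserting u'*'.
    codecs.register_error("sketch.star", lambda exc: (u'*', exc.end))
    assert "abc\xDD".decode("hz", "sketch.star") == u'abc*'
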
diff --git a/pypy/module/_multibytecodec/test/test_c_codecs.py b/pypy/module/_multibytecodec/test/test_c_codecs.py
--- a/pypy/module/_multibytecodec/test/test_c_codecs.py
+++ b/pypy/module/_multibytecodec/test/test_c_codecs.py
@@ -36,6 +36,16 @@
     assert e.end == 4
     assert e.reason == "illegal multibyte sequence"
 
+def test_decode_hz_ignore():
+    c = getcodec("hz")
+    u = decode(c, 'def~{}abc', 'ignore')
+    assert u == u'def\u5fcf'
+
+def test_decode_hz_replace():
+    c = getcodec("hz")
+    u = decode(c, 'def~{}abc', 'replace')
+    assert u == u'def\ufffd\u5fcf'
+
 def test_encode_hz():
     c = getcodec("hz")
     s = encode(c, u'foobar')
@@ -51,6 +61,16 @@
     assert e.end == 4
     assert e.reason == "illegal multibyte sequence"
 
+def test_encode_hz_ignore():
+    c = getcodec("hz")
+    s = encode(c, u'abc\u1234def', 'ignore')
+    assert s == 'abcdef'
+
+def test_encode_hz_replace():
+    c = getcodec("hz")
+    s = encode(c, u'abc\u1234def', 'replace')
+    assert s == 'abc?def'
+
 def test_encode_jisx0208():
     c = getcodec('iso2022_jp')
     s = encode(c, u'\u83ca\u5730\u6642\u592b')
diff --git a/pypy/module/_multiprocessing/interp_connection.py b/pypy/module/_multiprocessing/interp_connection.py
--- a/pypy/module/_multiprocessing/interp_connection.py
+++ b/pypy/module/_multiprocessing/interp_connection.py
@@ -360,7 +360,7 @@
         conn_type = ["read-only", "write-only", "read-write"][self.flags]
 
         return space.wrap("<%s %s, handle %zd>" % (
-            conn_type, space.type(self).getname(space, '?'), self.do_fileno()))
+            conn_type, space.type(self).getname(space), self.do_fileno()))
 
     def is_valid(self):
         return self.handle != self.INVALID_HANDLE_VALUE
diff --git a/pypy/module/_multiprocessing/test/test_memory.py b/pypy/module/_multiprocessing/test/test_memory.py
--- a/pypy/module/_multiprocessing/test/test_memory.py
+++ b/pypy/module/_multiprocessing/test/test_memory.py
@@ -3,7 +3,7 @@
 class AppTestMemory:
     def setup_class(cls):
         space = gettestobjspace(
-            usemodules=('_multiprocessing', 'mmap', '_rawffi'))
+            usemodules=('_multiprocessing', 'mmap', '_rawffi', '_ffi'))
         cls.space = space
 
     def test_address_of(self):
diff --git a/pypy/module/_rawffi/interp_rawffi.py b/pypy/module/_rawffi/interp_rawffi.py
--- a/pypy/module/_rawffi/interp_rawffi.py
+++ b/pypy/module/_rawffi/interp_rawffi.py
@@ -176,7 +176,7 @@
             except KeyError:
                 raise operationerrfmt(space.w_AttributeError,
                     "No symbol %s found in library %s", name, self.name)
-        
+
         elif (_MS_WINDOWS and
               space.is_true(space.isinstance(w_name, space.w_int))):
             ordinal = space.int_w(w_name)
@@ -250,11 +250,18 @@
     def get_basic_ffi_type(self):
         raise NotImplementedError
 
+    def descr_get_ffi_type(self, space):
+        # XXX: this assumes that you have the _ffi module enabled. In the long
+        # term, we will probably move the code for building structures and arrays
+        # from _rawffi to _ffi
+        from pypy.module._ffi.interp_ffi import W_FFIType
+        return W_FFIType('<unknown>', self.get_basic_ffi_type(), self)
+
     @unwrap_spec(n=int)
     def descr_size_alignment(self, space, n=1):
         return space.newtuple([space.wrap(self.size * n),
                                space.wrap(self.alignment)])
-    
+
 
 class W_DataInstance(Wrappable):
     def __init__(self, space, size, address=r_uint(0)):
@@ -420,7 +427,7 @@
                     if not (argletter in TYPEMAP_PTR_LETTERS and
                             letter in TYPEMAP_PTR_LETTERS):
                         msg = "Argument %d should be typecode %s, got %s"
-                        raise operationerrfmt(space.w_TypeError, msg, 
+                        raise operationerrfmt(space.w_TypeError, msg,
                                               i+1, argletter, letter)
             args_ll.append(arg.ll_buffer)
             # XXX we could avoid the intermediate list args_ll
@@ -473,17 +480,25 @@
 alignment = _create_new_accessor('alignment', 'c_alignment')
 
 @unwrap_spec(address=r_uint, maxlength=int)
-def charp2string(space, address, maxlength=sys.maxint):
+def charp2string(space, address, maxlength=-1):
     if address == 0:
         return space.w_None
-    s = rffi.charp2strn(rffi.cast(rffi.CCHARP, address), maxlength)
+    charp_addr = rffi.cast(rffi.CCHARP, address)
+    if maxlength == -1:
+        s = rffi.charp2str(charp_addr)
+    else:
+        s = rffi.charp2strn(charp_addr, maxlength)
     return space.wrap(s)
 
 @unwrap_spec(address=r_uint, maxlength=int)
-def wcharp2unicode(space, address, maxlength=sys.maxint):
+def wcharp2unicode(space, address, maxlength=-1):
     if address == 0:
         return space.w_None
-    s = rffi.wcharp2unicoden(rffi.cast(rffi.CWCHARP, address), maxlength)
+    wcharp_addr = rffi.cast(rffi.CWCHARP, address)
+    if maxlength == -1:
+        s = rffi.wcharp2unicode(wcharp_addr)
+    else:
+        s = rffi.wcharp2unicoden(wcharp_addr, maxlength)
     return space.wrap(s)
 
 @unwrap_spec(address=r_uint, maxlength=int)
diff --git a/pypy/module/_rawffi/structure.py b/pypy/module/_rawffi/structure.py
--- a/pypy/module/_rawffi/structure.py
+++ b/pypy/module/_rawffi/structure.py
@@ -248,7 +248,8 @@
     alignment   = interp_attrproperty('alignment', W_Structure),
     fieldoffset = interp2app(W_Structure.descr_fieldoffset),
     fieldsize   = interp2app(W_Structure.descr_fieldsize),
-    size_alignment = interp2app(W_Structure.descr_size_alignment)
+    size_alignment = interp2app(W_Structure.descr_size_alignment),
+    get_ffi_type   = interp2app(W_Structure.descr_get_ffi_type),
 )
 W_Structure.typedef.acceptable_as_base_class = False
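
The new get_ffi_type() method exposes a _rawffi structure as an _ffi type, which
is what the struct-by-value tests earlier in this diff rely on. A sketch, assuming
the same test library and its sum_point() helper (the library name here is
hypothetical):

    import _rawffi
    from _ffi import CDLL, types

    POINT = _rawffi.Structure([('x', 'l'), ('y', 'l')])
    ffi_point = POINT.get_ffi_type()          # new in this merge

    libfoo = CDLL('libfoo.so')                # hypothetical library name
    sum_point = libfoo.getfunc('sum_point', [ffi_point], types.slong)

    p = POINT()
    p.x = 30
    p.y = 12
    assert sum_point(p) == 42                 # struct passed by value
    p.free()
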
 
diff --git a/pypy/module/_ssl/interp_ssl.py b/pypy/module/_ssl/interp_ssl.py
--- a/pypy/module/_ssl/interp_ssl.py
+++ b/pypy/module/_ssl/interp_ssl.py
@@ -1,3 +1,4 @@
+from __future__ import with_statement
 from pypy.rpython.lltypesystem import rffi, lltype
 from pypy.interpreter.error import OperationError
 from pypy.interpreter.baseobjspace import W_Root, ObjSpace, Wrappable
@@ -899,7 +900,7 @@
 
 def _ssl_thread_id_function():
     from pypy.module.thread import ll_thread
-    return rffi.cast(rffi.INT, ll_thread.get_ident())
+    return rffi.cast(rffi.LONG, ll_thread.get_ident())
 
 def setup_ssl_threads():
     from pypy.module.thread import ll_thread
diff --git a/pypy/module/_stackless/interp_coroutine.py b/pypy/module/_stackless/interp_coroutine.py
--- a/pypy/module/_stackless/interp_coroutine.py
+++ b/pypy/module/_stackless/interp_coroutine.py
@@ -40,7 +40,7 @@
             raise operationerrfmt(
                 space.w_TypeError, 
                 "'%s' object is not callable",
-                space.type(w_obj).getname(space, '?'))
+                space.type(w_obj).getname(space))
         self.w_func = w_obj
         self.args = args
 
diff --git a/pypy/module/_stackless/test/test_greenlet.py b/pypy/module/_stackless/test/test_greenlet.py
--- a/pypy/module/_stackless/test/test_greenlet.py
+++ b/pypy/module/_stackless/test/test_greenlet.py
@@ -72,6 +72,23 @@
         g1 = greenlet(f)
         raises(ValueError, g2.switch)
 
+
+    def test_exc_info_save_restore(self):
+        from _stackless import greenlet
+        import sys
+        def f():
+            try:
+                raise ValueError('fun')
+            except:
+                exc_info = sys.exc_info()
+                greenlet(h).switch()
+                assert exc_info == sys.exc_info()
+
+        def h():
+            assert sys.exc_info() == (None, None, None)
+
+        greenlet(f).switch()
+
     def test_exception(self):
         from _stackless import greenlet
         import sys
diff --git a/pypy/module/_weakref/interp__weakref.py b/pypy/module/_weakref/interp__weakref.py
--- a/pypy/module/_weakref/interp__weakref.py
+++ b/pypy/module/_weakref/interp__weakref.py
@@ -129,7 +129,7 @@
         if w_obj is None:
             state = '; dead'
         else:
-            typename = space.type(w_obj).getname(space, '?')
+            typename = space.type(w_obj).getname(space)
             objname = w_obj.getname(space, '')
             if objname:
                 state = "; to '%s' (%s)" % (typename, objname)
diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py
--- a/pypy/module/array/interp_array.py
+++ b/pypy/module/array/interp_array.py
@@ -1,18 +1,21 @@
 from __future__ import with_statement
 
+from pypy.interpreter.baseobjspace import Wrappable
+from pypy.interpreter.buffer import RWBuffer
 from pypy.interpreter.error import OperationError
+from pypy.interpreter.gateway import interp2app, unwrap_spec
 from pypy.interpreter.typedef import TypeDef, GetSetProperty, make_weakref_descr
-from pypy.rpython.lltypesystem import lltype, rffi
-from pypy.interpreter.gateway import interp2app, unwrap_spec
-from pypy.rlib.unroll import unrolling_iterable
-from pypy.rlib.rarithmetic import ovfcheck
-from pypy.interpreter.baseobjspace import Wrappable
+from pypy.module._file.interp_file import W_File
+from pypy.objspace.std.model import W_Object
+from pypy.objspace.std.multimethod import FailedToImplement
 from pypy.objspace.std.stdtypedef import SMM, StdTypeDef
 from pypy.objspace.std.register_all import register_all
-from pypy.objspace.std.model import W_Object
-from pypy.module._file.interp_file import W_File
-from pypy.interpreter.buffer import RWBuffer
-from pypy.objspace.std.multimethod import FailedToImplement
+from pypy.rlib.rarithmetic import ovfcheck
+from pypy.rlib.unroll import unrolling_iterable
+from pypy.rpython.lltypesystem import lltype, rffi
+
+
+memcpy = rffi.llexternal("memcpy", [rffi.VOIDP, rffi.VOIDP, rffi.SIZE_T], lltype.Void)
 
 @unwrap_spec(typecode=str)
 def w_array(space, w_cls, typecode, __args__):
@@ -37,7 +40,7 @@
             if len(__args__.arguments_w) > 0:
                 w_initializer = __args__.arguments_w[0]
                 if space.type(w_initializer) is space.w_str:
-                    a.fromstring(w_initializer)
+                    a.fromstring(space.str_w(w_initializer))
                 elif space.type(w_initializer) is space.w_unicode:
                     a.fromsequence(w_initializer)
                 elif space.type(w_initializer) is space.w_list:
@@ -73,6 +76,7 @@
 
 array_buffer_info = SMM('buffer_info', 1)
 array_reduce = SMM('__reduce__', 1)
+array_copy = SMM('__copy__', 1)
 array_byteswap = SMM('byteswap', 1)
 
 
@@ -96,7 +100,7 @@
     itemsize = GetSetProperty(descr_itemsize),
     typecode = GetSetProperty(descr_typecode),
     __weakref__ = make_weakref_descr(W_ArrayBase),
-    )
+)
 W_ArrayBase.typedef.registermethods(globals())
 
 
@@ -159,8 +163,6 @@
         self.data[index] = char
 
 
-
-
 def make_array(mytype):
     class W_Array(W_ArrayBase):
         itemsize = mytype.bytes
@@ -268,12 +270,10 @@
                 raise
             self.setlen(oldlen + i)
 
-        def fromstring(self, w_s):
-            space = self.space
-            s = space.str_w(w_s)
+        def fromstring(self, s):
             if len(s) % self.itemsize != 0:
                 msg = 'string length not a multiple of item size'
-                raise OperationError(space.w_ValueError, space.wrap(msg))
+                raise OperationError(self.space.w_ValueError, self.space.wrap(msg))
             oldlen = self.len
             new = len(s) / mytype.bytes
             self.setlen(oldlen + new)
@@ -311,6 +311,14 @@
         def charbuf(self):
             return  rffi.cast(rffi.CCHARP, self.buffer)
 
+        def w_getitem(self, space, idx):
+            item = self.buffer[idx]
+            if mytype.typecode in 'bBhHil':
+                item = rffi.cast(lltype.Signed, item)
+            elif mytype.typecode == 'f':
+                item = float(item)
+            return space.wrap(item)
+
     # Basic get/set/append/extend methods
 
     def len__Array(space, self):
@@ -319,12 +327,7 @@
     def getitem__Array_ANY(space, self, w_idx):
         idx, stop, step = space.decode_index(w_idx, self.len)
         assert step == 0
-        item = self.buffer[idx]
-        if mytype.typecode in 'bBhHil':
-            item = rffi.cast(lltype.Signed, item)
-        elif mytype.typecode == 'f':
-            item = float(item)
-        return self.space.wrap(item)
+        return self.w_getitem(space, idx)
 
     def getitem__Array_Slice(space, self, w_slice):
         start, stop, step, size = space.decode_index4(w_slice, self.len)
@@ -387,7 +390,7 @@
     def array_count__Array_ANY(space, self, w_val):
         cnt = 0
         for i in range(self.len):
-            w_item = getitem__Array_ANY(space, self, space.wrap(i))
+            w_item = self.w_getitem(space, i)
             if space.is_true(space.eq(w_item, w_val)):
                 cnt += 1
         return space.wrap(cnt)
@@ -395,7 +398,7 @@
     def array_index__Array_ANY(space, self, w_val):
         cnt = 0
         for i in range(self.len):
-            w_item = getitem__Array_ANY(space, self, space.wrap(i))
+            w_item = self.w_getitem(space, i)
             if space.is_true(space.eq(w_item, w_val)):
                 return space.wrap(i)
         msg = 'array.index(x): x not in list'
@@ -413,7 +416,7 @@
         if i < 0 or i >= self.len:
             msg = 'pop index out of range'
             raise OperationError(space.w_IndexError, space.wrap(msg))
-        w_val = getitem__Array_ANY(space, self, space.wrap(i))
+        w_val = self.w_getitem(space, i)
         while i < self.len - 1:
             self.buffer[i] = self.buffer[i + 1]
             i += 1
@@ -515,26 +518,18 @@
     def array_tolist__Array(space, self):
         w_l = space.newlist([])
         for i in range(self.len):
-            w_l.append(getitem__Array_ANY(space, self, space.wrap(i)))
+            w_l.append(self.w_getitem(space, i))
         return w_l
 
     def array_fromlist__Array_List(space, self, w_lst):
         self.fromlist(w_lst)
 
     def array_fromstring__Array_ANY(space, self, w_s):
-        self.fromstring(w_s)
+        self.fromstring(space.str_w(w_s))
 
     def array_tostring__Array(space, self):
         cbuf = self.charbuf()
-        s = ''.join([cbuf[i] for i in xrange(self.len * mytype.bytes)])
-        return self.space.wrap(s)
-##
-##         s = ''
-##         i = 0
-##         while i < self.len * mytype.bytes:
-##             s += cbuf[i]
-##             i += 1
-##         return self.space.wrap(s)
+        return self.space.wrap(rffi.charpsize2str(cbuf, self.len * mytype.bytes))
 
     def array_fromfile__Array_ANY_ANY(space, self, w_f, w_n):
         if not isinstance(w_f, W_File):
@@ -577,10 +572,7 @@
             self.fromsequence(w_ustr)
 
         def array_tounicode__Array(space, self):
-            u = u""
-            for i in range(self.len):
-                u += self.buffer[i]
-            return space.wrap(u)
+            return space.wrap(rffi.wcharpsize2unicode(self.buffer, self.len))
     else:
 
         def array_fromunicode__Array_Unicode(space, self, w_ustr):
@@ -623,6 +615,16 @@
             dct = space.w_None
         return space.newtuple([space.type(self), space.newtuple(args), dct])
 
+    def array_copy__Array(space, self):
+        w_a = mytype.w_class(self.space)
+        w_a.setlen(self.len)
+        memcpy(
+            rffi.cast(rffi.VOIDP, w_a.buffer),
+            rffi.cast(rffi.VOIDP, self.buffer),
+            self.len * mytype.bytes
+        )
+        return w_a
+
     def array_byteswap__Array(space, self):
         if mytype.bytes not in [1, 2, 4, 8]:
             msg = "byteswap not supported for this array"
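
The new __copy__ multimethod duplicates the underlying buffer with memcpy, so
copy.copy() on an array now returns an independent array at app level. A short
usage sketch:

    import array, copy

    a = array.array('l', [1, 2, 3])
    b = copy.copy(a)        # dispatches to the new __copy__
    b[0] = 99
    assert a[0] == 1        # the two buffers are independent
    assert b[0] == 99
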
diff --git a/pypy/module/cpyext/__init__.py b/pypy/module/cpyext/__init__.py
--- a/pypy/module/cpyext/__init__.py
+++ b/pypy/module/cpyext/__init__.py
@@ -39,6 +39,7 @@
 import pypy.module.cpyext.object
 import pypy.module.cpyext.stringobject
 import pypy.module.cpyext.tupleobject
+import pypy.module.cpyext.setobject
 import pypy.module.cpyext.dictobject
 import pypy.module.cpyext.intobject
 import pypy.module.cpyext.longobject
@@ -64,6 +65,7 @@
 import pypy.module.cpyext.memoryobject
 import pypy.module.cpyext.codecs
 import pypy.module.cpyext.pyfile
+import pypy.module.cpyext.pystrtod
 
 # now that all rffi_platform.Struct types are registered, configure them
 api.configure_types()
diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py
--- a/pypy/module/cpyext/api.py
+++ b/pypy/module/cpyext/api.py
@@ -348,6 +348,7 @@
     '_Py_TrueStruct#': ('PyObject*', 'space.w_True'),
     '_Py_ZeroStruct#': ('PyObject*', 'space.w_False'),
     '_Py_NotImplementedStruct#': ('PyObject*', 'space.w_NotImplemented'),
+    '_Py_EllipsisObject#': ('PyObject*', 'space.w_Ellipsis'),
     'PyDateTimeAPI': ('PyDateTime_CAPI*', 'None'),
     }
 FORWARD_DECLS = []
@@ -561,7 +562,8 @@
             elif callable.api_func.restype is not lltype.Void:
                 retval = rffi.cast(callable.api_func.restype, result)
         except Exception, e:
-            print 'Fatal error in cpyext, calling', callable.__name__
+            print 'Fatal error in cpyext, CPython compatibility layer, calling', callable.__name__
+            print 'Either report a bug or consider not using this particular extension'
             if not we_are_translated():
                 import traceback
                 traceback.print_exc()
diff --git a/pypy/module/cpyext/methodobject.py b/pypy/module/cpyext/methodobject.py
--- a/pypy/module/cpyext/methodobject.py
+++ b/pypy/module/cpyext/methodobject.py
@@ -122,7 +122,7 @@
         return self.space.unwrap(self.descr_method_repr())
 
     def descr_method_repr(self):
-        return self.getrepr(self.space, "built-in method '%s' of '%s' object" % (self.name, self.w_objclass.getname(self.space, '?')))
+        return self.getrepr(self.space, "built-in method '%s' of '%s' object" % (self.name, self.w_objclass.getname(self.space)))
 
 PyCFunction_Check, PyCFunction_CheckExact = build_type_checkers("CFunction", W_PyCFunctionObject)
 
@@ -151,7 +151,7 @@
 
     def descr_method_repr(self):
         return self.space.wrap("<slot wrapper '%s' of '%s' objects>" % (self.method_name,
-            self.w_objclass.getname(self.space, '?')))
+            self.w_objclass.getname(self.space)))
 
 def cwrapper_descr_call(space, w_self, __args__):
     self = space.interp_w(W_PyCWrapperObject, w_self)
diff --git a/pypy/module/cpyext/pystrtod.py b/pypy/module/cpyext/pystrtod.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/cpyext/pystrtod.py
@@ -0,0 +1,68 @@
+import errno
+from pypy.interpreter.error import OperationError
+from pypy.module.cpyext.api import cpython_api
+from pypy.module.cpyext.pyobject import PyObject
+from pypy.rlib import rdtoa
+from pypy.rlib import rfloat
+from pypy.rlib import rposix
+from pypy.rpython.lltypesystem import lltype
+from pypy.rpython.lltypesystem import rffi
+
+
+@cpython_api([rffi.CCHARP, rffi.CCHARPP, PyObject], rffi.DOUBLE, error=-1.0)
+def PyOS_string_to_double(space, s, endptr, w_overflow_exception):
+    """Convert a string s to a double, raising a Python
+    exception on failure.  The set of accepted strings corresponds to
+    the set of strings accepted by Python's float() constructor,
+    except that s must not have leading or trailing whitespace.
+    The conversion is independent of the current locale.
+
+    If endptr is NULL, convert the whole string.  Raise
+    ValueError and return -1.0 if the string is not a valid
+    representation of a floating-point number.
+
+    If endptr is not NULL, convert as much of the string as
+    possible and set *endptr to point to the first unconverted
+    character.  If no initial segment of the string is the valid
+    representation of a floating-point number, set *endptr to point
+    to the beginning of the string, raise ValueError, and return
+    -1.0.
+
+    If s represents a value that is too large to store in a float
+    (for example, "1e500" is such a string on many platforms) then
+    if overflow_exception is NULL return Py_HUGE_VAL (with
+    an appropriate sign) and don't set any exception.  Otherwise,
+    overflow_exception must point to a Python exception object;
+    raise that exception and return -1.0.  In both cases, set
+    *endptr to point to the first character after the converted value.
+
+    If any other error occurs during the conversion (for example an
+    out-of-memory error), set the appropriate Python exception and
+    return -1.0.
+    """
+    user_endptr = True
+    try:
+        if not endptr:
+            endptr = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw')
+            user_endptr = False
+        result = rdtoa.dg_strtod(s, endptr)
+        endpos = (rffi.cast(rffi.LONG, endptr[0]) -
+                  rffi.cast(rffi.LONG, s))
+        if endpos == 0 or (not user_endptr and not endptr[0][0] == '\0'):
+            raise OperationError(
+                space.w_ValueError,
+                space.wrap('invalid input at position %s' % endpos))
+        if rposix.get_errno() == errno.ERANGE:
+            rposix.set_errno(0)
+            if w_overflow_exception is None:
+                if result > 0:
+                    return rfloat.INFINITY
+                else:
+                    return -rfloat.INFINITY
+            else:
+                raise OperationError(w_overflow_exception,
+                                     space.wrap('value too large'))
+        return result
+    finally:
+        if not user_endptr:
+            lltype.free(endptr, flavor='raw')
diff --git a/pypy/module/cpyext/setobject.py b/pypy/module/cpyext/setobject.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/cpyext/setobject.py
@@ -0,0 +1,46 @@
+from pypy.interpreter.error import OperationError
+from pypy.rpython.lltypesystem import rffi, lltype
+from pypy.module.cpyext.api import (cpython_api, Py_ssize_t, CANNOT_FAIL,
+                                    build_type_checkers)
+from pypy.module.cpyext.pyobject import (PyObject, PyObjectP, Py_DecRef,
+    borrow_from, make_ref, from_ref)
+from pypy.module.cpyext.pyerrors import PyErr_BadInternalCall
+from pypy.objspace.std.setobject import W_SetObject, newset
+from pypy.objspace.std.smalltupleobject import W_SmallTupleObject
+
+
+PySet_Check, PySet_CheckExact = build_type_checkers("Set")
+
+
+@cpython_api([PyObject], PyObject)
+def PySet_New(space, w_iterable):
+    if w_iterable is None:
+        return space.call_function(space.w_set)
+    else:
+        return space.call_function(space.w_set, w_iterable)
+
+@cpython_api([PyObject, PyObject], rffi.INT_real, error=-1)
+def PySet_Add(space, w_s, w_obj):
+    if not PySet_Check(space, w_s):
+        PyErr_BadInternalCall(space)
+    space.call_method(w_s, 'add', w_obj)
+    return 0
+
+@cpython_api([PyObject, PyObject], rffi.INT_real, error=-1)
+def PySet_Discard(space, w_s, w_obj):
+    if not PySet_Check(space, w_s):
+        PyErr_BadInternalCall(space)
+    space.call_method(w_s, 'discard', w_obj)
+    return 0
+
+
+@cpython_api([PyObject], Py_ssize_t, error=CANNOT_FAIL)
+def PySet_GET_SIZE(space, w_s):
+    return space.int_w(space.len(w_s))
+
+@cpython_api([PyObject], Py_ssize_t, error=-1)
+def PySet_Size(space, ref):
+    if not PySet_Check(space, ref):
+        raise OperationError(space.w_TypeError,
+                             space.wrap("expected set object"))
+    return PySet_GET_SIZE(space, ref)
diff --git a/pypy/module/cpyext/stubs.py b/pypy/module/cpyext/stubs.py
--- a/pypy/module/cpyext/stubs.py
+++ b/pypy/module/cpyext/stubs.py
@@ -480,39 +480,6 @@
     """Create a new Python complex number object from a C Py_complex value."""
     raise NotImplementedError
 
-@cpython_api([rffi.CCHARP, rffi.CCHARPP, PyObject], rffi.DOUBLE, error=-1.0)
-def PyOS_string_to_double(space, s, endptr, overflow_exception):
-    """Convert a string s to a double, raising a Python
-    exception on failure.  The set of accepted strings corresponds to
-    the set of strings accepted by Python's float() constructor,
-    except that s must not have leading or trailing whitespace.
-    The conversion is independent of the current locale.
-
-    If endptr is NULL, convert the whole string.  Raise
-    ValueError and return -1.0 if the string is not a valid
-    representation of a floating-point number.
-
-    If endptr is not NULL, convert as much of the string as
-    possible and set *endptr to point to the first unconverted
-    character.  If no initial segment of the string is the valid
-    representation of a floating-point number, set *endptr to point
-    to the beginning of the string, raise ValueError, and return
-    -1.0.
-
-    If s represents a value that is too large to store in a float
-    (for example, "1e500" is such a string on many platforms) then
-    if overflow_exception is NULL return Py_HUGE_VAL (with
-    an appropriate sign) and don't set any exception.  Otherwise,
-    overflow_exception must point to a Python exception object;
-    raise that exception and return -1.0.  In both cases, set
-    *endptr to point to the first character after the converted value.
-
-    If any other error occurs during the conversion (for example an
-    out-of-memory error), set the appropriate Python exception and
-    return -1.0.
-    """
-    raise NotImplementedError
-
 @cpython_api([rffi.CCHARP, rffi.CCHARPP], rffi.DOUBLE, error=CANNOT_FAIL)
 def PyOS_ascii_strtod(space, nptr, endptr):
     """Convert a string to a double. This function behaves like the Standard C
diff --git a/pypy/module/cpyext/test/test_pystrtod.py b/pypy/module/cpyext/test/test_pystrtod.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/cpyext/test/test_pystrtod.py
@@ -0,0 +1,93 @@
+import math
+
+from pypy.module.cpyext.test.test_api import BaseApiTest
+from pypy.rpython.lltypesystem import rffi
+from pypy.rpython.lltypesystem import lltype
+
+
+class TestPyOS_string_to_double(BaseApiTest):
+
+    def test_simple_float(self, api):
+        s = rffi.str2charp('0.4')
+        null = lltype.nullptr(rffi.CCHARPP.TO)
+        r = api.PyOS_string_to_double(s, null, None)
+        assert r == 0.4
+        rffi.free_charp(s)
+
+    def test_empty_string(self, api):
+        s = rffi.str2charp('')
+        null = lltype.nullptr(rffi.CCHARPP.TO)
+        r = api.PyOS_string_to_double(s, null, None)
+        assert r == -1.0
+        raises(ValueError)
+        api.PyErr_Clear()
+        rffi.free_charp(s)
+
+    def test_bad_string(self, api):
+        s = rffi.str2charp(' 0.4')
+        null = lltype.nullptr(rffi.CCHARPP.TO)
+        r = api.PyOS_string_to_double(s, null, None)
+        assert r == -1.0
+        raises(ValueError)
+        api.PyErr_Clear()
+        rffi.free_charp(s)
+
+    def test_overflow_pos(self, api):
+        s = rffi.str2charp('1e500')
+        null = lltype.nullptr(rffi.CCHARPP.TO)
+        r = api.PyOS_string_to_double(s, null, None)
+        assert math.isinf(r)
+        assert r > 0
+        rffi.free_charp(s)
+
+    def test_overflow_neg(self, api):
+        s = rffi.str2charp('-1e500')
+        null = lltype.nullptr(rffi.CCHARPP.TO)
+        r = api.PyOS_string_to_double(s, null, None)
+        assert math.isinf(r)
+        assert r < 0
+        rffi.free_charp(s)
+
+    def test_overflow_exc(self, space, api):
+        s = rffi.str2charp('1e500')
+        null = lltype.nullptr(rffi.CCHARPP.TO)
+        r = api.PyOS_string_to_double(s, null, space.w_ValueError)
+        assert r == -1.0
+        raises(ValueError)
+        api.PyErr_Clear()
+        rffi.free_charp(s)
+
+    def test_endptr_number(self, api):
+        s = rffi.str2charp('0.4')
+        endp = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw')
+        r = api.PyOS_string_to_double(s, endp, None)
+        assert r == 0.4
+        endp_addr = rffi.cast(rffi.LONG, endp[0])
+        s_addr = rffi.cast(rffi.LONG, s)
+        assert endp_addr == s_addr + 3
+        rffi.free_charp(s)
+        lltype.free(endp, flavor='raw')
+
+    def test_endptr_tail(self, api):
+        s = rffi.str2charp('0.4 foo')
+        endp = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw')
+        r = api.PyOS_string_to_double(s, endp, None)
+        assert r == 0.4
+        endp_addr = rffi.cast(rffi.LONG, endp[0])
+        s_addr = rffi.cast(rffi.LONG, s)
+        assert endp_addr == s_addr + 3
+        rffi.free_charp(s)
+        lltype.free(endp, flavor='raw')
+
+    def test_endptr_no_conversion(self, api):
+        s = rffi.str2charp('foo')
+        endp = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw')
+        r = api.PyOS_string_to_double(s, endp, None)
+        assert r == -1.0
+        raises(ValueError)
+        endp_addr = rffi.cast(rffi.LONG, endp[0])
+        s_addr = rffi.cast(rffi.LONG, s)
+        assert endp_addr == s_addr
+        api.PyErr_Clear()
+        rffi.free_charp(s)
+        lltype.free(endp, flavor='raw')
diff --git a/pypy/module/cpyext/test/test_setobject.py b/pypy/module/cpyext/test/test_setobject.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/cpyext/test/test_setobject.py
@@ -0,0 +1,29 @@
+import py
+
+from pypy.module.cpyext.pyobject import PyObject, PyObjectP, make_ref, from_ref
+from pypy.module.cpyext.test.test_api import BaseApiTest
+from pypy.rpython.lltypesystem import rffi, lltype
+from pypy.conftest import gettestobjspace
+
+
+class TestTupleObject(BaseApiTest):
+    def test_setobj(self, space, api):
+        assert not api.PySet_Check(space.w_None)
+        assert api.PySet_Add(space.w_None, space.w_None) == -1
+        api.PyErr_Clear()
+        w_set = space.call_function(space.w_set)
+        space.call_method(w_set, 'update', space.wrap([1,2,3,4]))
+        assert api.PySet_Size(w_set) == 4
+        assert api.PySet_GET_SIZE(w_set) == 4
+        raises(TypeError, api.PySet_Size(space.newlist([])))
+        api.PyErr_Clear()
+
+    def test_set_add_discard(self, space, api):
+        w_set = api.PySet_New(None)
+        assert api.PySet_Size(w_set) == 0
+        w_set = api.PySet_New(space.wrap([1,2,3,4]))
+        assert api.PySet_Size(w_set) == 4
+        api.PySet_Add(w_set, space.wrap(6))
+        assert api.PySet_Size(w_set) == 5
+        api.PySet_Discard(w_set, space.wrap(6))
+        assert api.PySet_Size(w_set) == 4
diff --git a/pypy/module/cpyext/test/test_sliceobject.py b/pypy/module/cpyext/test/test_sliceobject.py
--- a/pypy/module/cpyext/test/test_sliceobject.py
+++ b/pypy/module/cpyext/test/test_sliceobject.py
@@ -67,3 +67,14 @@
              """),
             ])
         assert module.nullslice() == slice(None, None, None)
+
+    def test_ellipsis(self):
+        module = self.import_extension('foo', [
+            ("get_ellipsis", "METH_NOARGS",
+             """
+                 PyObject *ret = Py_Ellipsis;
+                 Py_INCREF(ret);
+                 return ret;
+             """),
+            ])
+        assert module.get_ellipsis() is Ellipsis
diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py
--- a/pypy/module/cpyext/test/test_unicodeobject.py
+++ b/pypy/module/cpyext/test/test_unicodeobject.py
@@ -367,3 +367,14 @@
                     data, len(u), lltype.nullptr(rffi.CCHARP.TO))
         rffi.free_wcharp(data)
 
+    def test_format(self, space, api):
+        w_format = space.wrap(u'hi %s')
+        w_args = space.wrap((u'test',))
+        w_formated = api.PyUnicode_Format(w_format, w_args)
+        assert space.unwrap(w_formated) == space.unwrap(space.mod(w_format, w_args))
+
+    def test_join(self, space, api):
+        w_sep = space.wrap(u'<sep>')
+        w_seq = space.wrap([u'a', u'b'])
+        w_joined = api.PyUnicode_Join(w_sep, w_seq)
+        assert space.unwrap(w_joined) == u'a<sep>b'
diff --git a/pypy/module/cpyext/test/test_weakref.py b/pypy/module/cpyext/test/test_weakref.py
--- a/pypy/module/cpyext/test/test_weakref.py
+++ b/pypy/module/cpyext/test/test_weakref.py
@@ -7,6 +7,7 @@
         w_ref = api.PyWeakref_NewRef(w_obj, space.w_None)
         assert w_ref is not None
         assert space.is_w(api.PyWeakref_GetObject(w_ref), w_obj)
+        assert space.is_w(api.PyWeakref_GET_OBJECT(w_ref), w_obj)
         assert space.is_w(api.PyWeakref_LockObject(w_ref), w_obj)
 
         w_obj = space.newtuple([])
diff --git a/pypy/module/cpyext/typeobject.py b/pypy/module/cpyext/typeobject.py
--- a/pypy/module/cpyext/typeobject.py
+++ b/pypy/module/cpyext/typeobject.py
@@ -450,7 +450,7 @@
             PyObject_Del.api_func.get_wrapper(space))
     pto.c_tp_alloc = llhelper(PyType_GenericAlloc.api_func.functype,
             PyType_GenericAlloc.api_func.get_wrapper(space))
-    pto.c_tp_name = rffi.str2charp(w_type.getname(space, "?"))
+    pto.c_tp_name = rffi.str2charp(w_type.getname(space))
     pto.c_tp_basicsize = -1 # hopefully this makes malloc bail out
     pto.c_tp_itemsize = 0
     # uninitialized fields:
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -523,3 +523,11 @@
     copies sizeof(Py_UNICODE) * length bytes from source to target"""
     for i in range(0, length):
         target[i] = source[i]
+
+@cpython_api([PyObject, PyObject], PyObject)
+def PyUnicode_Format(space, w_format, w_args):
+    return space.mod(w_format, w_args)
+
+@cpython_api([PyObject, PyObject], PyObject)
+def PyUnicode_Join(space, w_sep, w_seq):
+    return space.call_method(w_sep, 'join', w_seq)
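
The two new functions delegate to the app-level operations space.mod and
unicode.join.  An illustrative sketch in plain Python of what they compute
(the variable names are only for the example):

    fmt = u'hi %s'
    args = (u'test',)
    assert fmt % args == u'hi test'                    # PyUnicode_Format
    assert u'<sep>'.join([u'a', u'b']) == u'a<sep>b'   # PyUnicode_Join
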
diff --git a/pypy/module/cpyext/weakrefobject.py b/pypy/module/cpyext/weakrefobject.py
--- a/pypy/module/cpyext/weakrefobject.py
+++ b/pypy/module/cpyext/weakrefobject.py
@@ -21,6 +21,10 @@
     """Return the referenced object from a weak reference.  If the referent is
     no longer live, returns None. This function returns a borrowed reference.
     """
+    return PyWeakref_GET_OBJECT(space, w_ref)
+
+@cpython_api([PyObject], PyObject)
+def PyWeakref_GET_OBJECT(space, w_ref):
     return borrow_from(w_ref, space.call_function(w_ref))
 
 @cpython_api([PyObject], PyObject)
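
PyWeakref_GetObject and the new PyWeakref_GET_OBJECT now share a single
implementation: calling the weak reference and handing back the result as a
borrowed reference.  An illustrative app-level sketch of that behaviour (the
Target class exists only for the example):

    import weakref

    class Target(object):
        pass

    obj = Target()
    ref = weakref.ref(obj)
    assert ref() is obj      # referent alive: the object itself comes back
    del obj
    # once the referent is collected, ref() returns None instead
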
diff --git a/pypy/module/exceptions/interp_exceptions.py b/pypy/module/exceptions/interp_exceptions.py
--- a/pypy/module/exceptions/interp_exceptions.py
+++ b/pypy/module/exceptions/interp_exceptions.py
@@ -136,7 +136,7 @@
             args_repr = space.str_w(space.repr(space.newtuple(self.args_w)))
         else:
             args_repr = "()"
-        clsname = self.getclass(space).getname(space, '?')
+        clsname = self.getclass(space).getname(space)
         return space.wrap(clsname + args_repr)
 
     def descr_getargs(self, space):
@@ -546,7 +546,7 @@
             w_tuple = space.newtuple(values_w + [self.w_lastlineno])
             args_w = [self.args_w[0], w_tuple]
             args_repr = space.str_w(space.repr(space.newtuple(args_w)))
-            clsname = self.getclass(space).getname(space, '?')
+            clsname = self.getclass(space).getname(space)
             return space.wrap(clsname + args_repr)
         else:
             return W_StandardError.descr_repr(self, space)
diff --git a/pypy/module/imp/importing.py b/pypy/module/imp/importing.py
--- a/pypy/module/imp/importing.py
+++ b/pypy/module/imp/importing.py
@@ -120,7 +120,7 @@
 def check_sys_modules_w(space, modulename):
     return space.finditem_str(space.sys.get('modules'), modulename)
 
-@jit.purefunction
+@jit.elidable
 def _get_dot_position(str, n):
     # return the index in str of the '.' such that there are n '.'-separated
     # strings after it
@@ -133,8 +133,8 @@
 def _get_relative_name(space, modulename, level, w_globals):
     w = space.wrap
     ctxt_w_package = space.finditem_str(w_globals, '__package__')
-    ctxt_w_package = jit.hint(ctxt_w_package, promote=True)
-    level = jit.hint(level, promote=True)
+    ctxt_w_package = jit.promote(ctxt_w_package)
+    level = jit.promote(level)
 
     ctxt_package = None
     if ctxt_w_package is not None and ctxt_w_package is not space.w_None:
@@ -184,7 +184,7 @@
         ctxt_w_name = space.finditem_str(w_globals, '__name__')
         ctxt_w_path = space.finditem_str(w_globals, '__path__')
 
-        ctxt_w_name = jit.hint(ctxt_w_name, promote=True)
+        ctxt_w_name = jit.promote(ctxt_w_name)
         ctxt_name = None
         if ctxt_w_name is not None:
             try:
@@ -622,7 +622,13 @@
         try:
             if find_info:
                 w_mod = load_module(space, w_modulename, find_info)
-                w_mod = space.getitem(space.sys.get("modules"), w_modulename)
+                try:
+                    w_mod = space.getitem(space.sys.get("modules"),
+                                          w_modulename)
+                except OperationError, oe:
+                    if not oe.match(space, space.w_KeyError):
+                        raise
+                    raise OperationError(space.w_ImportError, w_modulename)
                 if w_parent is not None:
                     space.setattr(w_parent, space.wrap(partname), w_mod)
                 return w_mod
@@ -793,14 +799,13 @@
 
 """
 
-# XXX picking a magic number is a mess.  So far it works because we
-# have only two extra opcodes, which bump the magic number by +1 and
-# +2 respectively, and CPython leaves a gap of 10 when it increases
+# picking a magic number is a mess.  So far it works because we
+# have only one extra opcode, which bumps the magic number by +2, and CPython
+# leaves a gap of 10 when it increases
 # its own magic number.  To avoid assigning exactly the same numbers
 # as CPython we always add a +2.  We'll have to think again when we
-# get at the fourth new opcode :-(
+# get three more new opcodes
 #
-#  * CALL_LIKELY_BUILTIN    +1
 #  * CALL_METHOD            +2
 #
 # In other words:
@@ -823,8 +828,6 @@
             return struct.unpack('<i', magic)[0]
 
     result = default_magic
-    if space.config.objspace.opcodes.CALL_LIKELY_BUILTIN:
-        result += 1
     if space.config.objspace.opcodes.CALL_METHOD:
         result += 2
     return result
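
With CALL_LIKELY_BUILTIN gone, CALL_METHOD is the only optional opcode left,
so the computed magic number is either default_magic or default_magic + 2.
A small sketch of the remaining arithmetic (sketch_get_pyc_magic is a
hypothetical name; default_magic is defined earlier in importing.py):

    def sketch_get_pyc_magic(default_magic, call_method_enabled):
        result = default_magic
        if call_method_enabled:      # objspace.opcodes.CALL_METHOD
            result += 2              # the +1 for CALL_LIKELY_BUILTIN is gone
        return result
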
diff --git a/pypy/module/imp/test/test_import.py b/pypy/module/imp/test/test_import.py
--- a/pypy/module/imp/test/test_import.py
+++ b/pypy/module/imp/test/test_import.py
@@ -37,6 +37,7 @@
                     ambig = "imamodule = 1",
                     test_reload = "def test():\n    raise ValueError\n",
                     infinite_reload = "import infinite_reload; reload(infinite_reload)",
+                    del_sys_module = "import sys\ndel sys.modules['del_sys_module']\n",
                     )
     root.ensure("notapackage", dir=1)    # empty, no __init__.py
     setuppkg("pkg",
@@ -562,6 +563,14 @@
         except ImportError:
             pass
 
+    def test_del_from_sys_modules(self):
+        try:
+            import del_sys_module
+        except ImportError:
+            pass    # ok
+        else:
+            assert False, 'should not work'
+
 class TestAbi:
     def test_abi_tag(self):
         space1 = gettestobjspace(soabi='TEST')
diff --git a/pypy/module/math/__init__.py b/pypy/module/math/__init__.py
--- a/pypy/module/math/__init__.py
+++ b/pypy/module/math/__init__.py
@@ -4,6 +4,7 @@
 
 class Module(MixedModule):
     appleveldefs = {
+       'factorial' : 'app_math.factorial'
     }
 
     interpleveldefs = {
@@ -40,7 +41,6 @@
        'isnan'          : 'interp_math.isnan',
        'trunc'          : 'interp_math.trunc',
        'fsum'           : 'interp_math.fsum',
-       'factorial'      : 'interp_math.factorial',
        'asinh'          : 'interp_math.asinh',
        'acosh'          : 'interp_math.acosh',
        'atanh'          : 'interp_math.atanh',
diff --git a/pypy/module/math/app_math.py b/pypy/module/math/app_math.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/math/app_math.py
@@ -0,0 +1,13 @@
+def factorial(x):
+    """Find x!."""
+    if isinstance(x, float):
+        fl = int(x)
+        if fl != x:
+            raise ValueError("float arguments must be integral")
+        x = fl
+    if x < 0:
+        raise ValueError("x must be >= 0")
+    res = 1
+    for i in range(1, x + 1):
+        res *= i
+    return res
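
factorial() is now a plain app-level function.  An illustrative usage sketch
of the behaviour implemented above:

    assert factorial(5) == 120
    assert factorial(5.0) == 120     # integral floats are accepted
    try:
        factorial(2.5)               # non-integral floats raise ValueError
    except ValueError:
        pass
    try:
        factorial(-1)                # negative arguments raise ValueError too
    except ValueError:
        pass
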
diff --git a/pypy/module/math/interp_math.py b/pypy/module/math/interp_math.py
--- a/pypy/module/math/interp_math.py
+++ b/pypy/module/math/interp_math.py
@@ -373,22 +373,6 @@
                 hi = v
     return space.wrap(hi)
 
-def factorial(space, w_x):
-    """Find x!."""
-    if space.isinstance_w(w_x, space.w_float):
-        fl = space.float_w(w_x)
-        if math.floor(fl) != fl:
-            raise OperationError(space.w_ValueError,
-                                 space.wrap("float arguments must be integral"))
-        w_x = space.long(w_x)
-    x = space.int_w(w_x)
-    if x < 0:
-        raise OperationError(space.w_ValueError, space.wrap("x must be >= 0"))
-    w_res = space.wrap(1)
-    for i in range(1, x + 1):
-        w_res = space.mul(w_res, space.wrap(i))
-    return w_res
-
 def log1p(space, w_x):
     """Find log(x + 1)."""
     return math1(space, rfloat.log1p, w_x)
diff --git a/pypy/module/micronumpy/__init__.py b/pypy/module/micronumpy/__init__.py
--- a/pypy/module/micronumpy/__init__.py
+++ b/pypy/module/micronumpy/__init__.py
@@ -8,11 +8,16 @@
     interpleveldefs = {
         'array': 'interp_numarray.SingleDimArray',
         'zeros': 'interp_numarray.zeros',
+        'empty': 'interp_numarray.zeros',
+        'ones': 'interp_numarray.ones',
+        'fromstring': 'interp_support.fromstring',
 
         # ufuncs
+        'abs': 'interp_ufuncs.absolute',
         'absolute': 'interp_ufuncs.absolute',
         'copysign': 'interp_ufuncs.copysign',
         'exp': 'interp_ufuncs.exp',
+        'floor': 'interp_ufuncs.floor',
         'maximum': 'interp_ufuncs.maximum',
         'minimum': 'interp_ufuncs.minimum',
         'negative': 'interp_ufuncs.negative',
@@ -20,4 +25,7 @@
         'sign': 'interp_ufuncs.sign',
     }
 
-    appleveldefs = {}
+    appleveldefs = {
+        'average': 'app_numpy.average',
+        'mean': 'app_numpy.mean',
+    }
diff --git a/pypy/module/micronumpy/app_numpy.py b/pypy/module/micronumpy/app_numpy.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/micronumpy/app_numpy.py
@@ -0,0 +1,11 @@
+import numpy
+
+def average(a):
+    # This implements a weighted average; for now we don't implement the
+    # weighting, just the average part!
+    return mean(a)
+
+def mean(a):
+    if not hasattr(a, "mean"):
+        a = numpy.array(a)
+    return a.mean()
\ No newline at end of file
diff --git a/pypy/module/micronumpy/compile.py b/pypy/module/micronumpy/compile.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/micronumpy/compile.py
@@ -0,0 +1,54 @@
+
+""" This is a set of tools for standalone compiling of numpy expressions.
+It should not be imported by the module itself
+"""
+
+from pypy.module.micronumpy.interp_numarray import FloatWrapper, SingleDimArray
+
+class BogusBytecode(Exception):
+    pass
+
+def create_array(size):
+    a = SingleDimArray(size)
+    for i in range(size):
+        a.storage[i] = float(i % 10)
+    return a
+
+class TrivialSpace(object):
+    def wrap(self, x):
+        return x
+
+def numpy_compile(bytecode, array_size):
+    space = TrivialSpace()
+    stack = []
+    i = 0
+    for b in bytecode:
+        if b == 'a':
+            stack.append(create_array(array_size))
+            i += 1
+        elif b == 'f':
+            stack.append(FloatWrapper(1.2))
+        elif b == '+':
+            right = stack.pop()
+            stack.append(stack.pop().descr_add(space, right))
+        elif b == '-':
+            right = stack.pop()
+            stack.append(stack.pop().descr_sub(space, right))
+        elif b == '*':
+            right = stack.pop()
+            stack.append(stack.pop().descr_mul(space, right))
+        elif b == '/':
+            right = stack.pop()
+            stack.append(stack.pop().descr_div(space, right))
+        elif b == '%':
+            right = stack.pop()
+            stack.append(stack.pop().descr_mod(space, right))
+        elif b == '|':
+            stack.append(stack.pop().descr_abs(space))
+        else:
+            print "Unknown opcode: %s" % b
+            raise BogusBytecode()
+    if len(stack) != 1:
+        print "Bogus bytecode, uneven stack length"
+        raise BogusBytecode()
+    return stack[0]
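
numpy_compile() interprets a tiny postfix bytecode: 'a' pushes a fresh array
filled with i % 10, 'f' pushes the constant FloatWrapper(1.2), the operator
characters pop two operands and push the lazy result, and '|' takes the
absolute value.  An illustrative walk-through of the program used by
test_zjit.TestTranslation.test_compile further down:

    x = numpy_compile('aa+f*f/a-', 10)    # ((a0 + a1) * 1.2) / 1.2 - a2
    x = x.compute()
    assert x.size == 10
    assert x.storage[1] == ((1 + 1) * 1.2) / 1.2 - 1
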
diff --git a/pypy/module/micronumpy/interp_numarray.py b/pypy/module/micronumpy/interp_numarray.py
--- a/pypy/module/micronumpy/interp_numarray.py
+++ b/pypy/module/micronumpy/interp_numarray.py
@@ -1,10 +1,11 @@
 from pypy.interpreter.baseobjspace import ObjSpace, W_Root, Wrappable
-from pypy.interpreter.error import operationerrfmt
+from pypy.interpreter.error import operationerrfmt, OperationError
 from pypy.interpreter.gateway import interp2app, unwrap_spec
-from pypy.interpreter.typedef import TypeDef
+from pypy.interpreter.typedef import TypeDef, GetSetProperty
 from pypy.rlib import jit
 from pypy.rpython.lltypesystem import lltype
 from pypy.tool.sourcetools import func_with_new_name
+import math
 
 
 def dummy1(v):
@@ -19,6 +20,8 @@
 
 numpy_driver = jit.JitDriver(greens = ['signature'],
                              reds = ['result_size', 'i', 'self', 'result'])
+all_driver = jit.JitDriver(greens=['signature'], reds=['i', 'size', 'self'])
+any_driver = jit.JitDriver(greens=['signature'], reds=['i', 'size', 'self'])
 
 class Signature(object):
     def __init__(self):
@@ -30,6 +33,12 @@
         self.transitions[target] = new = Signature()
         return new
 
+def pos(v):
+    return v
+def neg(v):
+    return -v
+def absolute(v):
+    return abs(v)
 def add(v1, v2):
     return v1 + v2
 def sub(v1, v2):
@@ -38,15 +47,43 @@
     return v1 * v2
 def div(v1, v2):
     return v1 / v2
+def power(v1, v2):
+    return math.pow(v1, v2)
+def mod(v1, v2):
+    return math.fmod(v1, v2)
+def maximum(v1, v2):
+    return max(v1, v2)
+def minimum(v1, v2):
+    return min(v1, v2)
 
 class BaseArray(Wrappable):
     def __init__(self):
         self.invalidates = []
 
     def invalidated(self):
+        if self.invalidates:
+            self._invalidated()
+
+    def _invalidated(self):
         for arr in self.invalidates:
             arr.force_if_needed()
-        self.invalidates = []
+        del self.invalidates[:]
+
+    def _unop_impl(function):
+        signature = Signature()
+        def impl(self, space):
+            new_sig = self.signature.transition(signature)
+            res = Call1(
+                function,
+                self,
+                new_sig)
+            self.invalidates.append(res)
+            return space.wrap(res)
+        return func_with_new_name(impl, "uniop_%s_impl" % function.__name__)
+
+    descr_pos = _unop_impl(pos)
+    descr_neg = _unop_impl(neg)
+    descr_abs = _unop_impl(absolute)
 
     def _binop_impl(function):
         signature = Signature()
@@ -76,22 +113,164 @@
     descr_sub = _binop_impl(sub)
     descr_mul = _binop_impl(mul)
     descr_div = _binop_impl(div)
+    descr_pow = _binop_impl(power)
+    descr_mod = _binop_impl(mod)
+
+    def _binop_right_impl(function):
+        signature = Signature()
+        def impl(self, space, w_other):
+            new_sig = self.signature.transition(signature)
+            w_other = FloatWrapper(space.float_w(w_other))
+            res = Call2(
+                function,
+                w_other,
+                self,
+                new_sig.transition(w_other.signature)
+            )
+            self.invalidates.append(res)
+            return space.wrap(res)
+        return func_with_new_name(impl,
+                                  "binop_right_%s_impl" % function.__name__)
+
+    descr_radd = _binop_right_impl(add)
+    descr_rsub = _binop_right_impl(sub)
+    descr_rmul = _binop_right_impl(mul)
+    descr_rdiv = _binop_right_impl(div)
+    descr_rpow = _binop_right_impl(power)
+    descr_rmod = _binop_right_impl(mod)
+
+    def _reduce_sum_prod_impl(function, init):
+        reduce_driver = jit.JitDriver(greens=['signature'],
+                         reds = ['i', 'size', 'self', 'result'])
+
+        def loop(self, result, size):
+            i = 0
+            while i < size:
+                reduce_driver.jit_merge_point(signature=self.signature,
+                                              self=self, size=size, i=i,
+                                              result=result)
+                result = function(result, self.eval(i))
+                i += 1
+            return result
+
+        def impl(self, space):
+            return space.wrap(loop(self, init, self.find_size()))
+        return func_with_new_name(impl, "reduce_%s_impl" % function.__name__)
+
+    def _reduce_max_min_impl(function):
+        reduce_driver = jit.JitDriver(greens=['signature'],
+                         reds = ['i', 'size', 'self', 'result'])
+        def loop(self, result, size):
+            i = 1
+            while i < size:
+                reduce_driver.jit_merge_point(signature=self.signature,
+                                              self=self, size=size, i=i,
+                                              result=result)
+                result = function(result, self.eval(i))
+                i += 1
+            return result
+
+        def impl(self, space):
+            size = self.find_size()
+            if size == 0:
+                raise OperationError(space.w_ValueError,
+                    space.wrap("Can't call %s on zero-size arrays" \
+                            % function.__name__))
+            return space.wrap(loop(self, self.eval(0), size))
+        return func_with_new_name(impl, "reduce_%s_impl" % function.__name__)
+
+    def _reduce_argmax_argmin_impl(function):
+        reduce_driver = jit.JitDriver(greens=['signature'],
+                         reds = ['i', 'size', 'result', 'self', 'cur_best'])
+        def loop(self, size):
+            result = 0
+            cur_best = self.eval(0)
+            i = 1
+            while i < size:
+                reduce_driver.jit_merge_point(signature=self.signature,
+                                              self=self, size=size, i=i,
+                                              result=result, cur_best=cur_best)
+                new_best = function(cur_best, self.eval(i))
+                if new_best != cur_best:
+                    result = i
+                    cur_best = new_best
+                i += 1
+            return result
+        def impl(self, space):
+            size = self.find_size()
+            if size == 0:
+                raise OperationError(space.w_ValueError,
+                    space.wrap("Can't call %s on zero-size arrays" \
+                            % function.__name__))
+            return space.wrap(loop(self, size))
+        return func_with_new_name(impl, "reduce_arg%s_impl" % function.__name__)
+
+    def _all(self):
+        size = self.find_size()
+        i = 0
+        while i < size:
+            all_driver.jit_merge_point(signature=self.signature, self=self, size=size, i=i)
+            if not self.eval(i):
+                return False
+            i += 1
+        return True
+    def descr_all(self, space):
+        return space.wrap(self._all())
+
+    def _any(self):
+        size = self.find_size()
+        i = 0
+        while i < size:
+            any_driver.jit_merge_point(signature=self.signature, self=self, size=size, i=i)
+            if self.eval(i):
+                return True
+            i += 1
+        return False
+    def descr_any(self, space):
+        return space.wrap(self._any())
+
+    descr_sum = _reduce_sum_prod_impl(add, 0.0)
+    descr_prod = _reduce_sum_prod_impl(mul, 1.0)
+    descr_max = _reduce_max_min_impl(maximum)
+    descr_min = _reduce_max_min_impl(minimum)
+    descr_argmax = _reduce_argmax_argmin_impl(maximum)
+    descr_argmin = _reduce_argmax_argmin_impl(minimum)
+
+    def descr_dot(self, space, w_other):
+        if isinstance(w_other, BaseArray):
+            w_res = self.descr_mul(space, w_other)
+            assert isinstance(w_res, BaseArray)
+            return w_res.descr_sum(space)
+        else:
+            return self.descr_mul(space, w_other)
 
     def get_concrete(self):
         raise NotImplementedError
 
+    def descr_get_shape(self, space):
+        return space.newtuple([self.descr_len(space)])
+
     def descr_len(self, space):
         return self.get_concrete().descr_len(space)
 
-    @unwrap_spec(item=int)
-    def descr_getitem(self, space, item):
-        return self.get_concrete().descr_getitem(space, item)
+    def descr_getitem(self, space, w_idx):
+        # TODO: indexing by tuples
+        start, stop, step, slice_length = space.decode_index4(w_idx, self.find_size())
+        if step == 0:
+            # Single index
+            return space.wrap(self.get_concrete().getitem(start))
+        else:
+            # Slice
+            res = SingleDimSlice(start, stop, step, slice_length, self, self.signature.transition(SingleDimSlice.static_signature))
+            return space.wrap(res)
 
     @unwrap_spec(item=int, value=float)
     def descr_setitem(self, space, item, value):
         self.invalidated()
         return self.get_concrete().descr_setitem(space, item, value)
 
+    def descr_mean(self, space):
+        return space.wrap(space.float_w(self.descr_sum(space))/self.find_size())
 
 class FloatWrapper(BaseArray):
     """
@@ -119,6 +298,10 @@
         self.forced_result = None
         self.signature = signature
 
+    def _del_sources(self):
+        # Function for deleting references to source arrays, to allow garbage-collecting them
+        raise NotImplementedError
+
     def compute(self):
         i = 0
         signature = self.signature
@@ -135,6 +318,7 @@
     def force_if_needed(self):
         if self.forced_result is None:
             self.forced_result = self.compute()
+            self._del_sources()
 
     def get_concrete(self):
         self.force_if_needed()
@@ -145,6 +329,13 @@
             return self.forced_result.eval(i)
         return self._eval(i)
 
+    def find_size(self):
+        if self.forced_result is not None:
+            # The result has been computed and sources may be unavailable
+            return self.forced_result.find_size()
+        return self._find_size()
+
+
 class Call1(VirtualArray):
     _immutable_fields_ = ["function", "values"]
 
@@ -153,7 +344,10 @@
         self.function = function
         self.values = values
 
-    def find_size(self):
+    def _del_sources(self):
+        self.values = None
+
+    def _find_size(self):
         return self.values.find_size()
 
     def _eval(self, i):
@@ -164,13 +358,18 @@
     Intermediate class for performing binary operations.
     """
     _immutable_fields_ = ["function", "left", "right"]
+
     def __init__(self, function, left, right, signature):
         VirtualArray.__init__(self, signature)
         self.function = function
         self.left = left
         self.right = right
 
-    def find_size(self):
+    def _del_sources(self):
+        self.left = None
+        self.right = None
+
+    def _find_size(self):
         try:
             return self.left.find_size()
         except ValueError:
@@ -181,6 +380,58 @@
         lhs, rhs = self.left.eval(i), self.right.eval(i)
         return self.function(lhs, rhs)
 
+class ViewArray(BaseArray):
+    """
+    Class for representing views of arrays; they reflect changes made to the
+    parent arrays. Example: slices.
+    """
+    _immutable_fields_ = ["parent"]
+
+    def __init__(self, parent, signature):
+        BaseArray.__init__(self)
+        self.signature = signature
+        self.parent = parent
+        self.invalidates = parent.invalidates
+
+    def get_concrete(self):
+        # in fact, ViewArray never gets "concrete" as it never stores data.
+        # This implementation is needed for BaseArray getitem/setitem to work,
+        # can be refactored.
+        return self
+
+    def eval(self, i):
+        return self.parent.eval(self.calc_index(i))
+
+    def getitem(self, item):
+        return self.parent.getitem(self.calc_index(item))
+
+    @unwrap_spec(item=int, value=float)
+    def descr_setitem(self, space, item, value):
+        return self.parent.descr_setitem(space, self.calc_index(item), value)
+
+    def descr_len(self, space):
+        return space.wrap(self.find_size())
+
+    def calc_index(self, item):
+        raise NotImplementedError
+
+class SingleDimSlice(ViewArray):
+    _immutable_fields_ = ["start", "stop", "step", "size"]
+    static_signature = Signature()
+
+    def __init__(self, start, stop, step, slice_length, parent, signature):
+        ViewArray.__init__(self, parent, signature)
+        self.start = start
+        self.stop = stop
+        self.step = step
+        self.size = slice_length
+
+    def find_size(self):
+        return self.size
+
+    def calc_index(self, item):
+        return (self.start + item * self.step)
+
 
 class SingleDimArray(BaseArray):
     signature = Signature()
@@ -215,10 +466,8 @@
     def descr_len(self, space):
         return space.wrap(self.size)
 
-    @unwrap_spec(item=int)
-    def descr_getitem(self, space, item):
-        item = self.getindex(space, item)
-        return space.wrap(self.storage[item])
+    def getitem(self, item):
+        return self.storage[item]
 
     @unwrap_spec(item=int, value=float)
     def descr_setitem(self, space, item, value):
@@ -238,20 +487,51 @@
         i += 1
     return space.wrap(arr)
 
-@unwrap_spec(ObjSpace, int)
+@unwrap_spec(size=int)
 def zeros(space, size):
     return space.wrap(SingleDimArray(size))
 
+@unwrap_spec(size=int)
+def ones(space, size):
+    arr = SingleDimArray(size)
+    for i in xrange(size):
+        arr.storage[i] = 1.0
+    return space.wrap(arr)
 
 BaseArray.typedef = TypeDef(
     'numarray',
     __new__ = interp2app(descr_new_numarray),
+
+    shape = GetSetProperty(BaseArray.descr_get_shape),
+
     __len__ = interp2app(BaseArray.descr_len),
     __getitem__ = interp2app(BaseArray.descr_getitem),
     __setitem__ = interp2app(BaseArray.descr_setitem),
 
+    __pos__ = interp2app(BaseArray.descr_pos),
+    __neg__ = interp2app(BaseArray.descr_neg),
+    __abs__ = interp2app(BaseArray.descr_abs),
     __add__ = interp2app(BaseArray.descr_add),
     __sub__ = interp2app(BaseArray.descr_sub),
     __mul__ = interp2app(BaseArray.descr_mul),
     __div__ = interp2app(BaseArray.descr_div),
-)
\ No newline at end of file
+    __pow__ = interp2app(BaseArray.descr_pow),
+    __mod__ = interp2app(BaseArray.descr_mod),
+    __radd__ = interp2app(BaseArray.descr_radd),
+    __rsub__ = interp2app(BaseArray.descr_rsub),
+    __rmul__ = interp2app(BaseArray.descr_rmul),
+    __rdiv__ = interp2app(BaseArray.descr_rdiv),
+    __rpow__ = interp2app(BaseArray.descr_rpow),
+    __rmod__ = interp2app(BaseArray.descr_rmod),
+
+    mean = interp2app(BaseArray.descr_mean),
+    sum = interp2app(BaseArray.descr_sum),
+    prod = interp2app(BaseArray.descr_prod),
+    max = interp2app(BaseArray.descr_max),
+    min = interp2app(BaseArray.descr_min),
+    argmax = interp2app(BaseArray.descr_argmax),
+    argmin = interp2app(BaseArray.descr_argmin),
+    all = interp2app(BaseArray.descr_all),
+    any = interp2app(BaseArray.descr_any),
+    dot = interp2app(BaseArray.descr_dot),
+)
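
The new descr_getitem uses space.decode_index4() to tell single indices
(step == 0) apart from slices; a slice produces a SingleDimSlice view that
keeps reading through to its parent.  An illustrative app-level sketch of the
resulting behaviour, matching the slice tests added below:

    from numpy import array
    a = array(range(5))
    assert a[1] == 1.0       # integer index: the element itself
    s = a[1:4]               # slice: a view, no data is copied
    a[2] = 20.0
    assert s[1] == 20.0      # the view reflects later writes to the parent
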
diff --git a/pypy/module/micronumpy/interp_support.py b/pypy/module/micronumpy/interp_support.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/micronumpy/interp_support.py
@@ -0,0 +1,32 @@
+
+from pypy.rlib.rstruct.runpack import runpack
+from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.interpreter.gateway import unwrap_spec
+from pypy.interpreter.error import OperationError
+from pypy.module.micronumpy.interp_numarray import SingleDimArray
+
+FLOAT_SIZE = rffi.sizeof(lltype.Float)
+
+@unwrap_spec(s=str)
+def fromstring(space, s):
+    length = len(s)
+
+    if length % FLOAT_SIZE == 0:
+        number = length/FLOAT_SIZE
+    else:
+        raise OperationError(space.w_ValueError, space.wrap(
+            "string length %d not divisible by %d" % (length, FLOAT_SIZE)))
+
+    a = SingleDimArray(number)
+
+    start = 0
+    end = FLOAT_SIZE
+    i = 0
+    while i < number:
+        part = s[start:end]
+        a.storage[i] = runpack('d', part)
+        i += 1
+        start += FLOAT_SIZE
+        end += FLOAT_SIZE
+
+    return space.wrap(a)
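
fromstring() unpacks consecutive native doubles with runpack('d', ...).  An
illustrative usage sketch, mirroring the AppTestSupport test added below:

    import struct
    from numpy import fromstring
    data = struct.pack('dddd', 1, 2, 3, 4)     # four native doubles
    a = fromstring(data)
    assert [a[i] for i in range(4)] == [1.0, 2.0, 3.0, 4.0]
    # a length that is not a multiple of sizeof(double) raises ValueError
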
diff --git a/pypy/module/micronumpy/interp_ufuncs.py b/pypy/module/micronumpy/interp_ufuncs.py
--- a/pypy/module/micronumpy/interp_ufuncs.py
+++ b/pypy/module/micronumpy/interp_ufuncs.py
@@ -8,22 +8,24 @@
 
 def ufunc(func):
     signature = Signature()
-    @unwrap_spec(array=BaseArray)
-    def impl(space, array):
-        w_res = Call1(func, array, array.signature.transition(signature))
-        array.invalidates.append(w_res)
-        return w_res
+    def impl(space, w_obj):
+        if isinstance(w_obj, BaseArray):
+            w_res = Call1(func, w_obj, w_obj.signature.transition(signature))
+            w_obj.invalidates.append(w_res)
+            return w_res
+        return space.wrap(func(space.float_w(w_obj)))
     return func_with_new_name(impl, "%s_dispatcher" % func.__name__)
 
 def ufunc2(func):
     signature = Signature()
-    @unwrap_spec(larray=BaseArray, rarray=BaseArray)
-    def impl(space, larray, rarray):
-        new_sig = larray.signature.transition(signature).transition(rarray.signature)
-        w_res = Call2(func, larray, rarray, new_sig)
-        larray.invalidates.append(w_res)
-        rarray.invalidates.append(w_res)
-        return w_res
+    def impl(space, w_lhs, w_rhs):
+        if isinstance(w_lhs, BaseArray) and isinstance(w_rhs, BaseArray):
+            new_sig = w_lhs.signature.transition(signature).transition(w_rhs.signature)
+            w_res = Call2(func, w_lhs, w_rhs, new_sig)
+            w_lhs.invalidates.append(w_res)
+            w_rhs.invalidates.append(w_res)
+            return w_res
+        return space.wrap(func(space.float_w(w_lhs), space.float_w(w_rhs)))
     return func_with_new_name(impl, "%s_dispatcher" % func.__name__)
 
 @ufunc
@@ -60,6 +62,10 @@
     return 1.0 / value
 
 @ufunc
+def floor(value):
+    return math.floor(value)
+
+@ufunc
 def sign(value):
     if value == 0.0:
         return 0.0
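
The dispatchers built by ufunc() and ufunc2() now fall back to unwrapping a
plain float when an argument is not a BaseArray, so the ufuncs also accept
scalars.  An illustrative app-level sketch, using the same cases as
test_single_item and test_floor below:

    from numpy import negative, sign, minimum, floor
    assert negative(5.0) == -5.0
    assert sign(-0.0) == 0.0
    assert minimum(2.0, 3.0) == 2.0
    assert floor(1.4) == 1.0
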
diff --git a/pypy/module/micronumpy/test/test_base.py b/pypy/module/micronumpy/test/test_base.py
--- a/pypy/module/micronumpy/test/test_base.py
+++ b/pypy/module/micronumpy/test/test_base.py
@@ -16,4 +16,14 @@
         v3 = ar.descr_add(space, FloatWrapper(1.0))
         assert v2.signature is v3.signature
         v4 = ar.descr_add(space, ar)
-        assert v1.signature is v4.signature
\ No newline at end of file
+        assert v1.signature is v4.signature
+
+    def test_slice_signature(self, space):
+        ar = SingleDimArray(10)
+        v1 = ar.descr_getitem(space, space.wrap(slice(1, 5, 1)))
+        v2 = ar.descr_getitem(space, space.wrap(slice(4, 6, 1)))
+        assert v1.signature is v2.signature
+
+        v3 = ar.descr_add(space, v1)
+        v4 = ar.descr_add(space, v2)
+        assert v3.signature is v4.signature
\ No newline at end of file
diff --git a/pypy/module/micronumpy/test/test_module.py b/pypy/module/micronumpy/test/test_module.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/micronumpy/test/test_module.py
@@ -0,0 +1,13 @@
+from pypy.module.micronumpy.test.test_base import BaseNumpyAppTest
+
+
+class AppTestNumPyModule(BaseNumpyAppTest):
+    def test_mean(self):
+        from numpy import array, mean
+        assert mean(array(range(5))) == 2.0
+        assert mean(range(5)) == 2.0
+
+    def test_average(self):
+        from numpy import array, average
+        assert average(range(10)) == 4.5
+        assert average(array(range(10))) == 4.5
\ No newline at end of file
diff --git a/pypy/module/micronumpy/test/test_numarray.py b/pypy/module/micronumpy/test/test_numarray.py
--- a/pypy/module/micronumpy/test/test_numarray.py
+++ b/pypy/module/micronumpy/test/test_numarray.py
@@ -1,6 +1,7 @@
 import py
 
 from pypy.module.micronumpy.test.test_base import BaseNumpyAppTest
+from pypy.conftest import gettestobjspace
 
 
 class AppTestNumArray(BaseNumpyAppTest):
@@ -18,6 +19,25 @@
         a[13] = 5.3
         assert a[13] == 5.3
 
+    def test_empty(self):
+        """
+        Test that empty() works.
+        """
+
+        from numpy import empty
+        a = empty(2)
+        a[1] = 1.0
+        assert a[1] == 1.0
+
+    def test_ones(self):
+        from numpy import ones
+        a = ones(3)
+        assert len(a) == 3
+        assert a[0] == 1
+        raises(IndexError, "a[3]")
+        a[2] = 4
+        assert a[2] == 4
+
     def test_iterator_init(self):
         from numpy import array
         a = array(range(5))
@@ -46,6 +66,15 @@
         assert len(a) == 5
         assert len(a + a) == 5
 
+    def test_shape(self):
+        from numpy import array
+        a = array(range(5))
+        assert a.shape == (5,)
+        b = a + a
+        assert b.shape == (5,)
+        c = a[:3]
+        assert c.shape == (3,)
+
     def test_add(self):
         from numpy import array
         a = array(range(5))
@@ -126,6 +155,72 @@
         for i in range(5):
             assert b[i] == i / 5.0
 
+    def test_pow(self):
+        from numpy import array
+        a = array(range(5))
+        b = a ** a
+        for i in range(5):
+            print b[i], i**i
+            assert b[i] == i**i
+
+    def test_pow_other(self):
+        from numpy import array
+        a = array(range(5))
+        b = array([2, 2, 2, 2, 2])
+        c = a ** b
+        for i in range(5):
+            assert c[i] == i ** 2
+
+    def test_pow_constant(self):
+        from numpy import array
+        a = array(range(5))
+        b = a ** 2
+        for i in range(5):
+            assert b[i] == i ** 2
+
+    def test_mod(self):
+        from numpy import array
+        a = array(range(1,6))
+        b = a % a
+        for i in range(5):
+            assert b[i] == 0
+
+    def test_mod_other(self):
+        from numpy import array
+        a = array(range(5))
+        b = array([2, 2, 2, 2, 2])
+        c = a % b
+        for i in range(5):
+            assert c[i] == i % 2
+
+    def test_mod_constant(self):
+        from numpy import array
+        a = array(range(5))
+        b = a % 2
+        for i in range(5):
+            assert b[i] == i % 2
+
+    def test_pos(self):
+        from numpy import array
+        a = array([1.,-2.,3.,-4.,-5.])
+        b = +a
+        for i in range(5):
+            assert b[i] == a[i]
+
+    def test_neg(self):
+        from numpy import array
+        a = array([1.,-2.,3.,-4.,-5.])
+        b = -a
+        for i in range(5):
+            assert b[i] == -a[i]
+
+    def test_abs(self):
+        from numpy import array
+        a = array([1.,-2.,3.,-4.,-5.])
+        b = abs(a)
+        for i in range(5):
+            assert b[i] == abs(a[i])
+
     def test_auto_force(self):
         from numpy import array
         a = array(range(5))
@@ -138,4 +233,141 @@
         b = a + a
         c = b + b
         b[1] = 5
-        assert c[1] == 4
\ No newline at end of file
+        assert c[1] == 4
+
+    def test_getslice(self):
+        from numpy import array
+        a = array(range(5))
+        s = a[1:5]
+        assert len(s) == 4
+        for i in range(4):
+            assert s[i] == a[i+1]
+
+    def test_getslice_step(self):
+        from numpy import array
+        a = array(range(10))
+        s = a[1:9:2]
+        assert len(s) == 4
+        for i in range(4):
+            assert s[i] == a[2*i+1]
+
+    def test_slice_update(self):
+        from numpy import array
+        a = array(range(5))
+        s = a[0:3]
+        s[1] = 10
+        assert a[1] == 10
+        a[2] = 20
+        assert s[2] == 20
+
+
+    def test_slice_invalidate(self):
+        # check that a slice shares its invalidation list with its parent array
+        from numpy import array
+        a = array(range(5))
+        s = a[0:2]
+        b = array([10,11])
+        c = s + b
+        a[0] = 100
+        assert c[0] == 10
+        assert c[1] == 12
+        d = s + b
+        a[1] = 101
+        assert d[0] == 110
+        assert d[1] == 12
+
+    def test_mean(self):
+        from numpy import array
+        a = array(range(5))
+        assert a.mean() == 2.0
+        assert a[:4].mean() == 1.5
+
+    def test_sum(self):
+        from numpy import array
+        a = array(range(5))
+        assert a.sum() == 10.0
+        assert a[:4].sum() == 6.0
+
+    def test_prod(self):
+        from numpy import array
+        a = array(range(1,6))
+        assert a.prod() == 120.0
+        assert a[:4].prod() == 24.0
+
+    def test_max(self):
+        from numpy import array
+        a = array([-1.2, 3.4, 5.7, -3.0, 2.7])
+        assert a.max() == 5.7
+        b = array([])
+        raises(ValueError, "b.max()")
+
+    def test_max_add(self):
+        from numpy import array
+        a = array([-1.2, 3.4, 5.7, -3.0, 2.7])
+        assert (a+a).max() == 11.4
+
+    def test_min(self):
+        from numpy import array
+        a = array([-1.2, 3.4, 5.7, -3.0, 2.7])
+        assert a.min() == -3.0
+        b = array([])
+        raises(ValueError, "b.min()")
+
+    def test_argmax(self):
+        from numpy import array
+        a = array([-1.2, 3.4, 5.7, -3.0, 2.7])
+        assert a.argmax() == 2
+        b = array([])
+        raises(ValueError, "b.argmax()")
+
+    def test_argmin(self):
+        from numpy import array
+        a = array([-1.2, 3.4, 5.7, -3.0, 2.7])
+        assert a.argmin() == 3
+        b = array([])
+        raises(ValueError, "b.argmin()")
+
+    def test_all(self):
+        from numpy import array
+        a = array(range(5))
+        assert a.all() == False
+        a[0] = 3.0
+        assert a.all() == True
+        b = array([])
+        assert b.all() == True
+
+    def test_any(self):
+        from numpy import array, zeros
+        a = array(range(5))
+        assert a.any() == True
+        b = zeros(5)
+        assert b.any() == False
+        c = array([])
+        assert c.any() == False
+
+    def test_dot(self):
+        from numpy import array
+        a = array(range(5))
+        assert a.dot(a) == 30.0
+
+    def test_dot_constant(self):
+        from numpy import array
+        a = array(range(5))
+        b = a.dot(2.5)
+        for i in xrange(5):
+            assert b[i] == 2.5*a[i]
+
+
+class AppTestSupport(object):
+    def setup_class(cls):
+        import struct
+        cls.space = gettestobjspace(usemodules=('micronumpy',))
+        cls.w_data = cls.space.wrap(struct.pack('dddd', 1, 2, 3, 4))
+
+    def test_fromstring(self):
+        from numpy import fromstring
+        a = fromstring(self.data)
+        for i in range(4):
+            assert a[i] == i + 1
+        raises(ValueError, fromstring, "abc")
+
diff --git a/pypy/module/micronumpy/test/test_ufuncs.py b/pypy/module/micronumpy/test/test_ufuncs.py
--- a/pypy/module/micronumpy/test/test_ufuncs.py
+++ b/pypy/module/micronumpy/test/test_ufuncs.py
@@ -3,6 +3,13 @@
 
 
 class AppTestUfuncs(BaseNumpyAppTest):
+    def test_single_item(self):
+        from numpy import negative, sign, minimum
+
+        assert negative(5.0) == -5.0
+        assert sign(-0.0) == 0.0
+        assert minimum(2.0, 3.0) == 2.0
+
     def test_negative(self):
         from numpy import array, negative
 
@@ -60,6 +67,15 @@
         for i in range(4):
             assert b[i] == reference[i]
 
+    def test_floor(self):
+        from numpy import array, floor
+
+        reference = [-2.0, -1.0, 0.0, 1.0, 1.0]
+        a = array([-1.4, -1.0, 0.0, 1.0, 1.4])
+        b = floor(a)
+        for i in range(5):
+            assert b[i] == reference[i]
+
     def test_copysign(self):
         from numpy import array, copysign
 
diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -1,19 +1,22 @@
 from pypy.jit.metainterp.test.support import LLJitMixin
+from pypy.rpython.test.test_llinterp import interpret
 from pypy.module.micronumpy.interp_numarray import (SingleDimArray, Signature,
-    FloatWrapper, Call1, Call2, add, mul)
+    FloatWrapper, Call2, SingleDimSlice, add, mul, neg, Call1)
 from pypy.module.micronumpy.interp_ufuncs import negative
-
+from pypy.module.micronumpy.compile import numpy_compile
+from pypy.rlib.objectmodel import specialize
 
 class FakeSpace(object):
-    pass
+    w_ValueError = None
+    @specialize.argtype(1)
+    def wrap(self, v):
+        return v
 
 class TestNumpyJIt(LLJitMixin):
     def setup_class(cls):
         cls.space = FakeSpace()
 
     def test_add(self):
-        space = self.space
-
         def f(i):
             ar = SingleDimArray(i)
             v = Call2(add, ar, ar, Signature())
@@ -26,8 +29,6 @@
         assert result == f(5)
 
     def test_floatadd(self):
-        space = self.space
-
         def f(i):
             ar = SingleDimArray(i)
             v = Call2(add, ar, FloatWrapper(4.5), Signature())
@@ -39,11 +40,128 @@
                           "int_lt": 1, "guard_true": 1, "jump": 1})
         assert result == f(5)
 
-    def test_already_forecd(self):
+    def test_neg(self):
         space = self.space
 
         def f(i):
             ar = SingleDimArray(i)
+            v = Call1(neg, ar, Signature())
+            return v.get_concrete().storage[3]
+
+        result = self.meta_interp(f, [5], listops=True, backendopt=True)
+        self.check_loops({"getarrayitem_raw": 1, "float_neg": 1,
+                          "setarrayitem_raw": 1, "int_add": 1,
+                          "int_lt": 1, "guard_true": 1, "jump": 1})
+
+        assert result == f(5)
+
+    def test_sum(self):
+        space = self.space
+
+        def f(i):
+            ar = SingleDimArray(i)
+            return ar.descr_add(space, ar).descr_sum(space)
+
+        result = self.meta_interp(f, [5], listops=True, backendopt=True)
+        self.check_loops({"getarrayitem_raw": 2, "float_add": 2,
+                          "int_add": 1,
+                          "int_lt": 1, "guard_true": 1, "jump": 1})
+        assert result == f(5)
+
+    def test_prod(self):
+        space = self.space
+
+        def f(i):
+            ar = SingleDimArray(i)
+            return ar.descr_add(space, ar).descr_prod(space)
+
+        result = self.meta_interp(f, [5], listops=True, backendopt=True)
+        self.check_loops({"getarrayitem_raw": 2, "float_add": 1,
+                          "float_mul": 1, "int_add": 1,
+                          "int_lt": 1, "guard_true": 1, "jump": 1})
+        assert result == f(5)
+
+    def test_max(self):
+        space = self.space
+
+        def f(i):
+            ar = SingleDimArray(i)
+            j = 0
+            while j < i:
+                ar.get_concrete().storage[j] = float(j)
+                j += 1
+            return ar.descr_add(space, ar).descr_max(space)
+
+        result = self.meta_interp(f, [5], listops=True, backendopt=True)
+        self.check_loops({"getarrayitem_raw": 2, "float_add": 1,
+                          "float_gt": 1, "int_add": 1,
+                          "int_lt": 1, "guard_true": 1, 
+                          "guard_false": 1, "jump": 1})
+
+    def test_min(self):
+        space = self.space
+
+        def f(i):
+            ar = SingleDimArray(i)
+            j = 0
+            while j < i:
+                ar.get_concrete().storage[j] = float(j)
+                j += 1
+            return ar.descr_add(space, ar).descr_min(space)
+
+        result = self.meta_interp(f, [5], listops=True, backendopt=True)
+        self.check_loops({"getarrayitem_raw": 2, "float_add": 1,
+                           "float_lt": 1, "int_add": 1,
+                           "int_lt": 1, "guard_true": 2,
+                           "jump": 1})
+
+    def test_argmin(self):
+        space = self.space
+
+        def f(i):
+            ar = SingleDimArray(i)
+            j = 0
+            while j < i:
+                ar.get_concrete().storage[j] = float(j)
+                j += 1
+            return ar.descr_add(space, ar).descr_argmin(space)
+
+        result = self.meta_interp(f, [5], listops=True, backendopt=True)
+        self.check_loops({"getarrayitem_raw": 2, "float_add": 1,
+                           "float_lt": 1, "int_add": 1,
+                           "int_lt": 1, "guard_true": 2,
+                           "jump": 1})
+
+    def test_all(self):
+        space = self.space
+
+        def f(i):
+            ar = SingleDimArray(i)
+            j = 0
+            while j < i:
+                ar.get_concrete().storage[j] = 1.0
+                j += 1
+            return ar.descr_add(space, ar).descr_all(space)
+        result = self.meta_interp(f, [5], listops=True, backendopt=True)
+        self.check_loops({"getarrayitem_raw": 2, "float_add": 1,
+                          "int_add": 1, "float_ne": 1,
+                          "int_lt": 1, "guard_true": 2, "jump": 1})
+
+    def test_any(self):
+        space = self.space
+
+        def f(i):
+            ar = SingleDimArray(i)
+            return ar.descr_add(space, ar).descr_any(space)
+
+        result = self.meta_interp(f, [5], listops=True, backendopt=True)
+        self.check_loops({"getarrayitem_raw": 2, "float_add": 1,
+                          "int_add": 1, "float_ne": 1, "guard_false": 1,
+                          "int_lt": 1, "guard_true": 1, "jump": 1})
+
+    def test_already_forced(self):
+        def f(i):
+            ar = SingleDimArray(i)
             v1 = Call2(add, ar, FloatWrapper(4.5), Signature())
             v2 = Call2(mul, v1, FloatWrapper(4.5), Signature())
             v1.force_if_needed()
@@ -91,4 +209,50 @@
 
         self.meta_interp(f, [5], listops=True, backendopt=True)
         # This is 3, not 2 because there is a bridge for the exit.
-        self.check_loop_count(3)
\ No newline at end of file
+        self.check_loop_count(3)
+
+    def test_slice(self):
+        def f(i):
+            step = 3
+            ar = SingleDimArray(step*i)
+            s = SingleDimSlice(0, step*i, step, i, ar, ar.signature.transition(SingleDimSlice.static_signature))
+            v = Call2(add, s, s, Signature())
+            return v.get_concrete().storage[3]
+
+        result = self.meta_interp(f, [5], listops=True, backendopt=True)
+        self.check_loops({'int_mul': 1, 'getarrayitem_raw': 2, 'float_add': 1,
+                          'setarrayitem_raw': 1, 'int_add': 1,
+                          'int_lt': 1, 'guard_true': 1, 'jump': 1})
+        assert result == f(5)
+
+    def test_slice2(self):
+        def f(i):
+            step1 = 2
+            step2 = 3
+            ar = SingleDimArray(step2*i)
+            s1 = SingleDimSlice(0, step1*i, step1, i, ar, ar.signature.transition(SingleDimSlice.static_signature))
+            s2 = SingleDimSlice(0, step2*i, step2, i, ar, ar.signature.transition(SingleDimSlice.static_signature))
+            v = Call2(add, s1, s2, Signature())
+            return v.get_concrete().storage[3]
+
+        result = self.meta_interp(f, [5], listops=True, backendopt=True)
+        self.check_loops({'int_mul': 2, 'getarrayitem_raw': 2, 'float_add': 1,
+                          'setarrayitem_raw': 1, 'int_add': 1,
+                          'int_lt': 1, 'guard_true': 1, 'jump': 1})
+        assert result == f(5)
+
+class TestTranslation(object):
+    def test_compile(self):
+        x = numpy_compile('aa+f*f/a-', 10)
+        x = x.compute()
+        assert isinstance(x, SingleDimArray)
+        assert x.size == 10
+        assert x.storage[0] == 0
+        assert x.storage[1] == ((1 + 1) * 1.2) / 1.2 - 1
+    
+    def test_translation(self):
+        # we import main to check if the target compiles
+        from pypy.translator.goal.targetnumpystandalone import main
+        from pypy.rpython.annlowlevel import llstr
+        
+        interpret(main, [llstr('af+'), 100])
diff --git a/pypy/module/operator/app_operator.py b/pypy/module/operator/app_operator.py
--- a/pypy/module/operator/app_operator.py
+++ b/pypy/module/operator/app_operator.py
@@ -4,6 +4,7 @@
 This module exports a set of operators as functions. E.g. operator.add(x,y) is
 equivalent to x+y.
 '''
+from __pypy__ import builtinify
 
 def countOf(a,b): 
     'countOf(a, b) -- Return the number of times b occurs in a.'
@@ -66,50 +67,56 @@
     a[b:c] = d 
 __setslice__ = setslice
 
-class attrgetter(object):
 
-    def __init__(self, attr, *attrs):
-        self.attrs = (attr,) + attrs
+def attrgetter(attr, *attrs):
+    if attrs:
+        getters = [single_attr_getter(a) for a in (attr,) + attrs]
+        def getter(obj):
+            return tuple([getter(obj) for getter in getters])
+    else:
+        getter = single_attr_getter(attr)
+    return builtinify(getter)
 
-    def _resolve_attr(self, obj, attr):
-        last = 0
-        while True:
-            try:
-                dot = attr.find(".", last)
-            except AttributeError:
-                raise TypeError
-            if dot > 0:
-                obj = getattr(obj, attr[last:dot])
-                last = dot + 1
-            else:
-                return getattr(obj, attr[last:])
+def single_attr_getter(attr):
+    if not isinstance(attr, str):
+        if not isinstance(attr, unicode):
+            def _raise_typeerror(obj):
+                raise TypeError("argument must be a string, not %r" %
+                                (type(attr).__name__,))
+            return _raise_typeerror
+        attr = attr.encode('ascii')
+    #
+    def make_getter(name, prevfn=None):
+        if prevfn is None:
+            def getter(obj):
+                return getattr(obj, name)
+        else:
+            def getter(obj):
+                return getattr(prevfn(obj), name)
+        return getter
+    #
+    last = 0
+    getter = None
+    while True:
+        dot = attr.find(".", last)
+        if dot < 0: break
+        getter = make_getter(attr[last:dot], getter)
+        last = dot + 1
+    return make_getter(attr[last:], getter)
 
-    def __call__(self, obj):
-        if len(self.attrs) == 1:
-            return self._resolve_attr(obj, self.attrs[0])
-        return tuple(self._resolve_attr(obj, attr) for attr in self.attrs)
 
-class itemgetter(object):
+def itemgetter(item, *items):
+    if items:
+        list_of_indices = [item] + list(items)
+        def getter(obj):
+            return tuple([obj[i] for i in list_of_indices])
+    else:
+        def getter(obj):
+            return obj[item]
+    return builtinify(getter)
 
-    def __init__(self, item, *args):
-        self.items = args
-        self.item = item
 
-    def __call__(self, obj):
-        result = obj[self.item]
-
-        if self.items:
-            list = [result] + [obj[item] for item in self.items]
-            return tuple(list)
-
-        return result
-
-class methodcaller(object):
-
-    def __init__(self, method_name, *args, **kwargs):
-        self.method_name = method_name
-        self.args = args
-        self.kwargs = kwargs
-
-    def __call__(self, obj):
-        return getattr(obj, self.method_name)(*self.args, **self.kwargs)
+def methodcaller(method_name, *args, **kwargs):
+    def call(obj):
+        return getattr(obj, method_name)(*args, **kwargs)
+    return builtinify(call)
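
attrgetter, itemgetter and methodcaller are now plain closures wrapped with
__pypy__.builtinify; the observable call behaviour stays the same.  An
illustrative usage sketch (the Point class exists only for the example):

    import operator

    class Point(object):
        def __init__(self, x, y):
            self.x, self.y = x, y

    p = Point(1, 2)
    assert operator.attrgetter('x')(p) == 1
    assert operator.attrgetter('x', 'y')(p) == (1, 2)       # several names -> tuple
    assert operator.itemgetter(1, 0)([10, 20]) == (20, 10)  # several items -> tuple
    assert operator.methodcaller('upper')('abc') == 'ABC'
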
diff --git a/pypy/module/oracle/interp_variable.py b/pypy/module/oracle/interp_variable.py
--- a/pypy/module/oracle/interp_variable.py
+++ b/pypy/module/oracle/interp_variable.py
@@ -1484,7 +1484,7 @@
     raise OperationError(
         moduledict.w_NotSupportedError,
         space.wrap("Variable_TypeByValue(): unhandled data type %s" %
-                   (space.type(w_value).getname(space, '?'),)))
+                   (space.type(w_value).getname(space),)))
 
 def newByInputTypeHandler(space, cursor, w_inputTypeHandler, w_value, numElements):
     w_var = space.call(w_inputTypeHandler,
diff --git a/pypy/module/posix/app_posix.py b/pypy/module/posix/app_posix.py
--- a/pypy/module/posix/app_posix.py
+++ b/pypy/module/posix/app_posix.py
@@ -107,6 +107,9 @@
 def tmpnam():
     """Return an absolute pathname of a file that did not exist at the
     time the call is made."""
+    from warnings import warn
+    warn(RuntimeWarning("tmpnam is a potential security risk to your program"))
+
     import tempfile
     return tempfile.mktemp()
 
@@ -114,6 +117,9 @@
     """Return an absolute pathname of a file that did not exist at the
     time the call is made.  The directory and a prefix may be specified
     as strings; they may be omitted or None if not needed."""
+    from warnings import warn
+    warn(RuntimeWarning("tempnam is a potential security risk to your program"))
+
     import tempfile
     return tempfile.mktemp('', prefix or 'tmp', dir)
 
diff --git a/pypy/module/posix/test/test_posix2.py b/pypy/module/posix/test/test_posix2.py
--- a/pypy/module/posix/test/test_posix2.py
+++ b/pypy/module/posix/test/test_posix2.py
@@ -847,6 +847,21 @@
                 assert os.path.basename(s1).startswith(prefix or 'tmp')
                 assert os.path.basename(s2).startswith(prefix or 'tmp')
 
+    def test_tmpnam_warning(self):
+        import warnings, os
+        #
+        def f_tmpnam_warning(): os.tmpnam()    # a single line
+        #
+        with warnings.catch_warnings(record=True) as w:
+            warnings.simplefilter("always")
+            f_tmpnam_warning()
+            assert len(w) == 1
+            assert issubclass(w[-1].category, RuntimeWarning)
+            assert "potential security risk" in str(w[-1].message)
+            # check that the warning points to the call to os.tmpnam(),
+            # not to some code inside app_posix.py
+            assert w[-1].lineno == f_tmpnam_warning.func_code.co_firstlineno
+
 
 class AppTestEnvironment(object):
     def setup_class(cls):
diff --git a/pypy/module/pyexpat/__init__.py b/pypy/module/pyexpat/__init__.py
--- a/pypy/module/pyexpat/__init__.py
+++ b/pypy/module/pyexpat/__init__.py
@@ -2,6 +2,22 @@
 
 from pypy.interpreter.mixedmodule import MixedModule
 
+class ErrorsModule(MixedModule):
+    "Definition of pyexpat.errors module."
+
+    appleveldefs = {
+        }
+
+    interpleveldefs = {
+        }
+
+    def setup_after_space_initialization(self):
+        from pypy.module.pyexpat import interp_pyexpat
+        for name in interp_pyexpat.xml_error_list:
+            self.space.setattr(self, self.space.wrap(name),
+                    interp_pyexpat.ErrorString(self.space,
+                    getattr(interp_pyexpat, name)))
+
 class Module(MixedModule):
     "Python wrapper for Expat parser."
 
@@ -21,6 +37,10 @@
         'version_info':  'interp_pyexpat.get_expat_version_info(space)',
         }
 
+    submodules = {
+        'errors': ErrorsModule,
+    }
+
     for name in ['XML_PARAM_ENTITY_PARSING_NEVER',
                  'XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE',
                  'XML_PARAM_ENTITY_PARSING_ALWAYS']:
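
The errors submodule is filled in after space initialization by calling
ErrorString() on each constant in xml_error_list, so its attributes hold the
human-readable expat messages.  Illustrative usage, matching test_errors in
test_parser.py below:

    import pyexpat
    assert pyexpat.errors.XML_ERROR_SYNTAX == 'syntax error'
    assert (pyexpat.errors.XML_ERROR_XML_DECL ==
            'XML declaration not well-formed')
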
diff --git a/pypy/module/pyexpat/interp_pyexpat.py b/pypy/module/pyexpat/interp_pyexpat.py
--- a/pypy/module/pyexpat/interp_pyexpat.py
+++ b/pypy/module/pyexpat/interp_pyexpat.py
@@ -31,6 +31,48 @@
 XML_Content_Ptr = lltype.Ptr(lltype.ForwardReference())
 XML_Parser = rffi.COpaquePtr(typedef='XML_Parser')
 
+xml_error_list = [
+    "XML_ERROR_NO_MEMORY",
+    "XML_ERROR_SYNTAX",
+    "XML_ERROR_NO_ELEMENTS",
+    "XML_ERROR_INVALID_TOKEN",
+    "XML_ERROR_UNCLOSED_TOKEN",
+    "XML_ERROR_PARTIAL_CHAR",
+    "XML_ERROR_TAG_MISMATCH",
+    "XML_ERROR_DUPLICATE_ATTRIBUTE",
+    "XML_ERROR_JUNK_AFTER_DOC_ELEMENT",
+    "XML_ERROR_PARAM_ENTITY_REF",
+    "XML_ERROR_UNDEFINED_ENTITY",
+    "XML_ERROR_RECURSIVE_ENTITY_REF",
+    "XML_ERROR_ASYNC_ENTITY",
+    "XML_ERROR_BAD_CHAR_REF",
+    "XML_ERROR_BINARY_ENTITY_REF",
+    "XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF",
+    "XML_ERROR_MISPLACED_XML_PI",
+    "XML_ERROR_UNKNOWN_ENCODING",
+    "XML_ERROR_INCORRECT_ENCODING",
+    "XML_ERROR_UNCLOSED_CDATA_SECTION",
+    "XML_ERROR_EXTERNAL_ENTITY_HANDLING",
+    "XML_ERROR_NOT_STANDALONE",
+    "XML_ERROR_UNEXPECTED_STATE",
+    "XML_ERROR_ENTITY_DECLARED_IN_PE",
+    "XML_ERROR_FEATURE_REQUIRES_XML_DTD",
+    "XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING",
+    # Added in Expat 1.95.7.
+    "XML_ERROR_UNBOUND_PREFIX",
+    # Added in Expat 1.95.8.
+    "XML_ERROR_UNDECLARING_PREFIX",
+    "XML_ERROR_INCOMPLETE_PE",
+    "XML_ERROR_XML_DECL",
+    "XML_ERROR_TEXT_DECL",
+    "XML_ERROR_PUBLICID",
+    "XML_ERROR_SUSPENDED",
+    "XML_ERROR_NOT_SUSPENDED",
+    "XML_ERROR_ABORTED",
+    "XML_ERROR_FINISHED",
+    "XML_ERROR_SUSPEND_PE",
+    ]
+
 class CConfigure:
     _compilation_info_ = eci
     XML_Content = rffi_platform.Struct('XML_Content', [
@@ -56,6 +98,9 @@
     XML_FALSE = rffi_platform.ConstantInteger('XML_FALSE')
     XML_TRUE = rffi_platform.ConstantInteger('XML_TRUE')
 
+    for name in xml_error_list:
+        locals()[name] = rffi_platform.ConstantInteger(name)
+
 for k, v in rffi_platform.configure(CConfigure).items():
     globals()[k] = v
 
@@ -298,7 +343,8 @@
 XML_GetErrorCode = expat_external(
     'XML_GetErrorCode', [XML_Parser], rffi.INT)
 XML_ErrorString = expat_external(
-    'XML_ErrorString', [rffi.INT], rffi.CCHARP)
+    'XML_ErrorString', [rffi.INT],
+    rffi.CCHARP)
 XML_GetCurrentLineNumber = expat_external(
     'XML_GetCurrentLineNumber', [XML_Parser], rffi.INT)
 XML_GetErrorLineNumber = XML_GetCurrentLineNumber
@@ -691,7 +737,7 @@
     elif space.is_true(space.isinstance(w_encoding, space.w_str)):
         encoding = space.str_w(w_encoding)
     else:
-        type_name = space.type(w_encoding).getname(space, '?')
+        type_name = space.type(w_encoding).getname(space)
         raise OperationError(
             space.w_TypeError,
             space.wrap('ParserCreate() argument 1 must be string or None,'
@@ -711,7 +757,7 @@
                 space.wrap('namespace_separator must be at most one character,'
                            ' omitted, or None'))
     else:
-        type_name = space.type(w_namespace_separator).getname(space, '?')
+        type_name = space.type(w_namespace_separator).getname(space)
         raise OperationError(
             space.w_TypeError,
             space.wrap('ParserCreate() argument 2 must be string or None,'
diff --git a/pypy/module/pyexpat/test/test_parser.py b/pypy/module/pyexpat/test/test_parser.py
--- a/pypy/module/pyexpat/test/test_parser.py
+++ b/pypy/module/pyexpat/test/test_parser.py
@@ -38,7 +38,7 @@
         parser = pyexpat.ParserCreate()
         raises(pyexpat.ExpatError, "parser.Parse(xml, True)")
 
-    def test_encoding(self):
+    def test_encoding_argument(self):
         import pyexpat
         for encoding_arg in (None, 'utf-8', 'iso-8859-1'):
             for namespace_arg in (None, '{'):
@@ -68,7 +68,7 @@
         assert p.buffer_size == 150
         raises(TypeError, setattr, p, 'buffer_size', sys.maxint + 1)
 
-    def test_encoding(self):
+    def test_encoding_xml(self):
         # use one of the few encodings built-in in expat
         xml = "<?xml version='1.0' encoding='iso-8859-1'?><s>caf\xe9</s>"
         import pyexpat
@@ -120,3 +120,14 @@
             return True
         p.ExternalEntityRefHandler = handler
         p.Parse(xml)
+
+    def test_errors(self):
+        import types
+        import pyexpat
+        assert isinstance(pyexpat.errors, types.ModuleType)
+        # check a few random errors
+        assert pyexpat.errors.XML_ERROR_SYNTAX == 'syntax error'
+        assert (pyexpat.errors.XML_ERROR_INCORRECT_ENCODING ==
+               'encoding specified in XML declaration is incorrect')
+        assert (pyexpat.errors.XML_ERROR_XML_DECL ==
+                'XML declaration not well-formed')
diff --git a/pypy/module/pypyjit/__init__.py b/pypy/module/pypyjit/__init__.py
--- a/pypy/module/pypyjit/__init__.py
+++ b/pypy/module/pypyjit/__init__.py
@@ -8,6 +8,7 @@
         'set_param':    'interp_jit.set_param',
         'residual_call': 'interp_jit.residual_call',
         'set_compile_hook': 'interp_jit.set_compile_hook',
+        'DebugMergePoint': 'interp_resop.W_DebugMergePoint',
     }
 
     def setup_after_space_initialization(self):
diff --git a/pypy/module/pypyjit/interp_jit.py b/pypy/module/pypyjit/interp_jit.py
--- a/pypy/module/pypyjit/interp_jit.py
+++ b/pypy/module/pypyjit/interp_jit.py
@@ -16,10 +16,12 @@
 from pypy.interpreter.baseobjspace import ObjSpace, W_Root
 from opcode import opmap
 from pypy.rlib.objectmodel import we_are_translated
+from pypy.rlib.nonconst import NonConstant
+from pypy.jit.metainterp.resoperation import rop
+from pypy.module.pypyjit.interp_resop import debug_merge_point_from_boxes
 
 PyFrame._virtualizable2_ = ['last_instr', 'pycode',
-                            'valuestackdepth', 'valuestack_w[*]',
-                            'fastlocals_w[*]',
+                            'valuestackdepth', 'locals_stack_w[*]',
                             'last_exception',
                             'lastblock',
                             'is_being_profiled',
@@ -46,6 +48,16 @@
     return (bytecode.co_flags & CO_GENERATOR) != 0
 
 
+def wrap_oplist(space, logops, operations):
+    list_w = []
+    for op in operations:
+        if op.getopnum() == rop.DEBUG_MERGE_POINT:
+            list_w.append(space.wrap(debug_merge_point_from_boxes(
+                op.getarglist())))
+        else:
+            list_w.append(space.wrap(logops.repr_of_resop(op)))
+    return list_w
+
 class PyPyJitDriver(JitDriver):
     reds = ['frame', 'ec']
     greens = ['next_instr', 'is_being_profiled', 'pycode']
@@ -57,11 +69,13 @@
         
         space = self.space
         cache = space.fromcache(Cache)
+        if cache.in_recursion:
+            return
         if space.is_true(cache.w_compile_hook):
-            memo = {}
-            list_w = [space.wrap(logger.repr_of_resop(memo, op))
-                      for op in operations]
+            logops = logger._make_log_operations()
+            list_w = wrap_oplist(space, logops, operations)
             pycode = cast_base_ptr_to_instance(PyCode, ll_pycode)
+            cache.in_recursion = True
             try:
                 space.call_function(cache.w_compile_hook,
                                     space.wrap('main'),
@@ -72,14 +86,17 @@
                                     space.newlist(list_w))
             except OperationError, e:
                 e.write_unraisable(space, "jit hook ", cache.w_compile_hook)
+            cache.in_recursion = False
 
     def on_compile_bridge(self, logger, orig_looptoken, operations, n):
         space = self.space
         cache = space.fromcache(Cache)
+        if cache.in_recursion:
+            return
         if space.is_true(cache.w_compile_hook):
-            memo = {}
-            list_w = [space.wrap(logger.repr_of_resop(memo, op))
-                      for op in operations]
+            logops = logger._make_log_operations()
+            list_w = wrap_oplist(space, logops, operations)
+            cache.in_recursion = True
             try:
                 space.call_function(cache.w_compile_hook,
                                     space.wrap('main'),
@@ -88,6 +105,7 @@
                                     space.newlist(list_w))
             except OperationError, e:
                 e.write_unraisable(space, "jit hook ", cache.w_compile_hook)
+            cache.in_recursion = False
 
 pypyjitdriver = PyPyJitDriver(get_printable_location = get_printable_location,
                               get_jitcell_at = get_jitcell_at,
@@ -158,6 +176,8 @@
     '''Configure the tunable JIT parameters.
         * set_param(name=value, ...)            # as keyword arguments
         * set_param("name=value,name=value")    # as a user-supplied string
+        * set_param("off")                      # disable the jit
+        * set_param("default")                  # restore all defaults
     '''
     # XXXXXXXXX
     args_w, kwds_w = __args__.unpack()
@@ -191,6 +211,8 @@
     return space.call_args(w_callable, __args__)
 
 class Cache(object):
+    in_recursion = False
+    
     def __init__(self, space):
         self.w_compile_hook = space.w_None
 
@@ -209,8 +231,13 @@
     for jit merge point. in case it's `main` it'll be a tuple
     (code, offset, is_being_profiled)
 
+    Note that the jit hook is not reentrant: if the code inside the
+    jit hook is itself jitted, it will get compiled, but the jit hook
+    won't be called for it.
+
     XXX write down what else
     """
     cache = space.fromcache(Cache)
     cache.w_compile_hook = w_hook
+    cache.in_recursion = NonConstant(False)
     return space.w_None
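
For reference, a minimal app-level sketch of the new set_param shortcuts and
the compile hook wiring, following the tests in this changeset (the hook
signature below is only what the tests rely on, not a documented API):

    import pypyjit

    def hook(*args):
        # called once per compiled loop or bridge; the hook is not
        # reentrant, so keep the body cheap
        print len(args)

    pypyjit.set_compile_hook(hook)
    try:
        pypyjit.set_param(threshold=5, inlining=1)   # keyword form
        pypyjit.set_param("trace_eagerness=3")       # string form
        pypyjit.set_param("off")                     # disable the jit
    finally:
        pypyjit.set_compile_hook(None)
        pypyjit.set_param("default")                 # restore all defaults
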
diff --git a/pypy/module/pypyjit/interp_resop.py b/pypy/module/pypyjit/interp_resop.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/interp_resop.py
@@ -0,0 +1,41 @@
+
+from pypy.interpreter.typedef import TypeDef, interp_attrproperty
+from pypy.interpreter.baseobjspace import Wrappable, ObjSpace, W_Root
+from pypy.interpreter.gateway import unwrap_spec, interp2app
+from pypy.interpreter.pycode import PyCode
+from pypy.rpython.lltypesystem import lltype, llmemory
+from pypy.rpython.annlowlevel import cast_base_ptr_to_instance
+from pypy.rpython.lltypesystem.rclass import OBJECT
+
+class W_DebugMergePoint(Wrappable):
+    """ A class representing debug_merge_point JIT operation
+    """
+
+    def __init__(self, mp_no, offset, pycode):
+        self.mp_no = mp_no
+        self.offset = offset
+        self.pycode = pycode
+
+    def descr_repr(self, space):
+        return space.wrap('DebugMergePoint()')
+
+ at unwrap_spec(mp_no=int, offset=int, pycode=PyCode)
+def new_debug_merge_point(space, w_tp, mp_no, offset, pycode):
+    return W_DebugMergePoint(mp_no, offset, pycode)
+
+def debug_merge_point_from_boxes(boxes):
+    mp_no = boxes[0].getint()
+    offset = boxes[2].getint()
+    llcode = lltype.cast_opaque_ptr(lltype.Ptr(OBJECT),
+                                    boxes[4].getref_base())
+    pycode = cast_base_ptr_to_instance(PyCode, llcode)
+    assert pycode is not None
+    return W_DebugMergePoint(mp_no, offset, pycode)
+
+W_DebugMergePoint.typedef = TypeDef(
+    'DebugMergePoint',
+    __new__ = interp2app(new_debug_merge_point),
+    __doc__ = W_DebugMergePoint.__doc__,
+    __repr__ = interp2app(W_DebugMergePoint.descr_repr),
+    code = interp_attrproperty('pycode', W_DebugMergePoint),
+)
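
A hedged sketch of what a compile hook sees now that debug_merge_point
operations are wrapped as DebugMergePoint instances (the argument position of
the operation list is taken from test_on_compile_types further down and is an
assumption, not a stable interface):

    import pypyjit

    def hook(*args):
        ops = args[3]              # wrapped operations, per the test below
        for op in ops:
            if isinstance(op, pypyjit.DebugMergePoint):
                print op.code      # the code object this merge point refers to
            else:
                print op           # other operations arrive as plain strings

    pypyjit.set_compile_hook(hook)
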
diff --git a/pypy/module/pypyjit/policy.py b/pypy/module/pypyjit/policy.py
--- a/pypy/module/pypyjit/policy.py
+++ b/pypy/module/pypyjit/policy.py
@@ -14,7 +14,8 @@
             modname, _ = modname.split('.', 1)
         if modname in ['pypyjit', 'signal', 'micronumpy', 'math', 'exceptions',
                        'imp', 'sys', 'array', '_ffi', 'itertools', 'operator',
-                       'posix', '_socket', '_sre', '_lsprof', '_weakref']:
+                       'posix', '_socket', '_sre', '_lsprof', '_weakref',
+                       '__pypy__', 'cStringIO']:
             return True
         return False
 
diff --git a/pypy/module/pypyjit/test/test_jit_hook.py b/pypy/module/pypyjit/test/test_jit_hook.py
--- a/pypy/module/pypyjit/test/test_jit_hook.py
+++ b/pypy/module/pypyjit/test/test_jit_hook.py
@@ -8,12 +8,13 @@
 from pypy.jit.metainterp.logger import Logger
 from pypy.rpython.annlowlevel import (cast_instance_to_base_ptr,
                                       cast_base_ptr_to_instance)
+from pypy.rpython.lltypesystem import lltype, llmemory
 from pypy.module.pypyjit.interp_jit import pypyjitdriver
 from pypy.jit.tool.oparser import parse
 from pypy.jit.metainterp.typesystem import llhelper
 
 class MockSD(object):
-    class cpu:
+    class cpu(object):
         ts = llhelper
 
 class AppTestJitHook(object):
@@ -27,14 +28,17 @@
             pass
         return f
         """)
+        cls.w_f = w_f
         ll_code = cast_instance_to_base_ptr(w_f.code)
+        code_gcref = lltype.cast_opaque_ptr(llmemory.GCREF, ll_code)
         logger = Logger(MockSD())
 
         oplist = parse("""
         [i1, i2]
         i3 = int_add(i1, i2)
+        debug_merge_point(0, 0, 0, 0, ConstPtr(ptr0))
         guard_true(i3) []
-        """).operations
+        """, namespace={'ptr0': code_gcref}).operations
 
         def interp_on_compile():
             pypyjitdriver.on_compile(logger, LoopToken(), oplist, 'loop',
@@ -63,7 +67,7 @@
         assert all[0][0][0].co_name == 'f'
         assert all[0][0][1] == 0
         assert all[0][0][2] == False
-        assert len(all[0][1]) == 2
+        assert len(all[0][1]) == 3
         assert 'int_add' in all[0][1][0]
         self.on_compile_bridge()
         assert len(all) == 2
@@ -87,3 +91,36 @@
             sys.stderr = prev
         assert 'jit hook' in s.getvalue()
         assert 'ZeroDivisionError' in s.getvalue()
+
+    def test_non_reentrant(self):
+        import pypyjit
+        l = []
+        
+        def hook(*args):
+            l.append(None)
+            self.on_compile()
+            self.on_compile_bridge()
+        
+        pypyjit.set_compile_hook(hook)
+        self.on_compile()
+        assert len(l) == 1 # and did not crash
+        self.on_compile_bridge()
+        assert len(l) == 2 # and did not crash
+        
+    def test_on_compile_types(self):
+        import pypyjit
+        l = []
+
+        def hook(*args):
+            l.append(args)
+
+        pypyjit.set_compile_hook(hook)
+        self.on_compile()
+        dmp = l[0][3][1]
+        assert isinstance(dmp, pypyjit.DebugMergePoint)
+        assert dmp.code is self.f.func_code
+
+    def test_creation(self):
+        import pypyjit
+        dmp = pypyjit.DebugMergePoint(0, 0, self.f.func_code)
+        assert dmp.code is self.f.func_code 
diff --git a/pypy/module/pypyjit/test/test_jit_setup.py b/pypy/module/pypyjit/test/test_jit_setup.py
--- a/pypy/module/pypyjit/test/test_jit_setup.py
+++ b/pypy/module/pypyjit/test/test_jit_setup.py
@@ -9,18 +9,49 @@
         # this just checks that the module is setting up things correctly, and
         # the resulting code makes sense on top of CPython.
         import pypyjit
-        pypyjit.set_param(threshold=5, inlining=1)
-        pypyjit.set_param("trace_eagerness=3,inlining=0")
+        try:
+            pypyjit.set_param(threshold=5, inlining=1)
+            pypyjit.set_param("trace_eagerness=3,inlining=0")
 
-        def f(x, y):
-            return x*y+1
+            def f(x, y):
+                return x*y+1
 
-        assert f(6, 7) == 43
+            assert f(6, 7) == 43
 
-        def gen(x):
-            i = 0
-            while i < x:
-                yield i*i
-                i += 1
+            def gen(x):
+                i = 0
+                while i < x:
+                    yield i*i
+                    i += 1
 
-        assert list(gen(3)) == [0, 1, 4]
+            assert list(gen(3)) == [0, 1, 4]
+        finally:
+            pypyjit.set_param('default')
+
+    def test_no_jit(self):
+        import pypyjit
+        was_called = []
+        def should_not_be_called(*args, **kwds):
+            was_called.append((args, kwds))
+        try:
+            pypyjit.set_param('off')
+            pypyjit.set_compile_hook(should_not_be_called)
+            def f():
+                pass
+            for i in range(2500):
+                f()
+            assert not was_called
+        finally:
+            pypyjit.set_compile_hook(None)
+            pypyjit.set_param('default')
+
+
+def test_interface_residual_call():
+    space = gettestobjspace(usemodules=['pypyjit'])
+    space.appexec([], """():
+        import pypyjit
+        def f(*args, **kwds):
+            return (args, kwds)
+        res = pypyjit.residual_call(f, 4, x=6)
+        assert res == ((4,), {'x': 6})
+    """)
diff --git a/pypy/module/pypyjit/test/test_pypy_c.py b/pypy/module/pypyjit/test/test_pypy_c.py
deleted file mode 100644
--- a/pypy/module/pypyjit/test/test_pypy_c.py
+++ /dev/null
@@ -1,430 +0,0 @@
-from pypy.conftest import gettestobjspace, option
-from pypy.tool.udir import udir
-import py
-from py.test import skip
-import sys, os, re
-import subprocess
-
-class BytecodeTrace(list):
-    def get_opnames(self, prefix=""):
-        return [op.getopname() for op in self
-                    if op.getopname().startswith(prefix)]
-
-    def __repr__(self):
-        return "%s%s" % (self.bytecode, list.__repr__(self))
-
-ZERO_OP_BYTECODES = [
-    'POP_TOP',
-    'ROT_TWO',
-    'ROT_THREE',
-    'DUP_TOP',
-    'ROT_FOUR',
-    'NOP',
-    'DUP_TOPX',
-    'LOAD_CONST',
-    'JUMP_FORWARD',
-    #'JUMP_ABSOLUTE' in theory, but contains signals stuff
-    #'LOAD_FAST' should be here, but currently needs a guard for nonzeroness
-    'STORE_FAST',
-    ]
-
-
-r_bridge = re.compile(r"bridge out of Guard (\d+)")
-
-def from_entry_bridge(text, allparts):
-    firstline = text.splitlines()[0]
-    if 'entry bridge' in firstline:
-        return True
-    match = r_bridge.search(firstline)
-    if match:
-        search = '<Guard' + match.group(1) + '>'
-        for part in allparts:
-            if search in part:
-                break
-        else:
-            raise AssertionError, "%s not found??" % (search,)
-        return from_entry_bridge(part, allparts)
-    return False
-
-def test_from_entry_bridge():
-    assert from_entry_bridge(
-        "# Loop 4 : entry bridge with 31 ops\n[p0, etc", [])
-    assert not from_entry_bridge(
-        "# Loop 1 : loop with 31 ops\n[p0, p1, etc", [])
-    assert not from_entry_bridge(
-        "# bridge out of Guard 5 with 24 ops\n[p0, p1, etc",
-        ["# Loop 1 : loop with 31 ops\n"
-             "[p0, p1]\n"
-             "guard_stuff(descr=<Guard5>)\n"])
-    assert from_entry_bridge(
-        "# bridge out of Guard 5 with 24 ops\n[p0, p1, etc",
-        ["# Loop 1 : entry bridge with 31 ops\n"
-             "[p0, p1]\n"
-             "guard_stuff(descr=<Guard5>)\n"])
-    assert not from_entry_bridge(
-        "# bridge out of Guard 51 with 24 ops\n[p0, p1, etc",
-        ["# Loop 1 : loop with 31 ops\n"
-             "[p0, p1]\n"
-             "guard_stuff(descr=<Guard5>)\n",
-         "# bridge out of Guard 5 with 13 ops\n"
-             "[p0, p1]\n"
-             "guard_other(p1, descr=<Guard51>)\n"])
-    assert from_entry_bridge(
-        "# bridge out of Guard 51 with 24 ops\n[p0, p1, etc",
-        ["# Loop 1 : entry bridge with 31 ops\n"
-             "[p0, p1]\n"
-             "guard_stuff(descr=<Guard5>)\n",
-         "# bridge out of Guard 5 with 13 ops\n"
-             "[p0, p1]\n"
-             "guard_other(p1, descr=<Guard51>)\n"])
-
-
-class PyPyCJITTests(object):
-    def run_source(self, source, expected_max_ops, *testcases, **kwds):
-        assert isinstance(expected_max_ops, int)
-        threshold = kwds.pop('threshold', 3)
-        self.count_debug_merge_point = \
-                                     kwds.pop('count_debug_merge_point', True)
-        if kwds:
-            raise TypeError, 'Unsupported keyword arguments: %s' % kwds.keys()
-        source = py.code.Source(source)
-        filepath = self.tmpdir.join('case%d.py' % self.counter)
-        logfilepath = filepath.new(ext='.log')
-        self.__class__.counter += 1
-        f = filepath.open('w')
-        print >> f, source
-        # some support code...
-        print >> f, py.code.Source("""
-            import sys
-            # we don't want to see the small bridges created
-            # by the checkinterval reaching the limit
-            sys.setcheckinterval(10000000)
-            try: # make the file runnable by CPython
-                import pypyjit
-                pypyjit.set_param(threshold=%d)
-            except ImportError:
-                pass
-
-            def check(args, expected):
-                #print >> sys.stderr, 'trying:', args
-                result = main(*args)
-                #print >> sys.stderr, 'got:', repr(result)
-                assert result == expected
-                assert type(result) is type(expected)
-        """ % threshold)
-        for testcase in testcases * 2:
-            print >> f, "check(%r, %r)" % testcase
-        print >> f, "print 'OK :-)'"
-        f.close()
-
-        print logfilepath
-        env = os.environ.copy()
-        env['PYPYLOG'] = ":%s" % (logfilepath,)
-        p = subprocess.Popen([self.pypy_c, str(filepath)],
-                             env=env, stdout=subprocess.PIPE)
-        result, _ = p.communicate()
-        assert result
-        if result.strip().startswith('SKIP:'):
-            py.test.skip(result.strip())
-        assert result.splitlines()[-1].strip() == 'OK :-)'
-        self.parse_loops(logfilepath)
-        self.print_loops()
-        print logfilepath
-        if self.total_ops > expected_max_ops:
-            assert 0, "too many operations: got %d, expected maximum %d" % (
-                self.total_ops, expected_max_ops)
-        return result
-
-    def parse_loops(self, opslogfile):
-        from pypy.tool import logparser
-        assert opslogfile.check()
-        log = logparser.parse_log_file(str(opslogfile))
-        parts = logparser.extract_category(log, 'jit-log-opt-')
-        self.rawloops = [part for part in parts
-                         if not from_entry_bridge(part, parts)]
-        self.loops, self.sliced_loops, self.total_ops = \
-                                           self.parse_rawloops(self.rawloops)
-        self.check_0_op_bytecodes()
-        self.rawentrybridges = [part for part in parts
-                                if from_entry_bridge(part, parts)]
-        _, self.sliced_entrybridge, _ = \
-                                    self.parse_rawloops(self.rawentrybridges)
-
-        from pypy.jit.tool.jitoutput import parse_prof
-        summaries  = logparser.extract_category(log, 'jit-summary')
-        if len(summaries) > 0:
-            self.jit_summary = parse_prof(summaries[-1])
-        else:
-            self.jit_summary = None
-        
-
-    def parse_rawloops(self, rawloops):
-        from pypy.jit.tool.oparser import parse
-        loops = [parse(part, no_namespace=True) for part in rawloops]
-        sliced_loops = [] # contains all bytecodes of all loops
-        total_ops = 0
-        for loop in loops:
-            for op in loop.operations:
-                if op.getopname() == "debug_merge_point":
-                    sliced_loop = BytecodeTrace()
-                    sliced_loop.bytecode = op.getarg(0)._get_str().rsplit(" ", 1)[1]
-                    sliced_loops.append(sliced_loop)
-                    if self.count_debug_merge_point:
-                        total_ops += 1
-                else:
-                    sliced_loop.append(op)
-                    total_ops += 1
-        return loops, sliced_loops, total_ops
-
-    def check_0_op_bytecodes(self):
-        for bytecodetrace in self.sliced_loops:
-            if bytecodetrace.bytecode not in ZERO_OP_BYTECODES:
-                continue
-            assert not bytecodetrace
-
-    def get_by_bytecode(self, name, from_entry_bridge=False):
-        if from_entry_bridge:
-            sliced_loops = self.sliced_entrybridge
-        else:
-            sliced_loops = self.sliced_loops
-        return [ops for ops in sliced_loops if ops.bytecode == name]
-
-    def print_loops(self):
-        for rawloop in self.rawloops:
-            print
-            print '@' * 79
-            print
-            print rawloop.rstrip()
-        print
-        print '@' * 79
-
-
-    def test_richards(self):
-        self.run_source('''
-            import sys; sys.path[:] = %r
-            from pypy.translator.goal import richards
-
-            def main():
-                return richards.main(iterations = 1)
-        ''' % (sys.path,), 7200,
-                   ([], 42))
-
-
-    def test_overflow_checking(self):
-        startvalue = sys.maxint - 2147483647
-        self.run_source('''
-        def main():
-            def f(a,b):
-                if a < 0: return -1
-                return a-b
-            total = %d
-            for i in range(100000):
-                total += f(i, 5)
-            return total
-        ''' % startvalue, 170, ([], startvalue + 4999450000L))
-
-    def test_shift(self):
-        from sys import maxint
-        maxvals = (-maxint-1, -maxint, maxint-1, maxint)
-        for a in (-4, -3, -2, -1, 0, 1, 2, 3, 4) + maxvals:
-            for b in (0, 1, 2, 31, 32, 33, 61, 62, 63):
-                r = 0
-                if (a >> b) >= 0:
-                    r += 2000
-                if (a << b) > 2:
-                    r += 20000000
-                if abs(a) < 10 and b < 5:
-                    ops = 13
-                else:
-                    ops = 29
-
-                self.run_source('''
-                def main(a, b):
-                    i = sa = 0
-                    while i < 2000:
-                        if a > 0: # Specialises the loop
-                            pass
-                        if b < 2 and b > 0:
-                            pass
-                        if (a >> b) >= 0:
-                            sa += 1
-                        if (a << b) > 2:
-                            sa += 10000
-                        i += 1
-                    return sa
-                ''', ops, ([a, b], r), count_debug_merge_point=False)
-
-    def test_revert_shift(self):
-        from sys import maxint
-        tests = []
-        for a in (1, 4, 8, 100):
-            for b in (-10, 10, -201, 201, -maxint/3, maxint/3):
-                for c in (-10, 10, -maxint/3, maxint/3):
-                    tests.append(([a, b, c], long(4000*(a+b+c))))
-        self.run_source('''
-        def main(a, b, c):
-            from sys import maxint
-            i = sa = 0
-            while i < 2000:
-                if 0 < a < 10: pass
-                if -100 < b < 100: pass
-                if -maxint/2 < c < maxint/2: pass
-                sa += (a<<a)>>a
-                sa += (b<<a)>>a
-                sa += (c<<a)>>a
-                sa += (a<<100)>>100
-                sa += (b<<100)>>100
-                sa += (c<<100)>>100
-                i += 1
-            return long(sa)
-        ''', 93, count_debug_merge_point=False, *tests)
-        
-    def test_division_to_rshift(self):
-        avalues = ('a', 'b', 7, -42, 8)
-        bvalues = ['b'] + range(-10, 0) + range(1,10)
-        code = ''
-        a1, b1, res1 = 10, 20, 0
-        a2, b2, res2 = 10, -20, 0
-        a3, b3, res3 = -10, -20, 0
-        def dd(a, b, aval, bval):
-            m = {'a': aval, 'b': bval}
-            if not isinstance(a, int):
-                a=m[a]
-            if not isinstance(b, int):
-                b=m[b]
-            return a/b
-        for a in avalues:
-            for b in bvalues:
-                code += '                sa += %s / %s\n' % (a, b)
-                res1 += dd(a, b, a1, b1)
-                res2 += dd(a, b, a2, b2)
-                res3 += dd(a, b, a3, b3)
-        # The purpose of this test is to check that we get
-        # the correct results, not really to count operations.
-        self.run_source('''
-        def main(a, b):
-            i = sa = 0
-            while i < 2000:
-%s                
-                i += 1
-            return sa
-        ''' % code, sys.maxint, ([a1, b1], 2000 * res1),
-                                ([a2, b2], 2000 * res2),
-                                ([a3, b3], 2000 * res3))
-
-    def test_mod(self):
-        avalues = ('a', 'b', 7, -42, 8)
-        bvalues = ['b'] + range(-10, 0) + range(1,10)
-        code = ''
-        a1, b1, res1 = 10, 20, 0
-        a2, b2, res2 = 10, -20, 0
-        a3, b3, res3 = -10, -20, 0
-        def dd(a, b, aval, bval):
-            m = {'a': aval, 'b': bval}
-            if not isinstance(a, int):
-                a=m[a]
-            if not isinstance(b, int):
-                b=m[b]
-            return a % b
-        for a in avalues:
-            for b in bvalues:
-                code += '                sa += %s %% %s\n' % (a, b)
-                res1 += dd(a, b, a1, b1)
-                res2 += dd(a, b, a2, b2)
-                res3 += dd(a, b, a3, b3)
-        # The purpose of this test is to check that we get
-        # the correct results, not really to count operations.
-        self.run_source('''
-        def main(a, b):
-            i = sa = 0
-            while i < 2000:
-                if a > 0: pass
-                if 1 < b < 2: pass
-%s
-                i += 1
-            return sa
-        ''' % code, sys.maxint, ([a1, b1], 2000 * res1),
-                                ([a2, b2], 2000 * res2),
-                                ([a3, b3], 2000 * res3))
-
-    def test_dont_trace_every_iteration(self):
-        self.run_source('''
-        def main(a, b):
-            i = sa = 0
-            while i < 200:
-                if a > 0: pass
-                if 1 < b < 2: pass
-                sa += a % b
-                i += 1
-            return sa
-        ''', 22,  ([10, 20], 200 * (10 % 20)),
-                 ([-10, -20], 200 * (-10 % -20)),
-                        count_debug_merge_point=False)
-        assert self.jit_summary.tracing_no == 2
-    def test_id_compare_optimization(self):
-        # XXX: lower the instruction count, 35 is the old value.
-        self.run_source("""
-        class A(object):
-            pass
-        def main():
-            i = 0
-            a = A()
-            while i < 5:
-                if A() != a:
-                    pass
-                i += 1
-        """, 35, ([], None))
-        _, compare = self.get_by_bytecode("COMPARE_OP")
-        assert "call" not in compare.get_opnames()
-
-class AppTestJIT(PyPyCJITTests):
-    def setup_class(cls):
-        if not option.runappdirect:
-            py.test.skip("meant only for pypy-c")
-        # the next line skips stuff if the pypy-c is not a jit build
-        cls.space = gettestobjspace(usemodules=['pypyjit'])
-        cls.tmpdir = udir.join('pypy-jit')
-        cls.tmpdir.ensure(dir=1)
-        cls.counter = 0
-        cls.pypy_c = sys.executable
-
-class TestJIT(PyPyCJITTests):
-    def setup_class(cls):
-        if option.pypy_c is None:
-            py.test.skip("pass --pypy!")
-        if not has_info(option.pypy_c, 'translation.jit'):
-            py.test.skip("must give a pypy-c with the jit enabled")
-        cls.tmpdir = udir.join('pypy-jit')
-        cls.tmpdir.ensure(dir=1)
-        cls.counter = 0
-        cls.pypy_c = option.pypy_c
-
-
-def test_interface_residual_call():
-    space = gettestobjspace(usemodules=['pypyjit'])
-    space.appexec([], """():
-        import pypyjit
-        def f(*args, **kwds):
-            return (args, kwds)
-        res = pypyjit.residual_call(f, 4, x=6)
-        assert res == ((4,), {'x': 6})
-    """)
-
-
-def has_info(pypy_c, option):
-    g = os.popen('"%s" --info' % pypy_c, 'r')
-    lines = g.readlines()
-    g.close()
-    if not lines:
-        raise ValueError("cannot execute %r" % pypy_c)
-    for line in lines:
-        line = line.strip()
-        if line.startswith(option + ':'):
-            line = line[len(option)+1:].strip()
-            if line == 'True':
-                return True
-            elif line == 'False':
-                return False
-            else:
-                return line
-    raise ValueError(option + ' not found in ' + pypy_c)
diff --git a/pypy/module/pypyjit/test_pypy_c/model.py b/pypy/module/pypyjit/test_pypy_c/model.py
--- a/pypy/module/pypyjit/test_pypy_c/model.py
+++ b/pypy/module/pypyjit/test_pypy_c/model.py
@@ -2,6 +2,7 @@
 import sys
 import re
 import os.path
+from _pytest.assertion import newinterpret
 from pypy.tool.jitlogparser.parser import SimpleParser, Function, TraceForOpcode
 from pypy.tool.jitlogparser.storage import LoopStorage
 
@@ -194,7 +195,7 @@
             # transform self._assert(x, 'foo') into assert x, 'foo'
             source = source.replace('self._assert(', 'assert ')
             source = source[:-1] # remove the trailing ')'
-            self.msg = py.code._reinterpret(source, f, should_fail=True)
+            self.msg = newinterpret.interpret(source, f, should_fail=True)
         else:
             self.msg = "<could not determine information>"
 
diff --git a/pypy/module/pypyjit/test_pypy_c/test_model.py b/pypy/module/pypyjit/test_pypy_c/test_00_model.py
rename from pypy/module/pypyjit/test_pypy_c/test_model.py
rename to pypy/module/pypyjit/test_pypy_c/test_00_model.py
--- a/pypy/module/pypyjit/test_pypy_c/test_model.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_00_model.py
@@ -5,6 +5,7 @@
 from lib_pypy import disassembler
 from pypy.tool.udir import udir
 from pypy.tool import logparser
+from pypy.jit.tool.jitoutput import parse_prof
 from pypy.module.pypyjit.test_pypy_c.model import Log, find_ids_range, find_ids, \
     LoopWithIds, OpMatcher
 
@@ -21,6 +22,7 @@
         self.filepath = self.tmpdir.join(meth.im_func.func_name + '.py')
 
     def run(self, func_or_src, args=[], import_site=False, **jitopts):
+        jitopts.setdefault('threshold', 200)
         src = py.code.Source(func_or_src)
         if isinstance(func_or_src, types.FunctionType):
             funcname = func_or_src.func_name
@@ -56,6 +58,8 @@
         stdout, stderr = pipe.communicate()
         if stderr.startswith('SKIP:'):
             py.test.skip(stderr)
+        if stderr.startswith('debug_alloc.h:'):   # lldebug builds
+            stderr = ''
         assert not stderr
         #
         # parse the JIT log
@@ -63,6 +67,13 @@
         rawtraces = logparser.extract_category(rawlog, 'jit-log-opt-')
         log = Log(rawtraces)
         log.result = eval(stdout)
+        #
+        summaries  = logparser.extract_category(rawlog, 'jit-summary')
+        if len(summaries) > 0:
+            log.jit_summary = parse_prof(summaries[-1])
+        else:
+            log.jit_summary = None
+        #
         return log
 
     def run_and_check(self, src, args=[], **jitopts):
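
log.jit_summary now carries the parsed jit-summary counters, or None when the
section is missing (e.g. lldebug or non-jit builds); a short sketch of how a
test might use it (the attribute name comes from the removed test_pypy_c.py
above, the rest is illustrative):

    def test_uses_summary(self):
        def main():
            return sum(range(3000))
        log = self.run(main, [])
        assert log.result == sum(range(3000))
        if log.jit_summary is not None:
            print log.jit_summary.tracing_no   # number of tracing phases seen
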
diff --git a/pypy/module/pypyjit/test_pypy_c/test__ffi.py b/pypy/module/pypyjit/test_pypy_c/test__ffi.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test__ffi.py
@@ -0,0 +1,133 @@
+import py
+import sys
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class Test__ffi(BaseTestPyPyC):
+
+    def test__ffi_call(self):
+        from pypy.rlib.test.test_libffi import get_libm_name
+        def main(libm_name):
+            try:
+                from _ffi import CDLL, types
+            except ImportError:
+                sys.stderr.write('SKIP: cannot import _ffi\n')
+                return 0
+
+            libm = CDLL(libm_name)
+            pow = libm.getfunc('pow', [types.double, types.double],
+                               types.double)
+            i = 0
+            res = 0
+            while i < 300:
+                tmp = pow(2, 3)   # ID: fficall
+                res += tmp
+                i += 1
+            return pow.getaddr(), res
+        #
+        libm_name = get_libm_name(sys.platform)
+        log = self.run(main, [libm_name])
+        pow_addr, res = log.result
+        assert res == 8.0 * 300
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id('fficall', """
+            p16 = getfield_gc(ConstPtr(ptr15), descr=<.* .*Function.inst_name .*>)
+            guard_not_invalidated(descr=...)
+            i17 = force_token()
+            setfield_gc(p0, i17, descr=<.* .*PyFrame.vable_token .*>)
+            f21 = call_release_gil(%d, 2.000000, 3.000000, descr=<FloatCallDescr>)
+            guard_not_forced(descr=...)
+            guard_no_exception(descr=...)
+        """ % pow_addr)
+
+
+    def test__ffi_call_frame_does_not_escape(self):
+        from pypy.rlib.test.test_libffi import get_libm_name
+        def main(libm_name):
+            try:
+                from _ffi import CDLL, types
+            except ImportError:
+                sys.stderr.write('SKIP: cannot import _ffi\n')
+                return 0
+
+            libm = CDLL(libm_name)
+            pow = libm.getfunc('pow', [types.double, types.double],
+                               types.double)
+
+            def mypow(a, b):
+                return pow(a, b)
+
+            i = 0
+            res = 0
+            while i < 300:
+                tmp = mypow(2, 3)
+                res += tmp
+                i += 1
+            return pow.getaddr(), res
+        #
+        libm_name = get_libm_name(sys.platform)
+        log = self.run(main, [libm_name])
+        pow_addr, res = log.result
+        assert res == 8.0 * 300
+        loop, = log.loops_by_filename(self.filepath)
+        opnames = log.opnames(loop.allops())
+        # we only force the virtualref, not its content
+        assert opnames.count('new_with_vtable') == 1
+
+    def test__ffi_call_releases_gil(self):
+        from pypy.rlib.test.test_libffi import get_libc_name
+        def main(libc_name, n):
+            import time
+            from threading import Thread
+            from _ffi import CDLL, types
+            #
+            libc = CDLL(libc_name)
+            sleep = libc.getfunc('sleep', [types.uint], types.uint)
+            delays = [0]*n + [1]
+            #
+            def loop_of_sleeps(i, delays):
+                for delay in delays:
+                    sleep(delay)    # ID: sleep
+            #
+            threads = [Thread(target=loop_of_sleeps, args=[i, delays]) for i in range(5)]
+            start = time.time()
+            for i, thread in enumerate(threads):
+                thread.start()
+            for thread in threads:
+                thread.join()
+            end = time.time()
+            return end - start
+        #
+        log = self.run(main, [get_libc_name(), 200], threshold=150)
+        assert 1 <= log.result <= 1.5 # at most 0.5 seconds of overhead
+        loops = log.loops_by_id('sleep')
+        assert len(loops) == 1 # make sure that we actually JITted the loop
+
+
+    def test_ctypes_call(self):
+        from pypy.rlib.test.test_libffi import get_libm_name
+        def main(libm_name):
+            import ctypes
+            libm = ctypes.CDLL(libm_name)
+            fabs = libm.fabs
+            fabs.argtypes = [ctypes.c_double]
+            fabs.restype = ctypes.c_double
+            x = -4
+            i = 0
+            while i < 300:
+                x = fabs(x)
+                x = x - 100
+                i += 1
+            return fabs._ptr.getaddr(), x
+
+        libm_name = get_libm_name(sys.platform)
+        log = self.run(main, [libm_name])
+        fabs_addr, res = log.result
+        assert res == -4.0
+        loop, = log.loops_by_filename(self.filepath)
+        ops = loop.allops()
+        opnames = log.opnames(ops)
+        assert opnames.count('new_with_vtable') == 1 # only the virtualref
+        assert opnames.count('call_release_gil') == 1
+        idx = opnames.index('call_release_gil')
+        call = ops[idx]
+        assert int(call.args[0]) == fabs_addr
diff --git a/pypy/module/pypyjit/test_pypy_c/test_array.py b/pypy/module/pypyjit/test_pypy_c/test_array.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_array.py
@@ -0,0 +1,188 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestArray(BaseTestPyPyC):
+
+    def test_arraycopy_disappears(self):
+        def main(n):
+            i = 0
+            while i < n:
+                t = (1, 2, 3, i + 1)
+                t2 = t[:]
+                del t
+                i = t2[3]
+                del t2
+            return i
+        #
+        log = self.run(main, [500])
+        assert log.result == 500
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i7 = int_lt(i5, i6)
+            guard_true(i7, descr=...)
+            i9 = int_add(i5, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, i9, i6, descr=<Loop0>)
+        """)
+
+    def test_array_sum(self):
+        def main():
+            from array import array
+            img = array("i", range(128) * 5) * 480
+            l, i = 0, 0
+            while i < len(img):
+                l += img[i]
+                i += 1
+            return l
+        #
+        log = self.run(main, [])
+        assert log.result == 19507200
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            guard_not_invalidated(descr=...)
+            i13 = int_lt(i7, i9)
+            guard_true(i13, descr=...)
+            i15 = getarrayitem_raw(i10, i7, descr=<.*ArrayNoLengthDescr>)
+            i16 = int_add_ovf(i8, i15)
+            guard_no_overflow(descr=...)
+            i18 = int_add(i7, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, p5, i18, i16, p8, i9, i10, descr=<Loop0>)
+        """)
+
+    def test_array_intimg(self):
+        def main():
+            from array import array
+            img = array('i', range(3)) * (350 * 480)
+            intimg = array('i', (0,)) * (640 * 480)
+            l, i = 0, 640
+            while i < 640 * 480:
+                assert len(img) == 3*350*480
+                assert len(intimg) == 640*480
+                l = l + img[i]
+                intimg[i] = (intimg[i-640] + l)
+                i += 1
+            return intimg[i - 1]
+        #
+        log = self.run(main, [])
+        assert log.result == 73574560
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i13 = int_lt(i8, 307200)
+            guard_true(i13, descr=...)
+            guard_not_invalidated(descr=...)
+        # the bound check guard on img has been killed (thanks to the asserts)
+            i14 = getarrayitem_raw(i10, i8, descr=<.*ArrayNoLengthDescr>)
+            i15 = int_add_ovf(i9, i14)
+            guard_no_overflow(descr=...)
+            i17 = int_sub(i8, 640)
+        # the bound check guard on intimg has been killed (thanks to the asserts)
+            i18 = getarrayitem_raw(i11, i17, descr=<.*ArrayNoLengthDescr>)
+            i19 = int_add_ovf(i18, i15)
+            guard_no_overflow(descr=...)
+        # on 64bit, there is a guard checking that i19 actually fits into 32bit
+            ...
+            setarrayitem_raw(i11, i8, _, descr=<.*ArrayNoLengthDescr>)
+            i28 = int_add(i8, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, p5, p6, i28, i15, p9, i10, i11, descr=<Loop0>)
+        """)
+
+
+    def test_zeropadded(self):
+        def main():
+            from array import array
+            class ZeroPadded(array):
+                def __new__(cls, l):
+                    self = array.__new__(cls, 'd', range(l))
+                    return self
+
+                def __getitem__(self, i):
+                    if i < 0 or i >= len(self):
+                        return 0
+                    return array.__getitem__(self, i) # ID: get
+            #
+            buf = ZeroPadded(2000)
+            i = 10
+            sa = 0
+            while i < 2000 - 10:
+                sa += buf[i-2] + buf[i-1] + buf[i] + buf[i+1] + buf[i+2]
+                i += 1
+            return sa
+
+        log = self.run(main, [])
+        assert log.result == 9895050.0
+        loop, = log.loops_by_filename(self.filepath)
+        #
+        # check that the overloaded __getitem__ does not introduce double
+        # array bound checks.
+        #
+        # The force_token()s are still there, but will be eliminated by the
+        # backend regalloc, so they are harmless
+        assert loop.match(ignore_ops=['force_token'],
+                          expected_src="""
+            ...
+            i20 = int_ge(i18, i8)
+            guard_false(i20, descr=...)
+            f21 = getarrayitem_raw(i13, i18, descr=...)
+            f23 = getarrayitem_raw(i13, i14, descr=...)
+            f24 = float_add(f21, f23)
+            f26 = getarrayitem_raw(i13, i6, descr=...)
+            f27 = float_add(f24, f26)
+            i29 = int_add(i6, 1)
+            i31 = int_ge(i29, i8)
+            guard_false(i31, descr=...)
+            f33 = getarrayitem_raw(i13, i29, descr=...)
+            f34 = float_add(f27, f33)
+            i36 = int_add(i6, 2)
+            i38 = int_ge(i36, i8)
+            guard_false(i38, descr=...)
+            f39 = getarrayitem_raw(i13, i36, descr=...)
+            ...
+        """)
+
+    def test_circular(self):
+        def main():
+            from array import array
+            class Circular(array):
+                def __new__(cls):
+                    self = array.__new__(cls, 'd', range(256))
+                    return self
+                def __getitem__(self, i):
+                    assert len(self) == 256
+                    return array.__getitem__(self, i & 255)
+            #
+            buf = Circular()
+            i = 10
+            sa = 0
+            while i < 2000 - 10:
+                sa += buf[i-2] + buf[i-1] + buf[i] + buf[i+1] + buf[i+2]
+                i += 1
+            return sa
+        #
+        log = self.run(main, [])
+        assert log.result == 1239690.0
+        loop, = log.loops_by_filename(self.filepath)
+        #
+        # check that the array bound checks are removed
+        #
+        # The force_token()s are still there, but will be eliminated by the
+        # backend regalloc, so they are harmless
+        assert loop.match(ignore_ops=['force_token'],
+                          expected_src="""
+            ...
+            i17 = int_and(i14, 255)
+            f18 = getarrayitem_raw(i8, i17, descr=...)
+            f20 = getarrayitem_raw(i8, i9, descr=...)
+            f21 = float_add(f18, f20)
+            f23 = getarrayitem_raw(i8, i10, descr=...)
+            f24 = float_add(f21, f23)
+            i26 = int_add(i6, 1)
+            i29 = int_and(i26, 255)
+            f30 = getarrayitem_raw(i8, i29, descr=...)
+            f31 = float_add(f24, f30)
+            i33 = int_add(i6, 2)
+            i36 = int_and(i33, 255)
+            f37 = getarrayitem_raw(i8, i36, descr=...)
+            ...
+        """)
diff --git a/pypy/module/pypyjit/test_pypy_c/test_boolrewrite.py b/pypy/module/pypyjit/test_pypy_c/test_boolrewrite.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_boolrewrite.py
@@ -0,0 +1,233 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestBoolRewrite(BaseTestPyPyC):
+
+    def test_boolrewrite_inverse(self):
+        """
+        Test for this case::
+            guard(i < x)
+            ...
+            guard(i >= y)
+
+        where x and y can be either constants or variables. There are cases in
+        which the second guard is proven to be always true.
+        """
+
+        for a, b, res, opt_expected in (('2000', '2000', 20001000, True),
+                                        ( '500',  '500', 15001500, True),
+                                        ( '300',  '600', 16001700, False),
+                                        (   'a',    'b', 16001700, False),
+                                        (   'a',    'a', 13001700, True)):
+            src = """
+                def main():
+                    sa = 0
+                    a = 300
+                    b = 600
+                    for i in range(1000):
+                        if i < %s:         # ID: lt
+                            sa += 1
+                        else:
+                            sa += 2
+                        #
+                        if i >= %s:        # ID: ge
+                            sa += 10000
+                        else:
+                            sa += 20000
+                    return sa
+            """ % (a, b)
+            #
+            log = self.run(src, [], threshold=400)
+            assert log.result == res
+            loop, = log.loops_by_filename(self.filepath)
+            le_ops = log.opnames(loop.ops_by_id('lt'))
+            ge_ops = log.opnames(loop.ops_by_id('ge'))
+            assert le_ops.count('int_lt') == 1
+            #
+            if opt_expected:
+                assert ge_ops.count('int_ge') == 0
+            else:
+                # if this assert fails it means that the optimization was
+                # applied even though we did not expect it to. Check whether
+                # the optimization is valid, and either fix the code or fix
+                # the test :-)
+                assert ge_ops.count('int_ge') == 1
+
+    def test_boolrewrite_reflex(self):
+        """
+        Test for this case::
+            guard(i < x)
+            ...
+            guard(y > i)
+
+        where x and y can be either constants or variables. There are cases in
+        which the second guard is proven to be always true.
+        """
+        for a, b, res, opt_expected in (('2000', '2000', 10001000, True),
+                                        ( '500',  '500', 15001500, True),
+                                        ( '300',  '600', 14001700, False),
+                                        (   'a',    'b', 14001700, False),
+                                        (   'a',    'a', 17001700, True)):
+
+            src = """
+                def main():
+                    sa = 0
+                    a = 300
+                    b = 600
+                    for i in range(1000):
+                        if i < %s:        # ID: lt
+                            sa += 1
+                        else:
+                            sa += 2
+                        if %s > i:        # ID: gt
+                            sa += 10000
+                        else:
+                            sa += 20000
+                    return sa
+            """ % (a, b)
+            log = self.run(src, [], threshold=400)
+            assert log.result == res
+            loop, = log.loops_by_filename(self.filepath)
+            le_ops = log.opnames(loop.ops_by_id('lt'))
+            gt_ops = log.opnames(loop.ops_by_id('gt'))
+            assert le_ops.count('int_lt') == 1
+            #
+            if opt_expected:
+                assert gt_ops.count('int_gt') == 0
+            else:
+                # if this assert fails it means that the optimization was
+                # applied even though we did not expect it to. Check whether
+                # the optimization is valid, and either fix the code or fix
+                # the test :-)
+                assert gt_ops.count('int_gt') == 1
+
+
+    def test_boolrewrite_allcases_inverse(self):
+        """
+        Test for this case::
+            guard(i < x)
+            ...
+            guard(i > y)
+
+        with all possible combination of binary comparison operators.  This
+        test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        ops = ('<', '>', '<=', '>=', '==', '!=')
+        for op1 in ops:
+            for op2 in ops:
+                for a,b in ((500, 500), (300, 600)):
+                    src = """
+                        def main():
+                            sa = 0
+                            for i in range(300):
+                                if i %s %d:
+                                    sa += 1
+                                else:
+                                    sa += 2
+                                if i %s %d:
+                                    sa += 10000
+                                else:
+                                    sa += 20000
+                            return sa
+                    """ % (op1, a, op2, b)
+                    yield self.run_and_check, src
+
+                    src = """
+                        def main():
+                            sa = 0
+                            i = 0.0
+                            while i < 250.0:
+                                if i %s %f:
+                                    sa += 1
+                                else:
+                                    sa += 2
+                                if i %s %f:
+                                    sa += 10000
+                                else:
+                                    sa += 20000
+                                i += 0.25
+                            return sa
+                    """ % (op1, float(a)/4.0, op2, float(b)/4.0)
+                    yield self.run_and_check, src
+
+
+    def test_boolrewrite_allcases_reflex(self):
+        """
+        Test for this case::
+            guard(i < x)
+            ...
+            guard(x > i)
+
+        with all possible combination of binary comparison operators.  This
+        test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        ops = ('<', '>', '<=', '>=', '==', '!=')
+        for op1 in ops:
+            for op2 in ops:
+                for a,b in ((500, 500), (300, 600)):
+                    src = """
+                        def main():
+                            sa = 0
+                            for i in range(300):
+                                if i %s %d:
+                                    sa += 1
+                                else:
+                                    sa += 2
+                                if %d %s i:
+                                    sa += 10000
+                                else:
+                                    sa += 20000
+                            return sa
+                    """ % (op1, a, b, op2)
+                    yield self.run_and_check, src
+
+                    src = """
+                        def main():
+                            sa = 0
+                            i = 0.0
+                            while i < 250.0:
+                                if i %s %f:
+                                    sa += 1
+                                else:
+                                    sa += 2
+                                if %f %s i:
+                                    sa += 10000
+                                else:
+                                    sa += 20000
+                                i += 0.25
+                            return sa
+                    """ % (op1, float(a)/4.0, float(b)/4.0, op2)
+                    yield self.run_and_check, src
+
+    def test_boolrewrite_ptr(self):
+        """
+        This test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        compares = ('a == b', 'b == a', 'a != b', 'b != a', 'a == c', 'c != b')
+        for e1 in compares:
+            for e2 in compares:
+                src = """
+                    class tst(object):
+                        pass
+                    def main():
+                        a = tst()
+                        b = tst()
+                        c = tst()
+                        sa = 0
+                        for i in range(300):
+                            if %s:
+                                sa += 1
+                            else:
+                                sa += 2
+                            if %s:
+                                sa += 10000
+                            else:
+                                sa += 20000
+                            if i > 750:
+                                a = b
+                        return sa
+                """ % (e1, e2)
+                yield self.run_and_check, src
diff --git a/pypy/module/pypyjit/test_pypy_c/test_call.py b/pypy/module/pypyjit/test_pypy_c/test_call.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_call.py
@@ -0,0 +1,410 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestCall(BaseTestPyPyC):
+
+    def test_recursive_call(self):
+        def fn():
+            def rec(n):
+                if n == 0:
+                    return 0
+                return 1 + rec(n-1)
+            #
+            # this loop is traced and then aborted, because the trace is too
+            # long. But then "rec" is marked as "don't inline". Since we have
+            # already traced the function from the start (because of the
+            # counter), we can now inline it as a call_assembler
+            i = 0
+            j = 0
+            while i < 20:
+                i += 1
+                j += rec(100) # ID: call_rec
+            return j
+        #
+        log = self.run(fn, [], threshold=18)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id('call_rec', """
+            ...
+            p53 = call_assembler(..., descr=...)
+            guard_not_forced(descr=...)
+            guard_no_exception(descr=...)
+            ...
+        """)
+
+    def test_fib(self):
+        def fib(n):
+            if n == 0 or n == 1:
+                return 1
+            return fib(n - 1) + fib(n - 2) # ID: call_rec
+
+        log = self.run(fib, [7], function_threshold=15)
+        loop, = log.loops_by_filename(self.filepath, is_entry_bridge='*')
+        #assert loop.match_by_id('call_rec', '''
+        #...
+        #p1 = call_assembler(..., descr=...)
+        #...
+        #''')
+
+    def test_simple_call(self):
+        src = """
+            OFFSET = 0
+            def f(i):
+                return i + 1 + OFFSET # ID: add
+            def main(n):
+                i = 0
+                while i < n+OFFSET:   # ID: cond
+                    i = f(f(i))       # ID: call
+                    a = 0
+                return i
+        """
+        log = self.run(src, [1000])
+        assert log.result == 1000
+        # first, we test what is inside the entry bridge
+        # -----------------------------------------------
+        entry_bridge, = log.loops_by_id('call', is_entry_bridge=True)
+        # LOAD_GLOBAL of OFFSET
+        ops = entry_bridge.ops_by_id('cond', opcode='LOAD_GLOBAL')
+        assert log.opnames(ops) == ["guard_value",
+                                    "getfield_gc", "guard_value",
+                                    "getfield_gc", "guard_value",
+                                    "getfield_gc", "guard_nonnull_class"]
+        # LOAD_GLOBAL of OFFSET, but in a different function; partially
+        # folded away
+        # XXX could be improved
+        ops = entry_bridge.ops_by_id('add', opcode='LOAD_GLOBAL')
+        assert log.opnames(ops) == ["guard_value", "getfield_gc", "guard_value"]
+        #
+        # two LOAD_GLOBAL of f, the second is folded away
+        ops = entry_bridge.ops_by_id('call', opcode='LOAD_GLOBAL')
+        assert log.opnames(ops) == ["getfield_gc", "guard_nonnull_class"]
+        #
+        assert entry_bridge.match_by_id('call', """
+            p29 = getfield_gc(ConstPtr(ptr28), descr=<GcPtrFieldDescr pypy.objspace.std.celldict.ModuleCell.inst_w_value .*>)
+            guard_nonnull_class(p29, ConstClass(Function), descr=...)
+            p33 = getfield_gc(p29, descr=<GcPtrFieldDescr pypy.interpreter.function.Function.inst_code .*>)
+            guard_value(p33, ConstPtr(ptr34), descr=...)
+            p35 = getfield_gc(p29, descr=<GcPtrFieldDescr pypy.interpreter.function.Function.inst_w_func_globals .*>)
+            p36 = getfield_gc(p29, descr=<GcPtrFieldDescr pypy.interpreter.function.Function.inst_closure .*>)
+            p38 = call(ConstClass(getexecutioncontext), descr=<GcPtrCallDescr>)
+            p39 = getfield_gc(p38, descr=<GcPtrFieldDescr pypy.interpreter.executioncontext.ExecutionContext.inst_topframeref .*>)
+            i40 = force_token()
+            p41 = getfield_gc(p38, descr=<GcPtrFieldDescr pypy.interpreter.executioncontext.ExecutionContext.inst_w_tracefunc .*>)
+            guard_isnull(p41, descr=...)
+            i42 = getfield_gc(p38, descr=<NonGcPtrFieldDescr pypy.interpreter.executioncontext.ExecutionContext.inst_profilefunc .*>)
+            i43 = int_is_zero(i42)
+            guard_true(i43, descr=...)
+            i50 = force_token()
+        """)
+        #
+        # then, we test the actual loop
+        # -----------------------------
+        loop, = log.loops_by_id('call')
+        assert loop.match("""
+            i12 = int_lt(i5, i6)
+            guard_true(i12, descr=...)
+            i13 = force_token()
+            i15 = int_add(i5, 1)
+            i16 = int_add_ovf(i15, i7)
+            guard_no_overflow(descr=...)
+            i18 = force_token()
+            i20 = int_add_ovf(i16, 1)
+            guard_no_overflow(descr=...)
+            i21 = int_add_ovf(i20, i7)
+            guard_no_overflow(descr=...)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, i21, i6, i7, p8, p9, p10, p11, descr=<Loop0>)
+        """)
+
+    def test_method_call(self):
+        def fn(n):
+            class A(object):
+                def __init__(self, a):
+                    self.a = a
+                def f(self, i):
+                    return self.a + i
+            i = 0
+            a = A(1)
+            while i < n:
+                x = a.f(i)    # ID: meth1
+                i = a.f(x)    # ID: meth2
+            return i
+        #
+        log = self.run(fn, [1000])
+        assert log.result == 1000
+        #
+        # first, we test the entry bridge
+        # -------------------------------
+        entry_bridge, = log.loops_by_filename(self.filepath, is_entry_bridge=True)
+        ops = entry_bridge.ops_by_id('meth1', opcode='LOOKUP_METHOD')
+        assert log.opnames(ops) == ['guard_value', 'getfield_gc', 'guard_value',
+                                    'guard_not_invalidated']
+        # the second LOOKUP_METHOD is folded away
+        assert list(entry_bridge.ops_by_id('meth2', opcode='LOOKUP_METHOD')) == []
+        #
+        # then, the actual loop
+        # ----------------------
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i15 = int_lt(i6, i9)
+            guard_true(i15, descr=...)
+            guard_not_invalidated(descr=...)
+            i16 = force_token()
+            i17 = int_add_ovf(i10, i6)
+            guard_no_overflow(descr=...)
+            i18 = force_token()
+            i19 = int_add_ovf(i10, i17)
+            guard_no_overflow(descr=...)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, p5, i19, p7, i17, i9, i10, p11, p12, p13, descr=<Loop0>)
+        """)
+
+    def test_static_classmethod_call(self):
+        def fn(n):
+            class A(object):
+                @classmethod
+                def f(cls, i):
+                    return i + (cls is A) + 1
+                @staticmethod
+                def g(i):
+                    return i - 1
+            #
+            i = 0
+            a = A()
+            while i < n:
+                x = a.f(i)
+                i = a.g(x)
+            return i
+        #
+        log = self.run(fn, [1000])
+        assert log.result == 1000
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i14 = int_lt(i6, i9)
+            guard_true(i14, descr=...)
+            guard_not_invalidated(descr=...)
+            i15 = force_token()
+            i17 = int_add_ovf(i8, 1)
+            guard_no_overflow(descr=...)
+            i18 = force_token()
+            --TICK--
+            jump(p0, p1, p2, p3, p4, i8, p7, i17, p8, i9, p10, p11, p12, descr=<Loop0>)
+        """)
+
+    def test_default_and_kw(self):
+        def main(n):
+            def f(i, j=1):
+                return i + j
+            #
+            i = 0
+            while i < n:
+                i = f(f(i), j=1) # ID: call
+                a = 0
+            return i
+        #
+        log = self.run(main, [1000])
+        assert log.result == 1000
+        loop, = log.loops_by_id('call')
+        assert loop.match_by_id('call', """
+            i14 = force_token()
+            i16 = force_token()
+        """)
+
+    def test_kwargs_empty(self):
+        def main(x):
+            def g(**args):
+                return len(args) + 1
+            #
+            s = 0
+            d = {}
+            i = 0
+            while i < x:
+                s += g(**d)       # ID: call
+                i += 1
+            return s
+        #
+        log = self.run(main, [1000])
+        assert log.result == 1000
+        loop, = log.loops_by_id('call')
+        ops = log.opnames(loop.ops_by_id('call'))
+        guards = [op for op in ops if op.startswith('guard')]
+        assert guards == ["guard_no_overflow"]
+
+    def test_kwargs(self):
+        # this is not a very precise test, could be improved
+        def main(x):
+            def g(**args):
+                return len(args)
+            #
+            s = 0
+            d = {"a": 1}
+            i = 0
+            while i < x:
+                s += g(**d)       # ID: call
+                d[str(i)] = i
+                if i % 100 == 99:
+                    d = {"a": 1}
+                i += 1
+            return s
+        #
+        log = self.run(main, [1000])
+        assert log.result == 50500
+        loop, = log.loops_by_id('call')
+        print loop.ops_by_id('call')
+        ops = log.opnames(loop.ops_by_id('call'))
+        guards = [op for op in ops if op.startswith('guard')]
+        print guards
+        assert len(guards) <= 20
+
+    def test_stararg_virtual(self):
+        def main(x):
+            def g(*args):
+                return len(args)
+            def h(a, b, c):
+                return c
+            #
+            s = 0
+            for i in range(x):
+                l = [i, x, 2]
+                s += g(*l)       # ID: g1
+                s += h(*l)       # ID: h1
+                s += g(i, x, 2)  # ID: g2
+                a = 0
+            for i in range(x):
+                l = [x, 2]
+                s += g(i, *l)    # ID: g3
+                s += h(i, *l)    # ID: h2
+                a = 0
+            return s
+        #
+        log = self.run(main, [1000])
+        assert log.result == 13000
+        loop0, = log.loops_by_id('g1')
+        assert loop0.match_by_id('g1', """
+            i20 = force_token()
+            i22 = int_add_ovf(i8, 3)
+            guard_no_overflow(descr=...)
+        """)
+        assert loop0.match_by_id('h1', """
+            i20 = force_token()
+            i22 = int_add_ovf(i8, 2)
+            guard_no_overflow(descr=...)
+        """)
+        assert loop0.match_by_id('g2', """
+            i27 = force_token()
+            i29 = int_add_ovf(i26, 3)
+            guard_no_overflow(descr=...)
+        """)
+        #
+        loop1, = log.loops_by_id('g3')
+        assert loop1.match_by_id('g3', """
+            i21 = force_token()
+            i23 = int_add_ovf(i9, 3)
+            guard_no_overflow(descr=...)
+        """)
+        assert loop1.match_by_id('h2', """
+            i25 = force_token()
+            i27 = int_add_ovf(i23, 2)
+            guard_no_overflow(descr=...)
+        """)
+
+    def test_stararg(self):
+        def main(x):
+            def g(*args):
+                return args[-1]
+            def h(*args):
+                return len(args)
+            #
+            s = 0
+            l = []
+            i = 0
+            while i < x:
+                l.append(1)
+                s += g(*l)     # ID: g
+                i = h(*l)      # ID: h
+                a = 0
+            return s
+        #
+        log = self.run(main, [1000])
+        assert log.result == 1000
+        loop, = log.loops_by_id('g')
+        ops_g = log.opnames(loop.ops_by_id('g'))
+        ops_h = log.opnames(loop.ops_by_id('h'))
+        ops = ops_g + ops_h
+        assert 'new_with_vtable' not in ops
+        assert 'call_may_force' not in ops
+
+    def test_call_builtin_function(self):
+        def main(n):
+            i = 2
+            l = []
+            while i < n:
+                i += 1
+                l.append(i)    # ID: append
+                a = 0
+            return i, len(l)
+        #
+        log = self.run(main, [1000])
+        assert log.result == (1000, 998)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id('append', """
+            i13 = getfield_gc(p8, descr=<SignedFieldDescr list.length .*>)
+            i15 = int_add(i13, 1)
+            call(ConstClass(_ll_list_resize_ge__listPtr_Signed), p8, i15, descr=<VoidCallDescr>)
+            guard_no_exception(descr=...)
+            p17 = getfield_gc(p8, descr=<GcPtrFieldDescr list.items .*>)
+            p19 = new_with_vtable(ConstClass(W_IntObject))
+            setfield_gc(p19, i12, descr=<SignedFieldDescr .*W_IntObject.inst_intval .*>)
+            setarrayitem_gc(p17, i13, p19, descr=<GcPtrArrayDescr>)
+        """)
+
+    def test_blockstack_virtualizable(self):
+        def main(n):
+            from pypyjit import residual_call
+            i = 0
+            while i < n:
+                try:
+                    residual_call(len, [])   # ID: call
+                except:
+                    pass
+                i += 1
+            return i
+        #
+        log = self.run(main, [500])
+        assert log.result == 500
+        loop, = log.loops_by_id('call')
+        assert loop.match_by_id('call', opcode='CALL_FUNCTION', expected_src="""
+            # make sure that the "block" is not allocated
+            ...
+            i20 = force_token()
+            setfield_gc(p0, i20, descr=<SignedFieldDescr .*PyFrame.vable_token .*>)
+            p22 = new_with_vtable(19511408)
+            p24 = new_array(1, descr=<GcPtrArrayDescr>)
+            p26 = new_with_vtable(ConstClass(W_ListObject))
+            p27 = new(descr=<SizeDescr .*>)
+            p29 = new_array(0, descr=<GcPtrArrayDescr>)
+            setfield_gc(p27, p29, descr=<GcPtrFieldDescr list.items .*>)
+            setfield_gc(p26, p27, descr=<.* .*W_ListObject.inst_wrappeditems .*>)
+            setarrayitem_gc(p24, 0, p26, descr=<GcPtrArrayDescr>)
+            setfield_gc(p22, p24, descr=<GcPtrFieldDescr .*Arguments.inst_arguments_w .*>)
+            p32 = call_may_force(11376960, p18, p22, descr=<GcPtrCallDescr>)
+            ...
+        """)
+
+    def test_func_defaults(self):
+        def main(n):
+            i = 1
+            while i < n:
+                i += len(xrange(i+1)) - i
+            return i
+
+        log = self.run(main, [10000])
+        assert log.result == 10000
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i10 = int_lt(i5, i6)
+            guard_true(i10, descr=...)
+            guard_not_invalidated(descr=...)
+            i120 = int_add(i5, 1)
+            --TICK--
+            jump(..., descr=<Loop0>)
+        """)
diff --git a/pypy/module/pypyjit/test_pypy_c/test_containers.py b/pypy/module/pypyjit/test_pypy_c/test_containers.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_containers.py
@@ -0,0 +1,25 @@
+
+import py, sys
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+
+class TestDicts(BaseTestPyPyC):
+    def test_strdict(self):
+        def fn(n):
+            import sys
+            d = {}
+            class A(object):
+                pass
+            a = A()
+            a.x = 1
+            for s in sys.modules.keys() * 1000:
+                inc = a.x # ID: look
+                d[s] = d.get(s, 0) + inc
+            return sum(d.values())
+        #
+        log = self.run(fn, [1000])
+        assert log.result % 1000 == 0
+        loop, = log.loops_by_filename(self.filepath)
+        ops = loop.ops_by_id('look')
+        assert log.opnames(ops) == ['setfield_gc',
+                                    'guard_not_invalidated']
diff --git a/pypy/module/pypyjit/test_pypy_c/test_exception.py b/pypy/module/pypyjit/test_pypy_c/test_exception.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_exception.py
@@ -0,0 +1,93 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestException(BaseTestPyPyC):
+
+    def test_cmp_exc(self):
+        def f1(n):
+            # So we don't get a LOAD_GLOBAL op
+            KE = KeyError
+            i = 0
+            while i < n:
+                try:
+                    raise KE
+                except KE: # ID: except
+                    i += 1
+            return i
+
+        log = self.run(f1, [10000])
+        assert log.result == 10000
+        loop, = log.loops_by_id("except")
+        ops = list(loop.ops_by_id("except", opcode="COMPARE_OP"))
+        assert ops == []
+
+    def test_exception_inside_loop_1(self):
+        def main(n):
+            while n:
+                try:
+                    raise ValueError
+                except ValueError:
+                    pass
+                n -= 1
+            return n
+        #
+        log = self.run(main, [1000])
+        assert log.result == 0
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+        i5 = int_is_true(i3)
+        guard_true(i5, descr=...)
+        guard_not_invalidated(descr=...)
+        --EXC-TICK--
+        i12 = int_sub_ovf(i3, 1)
+        guard_no_overflow(descr=...)
+        --TICK--
+        jump(..., descr=<Loop0>)
+        """)
+
+    def test_exception_inside_loop_2(self):
+        def main(n):
+            def g(n):
+                raise ValueError(n)  # ID: raise
+            def f(n):
+                g(n)
+            #
+            while n:
+                try:
+                    f(n)
+                except ValueError:
+                    pass
+                n -= 1
+            return n
+        #
+        log = self.run(main, [1000])
+        assert log.result == 0
+        loop, = log.loops_by_filename(self.filepath)
+        ops = log.opnames(loop.ops_by_id('raise'))
+        assert 'new' not in ops
+
+    def test_reraise(self):
+        def f(n):
+            i = 0
+            while i < n:
+                try:
+                    try:
+                        raise KeyError
+                    except KeyError:
+                        raise
+                except KeyError:
+                    i += 1
+            return i
+
+        log = self.run(f, [100000])
+        assert log.result == 100000
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i7 = int_lt(i4, i5)
+            guard_true(i7, descr=...)
+            guard_not_invalidated(descr=...)
+            --EXC-TICK--
+            i14 = int_add(i4, 1)
+            --TICK--
+            jump(..., descr=<Loop0>)
+        """)
diff --git a/pypy/module/pypyjit/test_pypy_c/test_globals.py b/pypy/module/pypyjit/test_pypy_c/test_globals.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_globals.py
@@ -0,0 +1,30 @@
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+
+class TestGlobals(BaseTestPyPyC):
+    def test_load_builtin(self):
+        def main(n):
+            import pypyjit
+
+            i = 0
+            while i < n:
+                l = len # ID: loadglobal
+                i += pypyjit.residual_call(l, "a")
+            return i
+        #
+        log = self.run(main, [500])
+        assert log.result == 500
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id("loadglobal", """
+            p10 = getfield_gc(p0, descr=<GcPtrFieldDescr .*Frame.inst_w_globals .*>)
+            guard_value(p10, ConstPtr(ptr11), descr=...)
+            p12 = getfield_gc(p10, descr=<GcPtrFieldDescr .*W_DictMultiObject.inst_strategy .*>)
+            guard_value(p12, ConstPtr(ptr13), descr=...)
+            p15 = getfield_gc(ConstPtr(ptr14), descr=<GcPtrFieldDescr .*ModuleCell.inst_w_value .*>)
+            guard_isnull(p15, descr=...)
+            guard_not_invalidated(descr=...)
+            p19 = getfield_gc(ConstPtr(p17), descr=<GcPtrFieldDescr .*W_DictMultiObject.inst_strategy .*>)
+            guard_value(p19, ConstPtr(ptr20), descr=...)
+            p22 = getfield_gc(ConstPtr(ptr21), descr=<GcPtrFieldDescr .*ModuleCell.inst_w_value .*>)
+            guard_nonnull(p22, descr=...)
+        """)
\ No newline at end of file
diff --git a/pypy/module/pypyjit/test_pypy_c/test_import.py b/pypy/module/pypyjit/test_pypy_c/test_import.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_import.py
@@ -0,0 +1,46 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestImport(BaseTestPyPyC):
+
+    def test_import_in_function(self):
+        def main(n):
+            i = 0
+            while i < n:
+                from sys import version  # ID: import
+                i += 1
+            return i
+        #
+        log = self.run(main, [500])
+        assert log.result == 500
+        loop, = log.loops_by_id('import')
+        assert loop.match_by_id('import', """
+            guard_not_invalidated(descr=...)
+            p11 = getfield_gc(ConstPtr(ptr10), descr=<GcPtrFieldDescr pypy.objspace.std.celldict.ModuleCell.inst_w_value 8>)
+            guard_value(p11, ConstPtr(ptr12), descr=...)
+            p14 = getfield_gc(ConstPtr(ptr13), descr=<GcPtrFieldDescr pypy.objspace.std.celldict.ModuleCell.inst_w_value 8>)
+            p16 = getfield_gc(ConstPtr(ptr15), descr=<GcPtrFieldDescr pypy.objspace.std.celldict.ModuleCell.inst_w_value 8>)
+            guard_value(p14, ConstPtr(ptr17), descr=...)
+            guard_isnull(p16, descr=...)
+        """)
+
+    def test_import_fast_path(self, tmpdir):
+        pkg = tmpdir.join('mypkg').ensure(dir=True)
+        pkg.join('__init__.py').write("")
+        pkg.join('mod.py').write(str(py.code.Source("""
+            def do_the_import():
+                import sys
+        """)))
+        def main(path, n):
+            import sys
+            sys.path.append(path)
+            from mypkg.mod import do_the_import
+            for i in range(n):
+                do_the_import()
+        #
+        log = self.run(main, [str(tmpdir), 300])
+        loop, = log.loops_by_filename(self.filepath)
+        # this checks against a slow-down that once introduced a
+        # call_may_force(absolute_import_with_lock).
+        for opname in log.opnames(loop.allops(opcode="IMPORT_NAME")):
+            assert 'call' not in opname    # no call-like opcode
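Two assertion idioms recur in these files: counting how many guard_* operations a marked line produces (test_kwargs, test_silly_max below) and scanning the operation names under a bytecode for anything call-like (test_import_fast_path, test_stararg). Both reduce to simple filters over the opname list, roughly:

    # Rough helpers mirroring the checks written inline in the tests above;
    # "opnames" is whatever log.opnames(...) returns for a set of operations.
    def count_guards(opnames):
        return len([name for name in opnames if name.startswith('guard')])

    def residual_calls(opnames):
        return [name for name in opnames if 'call' in name]

    ops = ['guard_value', 'getfield_gc', 'guard_not_invalidated', 'force_token']
    assert count_guards(ops) == 2
    assert residual_calls(ops) == []
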
diff --git a/pypy/module/pypyjit/test_pypy_c/test_instance.py b/pypy/module/pypyjit/test_pypy_c/test_instance.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_instance.py
@@ -0,0 +1,201 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestInstance(BaseTestPyPyC):
+
+    def test_virtual_instance(self):
+        def main(n):
+            class A(object):
+                pass
+            #
+            i = 0
+            while i < n:
+                a = A()
+                assert isinstance(a, A)
+                assert not isinstance(a, int)
+                a.x = 2
+                i = i + a.x
+            return i
+        #
+        log = self.run(main, [1000], threshold = 400)
+        assert log.result == 1000
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i7 = int_lt(i5, i6)
+            guard_true(i7, descr=...)
+            guard_not_invalidated(descr=...)
+            i9 = int_add_ovf(i5, 2)
+            guard_no_overflow(descr=...)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, i9, i6, descr=<Loop0>)
+        """)
+
+    def test_load_attr(self):
+        src = '''
+            class A(object):
+                pass
+            a = A()
+            a.x = 2
+            def main(n):
+                i = 0
+                while i < n:
+                    i = i + a.x
+                return i
+        '''
+        log = self.run(src, [1000])
+        assert log.result == 1000
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i9 = int_lt(i5, i6)
+            guard_true(i9, descr=...)
+            guard_not_invalidated(descr=...)
+            i10 = int_add_ovf(i5, i7)
+            guard_no_overflow(descr=...)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, i10, i6, p7, i7, p8, descr=<Loop0>)
+        """)
+
+    def test_getattr_with_dynamic_attribute(self):
+        src = """
+        class A(object):
+            pass
+
+        l = ["x", "y"]
+
+        def main():
+            sum = 0
+            a = A()
+            a.a1 = 0
+            a.a2 = 0
+            a.a3 = 0
+            a.a4 = 0
+            a.a5 = 0 # workaround, because the first five attributes need a promotion
+            a.x = 1
+            a.y = 2
+            i = 0
+            while i < 500:
+                name = l[i % 2]
+                sum += getattr(a, name)
+                i += 1
+            return sum
+        """
+        log = self.run(src, [])
+        assert log.result == 250 + 250*2
+        loops = log.loops_by_filename(self.filepath)
+        assert len(loops) == 1
+
+    def test_mutate_class(self):
+        def fn(n):
+            class A(object):
+                count = 1
+                def __init__(self, a):
+                    self.a = a
+                def f(self):
+                    return self.count
+            i = 0
+            a = A(1)
+            while i < n:
+                A.count += 1 # ID: mutate
+                i = a.f()    # ID: meth1
+            return i
+        #
+        log = self.run(fn, [1000], threshold=10)
+        assert log.result == 1000
+        #
+        # first, we test the entry bridge
+        # -------------------------------
+        entry_bridge, = log.loops_by_filename(self.filepath, is_entry_bridge=True)
+        ops = entry_bridge.ops_by_id('mutate', opcode='LOAD_ATTR')
+        assert log.opnames(ops) == ['guard_value', 'guard_not_invalidated',
+                                    'getfield_gc', 'guard_nonnull_class']
+        # the STORE_ATTR is folded away
+        assert list(entry_bridge.ops_by_id('meth1', opcode='STORE_ATTR')) == []
+        #
+        # then, the actual loop
+        # ----------------------
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i9 = int_lt(i8, i7)
+            guard_true(i9, descr=.*)
+            guard_not_invalidated(descr=.*)
+            i11 = int_add(i8, 1)
+            i12 = force_token()
+            --TICK--
+            p20 = new_with_vtable(ConstClass(W_IntObject))
+            setfield_gc(p20, i11, descr=<SignedFieldDescr.*W_IntObject.inst_intval .*>)
+            setfield_gc(ConstPtr(ptr21), p20, descr=<GcPtrFieldDescr .*TypeCell.inst_w_value .*>)
+            jump(p0, p1, p2, p3, p4, p20, p6, i11, i7, descr=<Loop.>)
+        """)
+
+    def test_oldstyle_newstyle_mix(self):
+        def main():
+            class A:
+                pass
+
+            class B(object, A):
+                def __init__(self, x):
+                    self.x = x
+
+            i = 0
+            b = B(1)
+            while i < 100:
+                v = b.x # ID: loadattr
+                i += v
+            return i
+
+        log = self.run(main, [], threshold=80)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id('loadattr',
+        '''
+        guard_not_invalidated(descr=...)
+        i19 = call(ConstClass(ll_dict_lookup), _, _, _, descr=...)
+        guard_no_exception(descr=...)
+        i21 = int_and(i19, _)
+        i22 = int_is_true(i21)
+        guard_true(i22, descr=...)
+        i26 = call(ConstClass(ll_dict_lookup), _, _, _, descr=...)
+        guard_no_exception(descr=...)
+        i28 = int_and(i26, _)
+        i29 = int_is_true(i28)
+        guard_true(i29, descr=...)
+        ''')
+
+    def test_python_contains(self):
+        def main():
+            class A(object):
+                def __contains__(self, v):
+                    return True
+
+            i = 0
+            a = A()
+            while i < 100:
+                i += i in a # ID: contains
+                b = 0       # to make sure that JUMP_ABSOLUTE is not part of the ID
+
+        log = self.run(main, [], threshold=80)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id("contains", """
+            guard_not_invalidated(descr=...)
+            i11 = force_token()
+            i12 = int_add_ovf(i5, i7)
+            guard_no_overflow(descr=...)
+        """)
+
+    def test_id_compare_optimization(self):
+        def main():
+            class A(object):
+                pass
+            #
+            i = 0
+            a = A()
+            while i < 300:
+                new_a = A()
+                if new_a != a:  # ID: compare
+                    pass
+                i += 1
+            return i
+        #
+        log = self.run(main, [])
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id("compare", "") # optimized away
+
diff --git a/pypy/module/pypyjit/test_pypy_c/test_intbound.py b/pypy/module/pypyjit/test_pypy_c/test_intbound.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_intbound.py
@@ -0,0 +1,296 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestIntbound(BaseTestPyPyC):
+
+    def test_intbound_simple(self):
+        """
+        This test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        ops = ('<', '>', '<=', '>=', '==', '!=')
+        nbr = (3, 7)
+        for o1 in ops:
+            for o2 in ops:
+                for n1 in nbr:
+                    for n2 in nbr:
+                        src = '''
+                        def f(i):
+                            a, b = 3, 3
+                            if i %s %d:
+                                a = 0
+                            else:
+                                a = 1
+                            if i %s %d:
+                                b = 0
+                            else:
+                                b = 1
+                            return a + b * 2
+
+                        def main():
+                            res = [0] * 4
+                            idx = []
+                            for i in range(15):
+                                idx.extend([i] * 15)
+                            for i in idx:
+                                res[f(i)] += 1
+                            return res
+
+                        ''' % (o1, n1, o2, n2)
+                        yield self.run_and_check, src
+
+    def test_intbound_addsub_mix(self):
+        """
+        This test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        tests = ('i > 4', 'i > 2', 'i + 1 > 2', '1 + i > 4',
+                 'i - 1 > 1', '1 - i > 1', '1 - i < -3',
+                 'i == 1', 'i == 5', 'i != 1', '-2 * i < -4')
+        for t1 in tests:
+            for t2 in tests:
+                src = '''
+                def f(i):
+                    a, b = 3, 3
+                    if %s:
+                        a = 0
+                    else:
+                        a = 1
+                    if %s:
+                        b = 0
+                    else:
+                        b = 1
+                    return a + b * 2
+
+                def main():
+                    res = [0] * 4
+                    idx = []
+                    for i in range(15):
+                        idx.extend([i] * 15)
+                    for i in idx:
+                        res[f(i)] += 1
+                    return res
+
+                ''' % (t1, t2)
+                yield self.run_and_check, src
+
+    def test_intbound_gt(self):
+        def main(n):
+            i, a, b = 0, 0, 0
+            while i < n:
+                if i > -1:
+                    a += 1
+                if i > -2:
+                    b += 1
+                i += 1
+            return (a, b)
+        #
+        log = self.run(main, [300])
+        assert log.result == (300, 300)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i10 = int_lt(i8, i9)
+            guard_true(i10, descr=...)
+            i12 = int_add_ovf(i7, 1)
+            guard_no_overflow(descr=...)
+            i14 = int_add_ovf(i6, 1)
+            guard_no_overflow(descr=...)
+            i17 = int_add(i8, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, i14, i12, i17, p8, i9, descr=<Loop0>)
+        """)
+
+    def test_intbound_sub_lt(self):
+        def main():
+            i, a = 0, 0
+            while i < 300:
+                if i - 10 < 295:
+                    a += 1
+                i += 1
+            return a
+        #
+        log = self.run(main, [])
+        assert log.result == 300
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i7 = int_lt(i5, 300)
+            guard_true(i7, descr=...)
+            i9 = int_sub_ovf(i5, 10)
+            guard_no_overflow(descr=...)
+            i11 = int_add_ovf(i4, 1)
+            guard_no_overflow(descr=...)
+            i13 = int_add(i5, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, i11, i13, descr=<Loop0>)
+        """)
+
+    def test_intbound_addsub_ge(self):
+        def main(n):
+            i, a, b = 0, 0, 0
+            while i < n:
+                if i + 5 >= 5:
+                    a += 1
+                if i - 1 >= -1:
+                    b += 1
+                i += 1
+            return (a, b)
+        #
+        log = self.run(main, [300])
+        assert log.result == (300, 300)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i10 = int_lt(i8, i9)
+            guard_true(i10, descr=...)
+            i12 = int_add_ovf(i8, 5)
+            guard_no_overflow(descr=...)
+            i14 = int_add_ovf(i7, 1)
+            guard_no_overflow(descr=...)
+            i16 = int_add_ovf(i6, 1)
+            guard_no_overflow(descr=...)
+            i19 = int_add(i8, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, i16, i14, i19, p8, i9, descr=<Loop0>)
+        """)
+
+    def test_intbound_addmul_ge(self):
+        def main(n):
+            i, a, b = 0, 0, 0
+            while i < 300:
+                if i + 5 >= 5:
+                    a += 1
+                if 2 * i >= 0:
+                    b += 1
+                i += 1
+            return (a, b)
+        #
+        log = self.run(main, [300])
+        assert log.result == (300, 300)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i10 = int_lt(i8, 300)
+            guard_true(i10, descr=...)
+            i12 = int_add(i8, 5)
+            i14 = int_add_ovf(i7, 1)
+            guard_no_overflow(descr=...)
+            i16 = int_lshift(i8, 1)
+            i18 = int_add_ovf(i6, 1)
+            guard_no_overflow(descr=...)
+            i21 = int_add(i8, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, i18, i14, i21, p8, descr=<Loop0>)
+        """)
+
+    def test_intbound_eq(self):
+        def main(a, n):
+            i, s = 0, 0
+            while i < 300:
+                if a == 7:
+                    s += a + 1
+                elif i == 10:
+                    s += i
+                else:
+                    s += 1
+                i += 1
+            return s
+        #
+        log = self.run(main, [7, 300])
+        assert log.result == main(7, 300)
+        log = self.run(main, [10, 300])
+        assert log.result == main(10, 300)
+        log = self.run(main, [42, 300])
+        assert log.result == main(42, 300)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i10 = int_lt(i8, 300)
+            guard_true(i10, descr=...)
+            i12 = int_eq(i8, 10)
+            guard_false(i12, descr=...)
+            i14 = int_add_ovf(i7, 1)
+            guard_no_overflow(descr=...)
+            i16 = int_add(i8, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, p6, i14, i16, p8, descr=<Loop0>)
+        """)
+
+    def test_intbound_mul(self):
+        def main(a):
+            i, s = 0, 0
+            while i < 300:
+                assert i >= 0
+                if 2 * i < 30000:
+                    s += 1
+                else:
+                    s += a
+                i += 1
+            return s
+        #
+        log = self.run(main, [7])
+        assert log.result == 300
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i8 = int_lt(i6, 300)
+            guard_true(i8, descr=...)
+            i10 = int_lshift(i6, 1)
+            i12 = int_add_ovf(i5, 1)
+            guard_no_overflow(descr=...)
+            i14 = int_add(i6, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, i12, i14, descr=<Loop0>)
+        """)
+
+    def test_assert(self):
+        def main(a):
+            i, s = 0, 0
+            while i < 300:
+                assert a == 7
+                s += a + 1
+                i += 1
+            return s
+        log = self.run(main, [7])
+        assert log.result == 300*8
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i8 = int_lt(i6, 300)
+            guard_true(i8, descr=...)
+            i10 = int_add_ovf(i5, 8)
+            guard_no_overflow(descr=...)
+            i12 = int_add(i6, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, i10, i12, descr=<Loop0>)
+        """)
+
+    def test_xor(self):
+        def main(b):
+            a = sa = 0
+            while a < 300:
+                if a > 0: # Specialises the loop
+                    pass
+                if b > 10:
+                    pass
+                if a^b >= 0:  # ID: guard
+                    sa += 1
+                sa += a^a     # ID: a_xor_a
+                a += 1
+            return sa
+
+        log = self.run(main, [11])
+        assert log.result == 300
+        loop, = log.loops_by_filename(self.filepath)
+        # if both are >=0, a^b is known to be >=0
+        # note that we know that b>10
+        assert loop.match_by_id('guard', """
+            i10 = int_xor(i5, i7)
+        """)
+        #
+        # x^x is always optimized to 0
+        assert loop.match_by_id('a_xor_a', "")
+
+        log = self.run(main, [9])
+        assert log.result == 300
+        loop, = log.loops_by_filename(self.filepath)
+        # we don't know that b>10, hence we cannot optimize it
+        assert loop.match_by_id('guard', """
+            i10 = int_xor(i5, i7)
+            i12 = int_ge(i10, 0)
+            guard_true(i12, descr=...)
+        """)
diff --git a/pypy/module/pypyjit/test_pypy_c/test_min_max.py b/pypy/module/pypyjit/test_pypy_c/test_min_max.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_min_max.py
@@ -0,0 +1,68 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestMinMax(BaseTestPyPyC):
+
+    def test_min_max(self):
+        def main():
+            i=0
+            sa=0
+            while i < 300:
+                sa+=min(max(i, 3000), 4000)
+                i+=1
+            return sa
+        log = self.run(main, [])
+        assert log.result == 300*3000
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i7 = int_lt(i4, 300)
+            guard_true(i7, descr=...)
+            guard_not_invalidated(descr=...)
+            i9 = int_add_ovf(i5, 3000)
+            guard_no_overflow(descr=...)
+            i11 = int_add(i4, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, i11, i9, descr=<Loop0>)
+        """)
+
+    def test_silly_max(self):
+        def main():
+            i = 2
+            sa = 0
+            while i < 300:
+                lst = range(i)
+                sa += max(*lst) # ID: max
+                i += 1
+            return sa
+        log = self.run(main, [])
+        assert log.result == main()
+        loop, = log.loops_by_filename(self.filepath)
+        # We don't want too many guards, but we do expect a residual call to min_max_loop
+        guards = [n for n in log.opnames(loop.ops_by_id("max")) if n.startswith('guard')]
+        assert len(guards) < 20
+        assert loop.match_by_id('max',"""
+            ...
+            p76 = call_may_force(ConstClass(min_max_loop__max), _, _, descr=...)
+            ...
+        """)
+
+    def test_iter_max(self):
+        def main():
+            i = 2
+            sa = 0
+            while i < 300:
+                lst = range(i)
+                sa += max(lst) # ID: max
+                i += 1
+            return sa
+        log = self.run(main, [])
+        assert log.result == main()
+        loop, = log.loops_by_filename(self.filepath)
+        # We don't want too many guards, but we do expect a residual call to min_max_loop
+        guards = [n for n in log.opnames(loop.ops_by_id("max")) if n.startswith('guard')]
+        assert len(guards) < 20
+        assert loop.match_by_id('max',"""
+            ...
+            p76 = call_may_force(ConstClass(min_max_loop__max), _, _, descr=...)
+            ...
+        """)
diff --git a/pypy/module/pypyjit/test_pypy_c/test_pypy_c_new.py b/pypy/module/pypyjit/test_pypy_c/test_misc.py
rename from pypy/module/pypyjit/test_pypy_c/test_pypy_c_new.py
rename to pypy/module/pypyjit/test_pypy_c/test_misc.py
--- a/pypy/module/pypyjit/test_pypy_c/test_pypy_c_new.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_misc.py
@@ -1,13 +1,8 @@
-import py, sys, re
-import subprocess
-from lib_pypy import disassembler
-from pypy.tool.udir import udir
-from pypy.tool import logparser
-from pypy.module.pypyjit.test_pypy_c.model import Log
-from pypy.module.pypyjit.test_pypy_c.test_model import BaseTestPyPyC
+import py, sys
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
 
 
-class TestPyPyCNew(BaseTestPyPyC):
+class TestMisc(BaseTestPyPyC):
     def test_f1(self):
         def f1(n):
             "Arbitrary test function."
@@ -76,377 +71,6 @@
         """)
 
 
-    def test_recursive_call(self):
-        def fn():
-            def rec(n):
-                if n == 0:
-                    return 0
-                return 1 + rec(n-1)
-            #
-            # this loop is traced and then aborted, because the trace is too
-            # long. But then "rec" is marked as "don't inline"
-            i = 0
-            j = 0
-            while i < 20:
-                i += 1
-                j += rec(100)
-            #
-            # next time we try to trace "rec", instead of inlining we compile
-            # it separately and generate a call_assembler
-            i = 0
-            j = 0
-            while i < 20:
-                i += 1
-                j += rec(100) # ID: call_rec
-                a = 0
-            return j
-        #
-        log = self.run(fn, [], threshold=18)
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match_by_id('call_rec', """
-            ...
-            p53 = call_assembler(..., descr=...)
-            guard_not_forced(descr=...)
-            guard_no_exception(descr=...)
-            ...
-        """)
-
-    def test_cmp_exc(self):
-        def f1(n):
-            # So we don't get a LOAD_GLOBAL op
-            KE = KeyError
-            i = 0
-            while i < n:
-                try:
-                    raise KE
-                except KE: # ID: except
-                    i += 1
-            return i
-
-        log = self.run(f1, [10000])
-        assert log.result == 10000
-        loop, = log.loops_by_id("except")
-        ops = list(loop.ops_by_id("except", opcode="COMPARE_OP"))
-        assert ops == []
-
-    def test_simple_call(self):
-        src = """
-            OFFSET = 0
-            def f(i):
-                return i + 1 + OFFSET # ID: add
-            def main(n):
-                i = 0
-                while i < n+OFFSET:   # ID: cond
-                    i = f(f(i))       # ID: call
-                    a = 0
-                return i
-        """
-        log = self.run(src, [1000], threshold=400)
-        assert log.result == 1000
-        # first, we test what is inside the entry bridge
-        # -----------------------------------------------
-        entry_bridge, = log.loops_by_id('call', is_entry_bridge=True)
-        # LOAD_GLOBAL of OFFSET
-        ops = entry_bridge.ops_by_id('cond', opcode='LOAD_GLOBAL')
-        assert log.opnames(ops) == ["guard_value",
-                                    "getfield_gc", "guard_value",
-                                    "getfield_gc", "guard_isnull",
-                                    "getfield_gc", "guard_nonnull_class"]
-        # LOAD_GLOBAL of OFFSET but in different function partially folded
-        # away
-        # XXX could be improved
-        ops = entry_bridge.ops_by_id('add', opcode='LOAD_GLOBAL')
-        assert log.opnames(ops) == ["guard_value", "getfield_gc", "guard_isnull"]
-        #
-        # two LOAD_GLOBAL of f, the second is folded away
-        ops = entry_bridge.ops_by_id('call', opcode='LOAD_GLOBAL')
-        assert log.opnames(ops) == ["getfield_gc", "guard_nonnull_class"]
-        #
-        assert entry_bridge.match_by_id('call', """
-            p29 = getfield_gc(ConstPtr(ptr28), descr=<GcPtrFieldDescr pypy.objspace.std.celldict.ModuleCell.inst_w_value .*>)
-            guard_nonnull_class(p29, ConstClass(Function), descr=<Guard18>)
-            p33 = getfield_gc(p29, descr=<GcPtrFieldDescr pypy.interpreter.function.Function.inst_code .*>)
-            guard_value(p33, ConstPtr(ptr34), descr=<Guard19>)
-            p35 = getfield_gc(p29, descr=<GcPtrFieldDescr pypy.interpreter.function.Function.inst_w_func_globals .*>)
-            p36 = getfield_gc(p29, descr=<GcPtrFieldDescr pypy.interpreter.function.Function.inst_closure .*>)
-            p38 = call(ConstClass(getexecutioncontext), descr=<GcPtrCallDescr>)
-            p39 = getfield_gc(p38, descr=<GcPtrFieldDescr pypy.interpreter.executioncontext.ExecutionContext.inst_topframeref .*>)
-            i40 = force_token()
-            p41 = getfield_gc(p38, descr=<GcPtrFieldDescr pypy.interpreter.executioncontext.ExecutionContext.inst_w_tracefunc .*>)
-            guard_isnull(p41, descr=<Guard20>)
-            i42 = getfield_gc(p38, descr=<NonGcPtrFieldDescr pypy.interpreter.executioncontext.ExecutionContext.inst_profilefunc .*>)
-            i43 = int_is_zero(i42)
-            guard_true(i43, descr=<Guard21>)
-            i50 = force_token()
-        """)
-        #
-        # then, we test the actual loop
-        # -----------------------------
-        loop, = log.loops_by_id('call')
-        assert loop.match("""
-            i12 = int_lt(i5, i6)
-            guard_true(i12, descr=<Guard3>)
-            i13 = force_token()
-            i15 = int_add(i5, 1)
-            i16 = int_add_ovf(i15, i7)
-            guard_no_overflow(descr=<Guard4>)
-            i18 = force_token()
-            i20 = int_add_ovf(i16, 1)
-            guard_no_overflow(descr=<Guard5>)
-            i21 = int_add_ovf(i20, i7)
-            guard_no_overflow(descr=<Guard6>)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, i21, i6, i7, p8, p9, p10, p11, descr=<Loop0>)
-        """)
-
-    def test_method_call(self):
-        def fn(n):
-            class A(object):
-                def __init__(self, a):
-                    self.a = a
-                def f(self, i):
-                    return self.a + i
-            i = 0
-            a = A(1)
-            while i < n:
-                x = a.f(i)    # ID: meth1
-                i = a.f(x)    # ID: meth2
-            return i
-        #
-        log = self.run(fn, [1000], threshold=400)
-        assert log.result == 1000
-        #
-        # first, we test the entry bridge
-        # -------------------------------
-        entry_bridge, = log.loops_by_filename(self.filepath, is_entry_bridge=True)
-        ops = entry_bridge.ops_by_id('meth1', opcode='LOOKUP_METHOD')
-        assert log.opnames(ops) == ['guard_value', 'getfield_gc', 'guard_value',
-                                    'guard_not_invalidated']
-        # the second LOOKUP_METHOD is folded away
-        assert list(entry_bridge.ops_by_id('meth2', opcode='LOOKUP_METHOD')) == []
-        #
-        # then, the actual loop
-        # ----------------------
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i15 = int_lt(i6, i9)
-            guard_true(i15, descr=<Guard3>)
-            guard_not_invalidated(descr=<Guard4>)
-            i16 = force_token()
-            i17 = int_add_ovf(i10, i6)
-            guard_no_overflow(descr=<Guard5>)
-            i18 = force_token()
-            i19 = int_add_ovf(i10, i17)
-            guard_no_overflow(descr=<Guard6>)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, p5, i19, p7, i17, i9, i10, p11, p12, p13, descr=<Loop0>)
-        """)
-
-    def test_static_classmethod_call(self):
-        def fn(n):
-            class A(object):
-                @classmethod
-                def f(cls, i):
-                    return i + (cls is A) + 1
-                @staticmethod
-                def g(i):
-                    return i - 1
-            #
-            i = 0
-            a = A()
-            while i < n:
-                x = a.f(i)
-                i = a.g(x)
-            return i
-        #
-        log = self.run(fn, [1000], threshold=400)
-        assert log.result == 1000
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i14 = int_lt(i6, i9)
-            guard_true(i14, descr=<Guard3>)
-            guard_not_invalidated(descr=<Guard4>)
-            i15 = force_token()
-            i17 = int_add_ovf(i8, 1)
-            guard_no_overflow(descr=<Guard5>)
-            i18 = force_token()
-            --TICK--
-            jump(p0, p1, p2, p3, p4, p5, i8, p7, i17, i9, p10, p11, p12, descr=<Loop0>)
-        """)
-
-    def test_default_and_kw(self):
-        def main(n):
-            def f(i, j=1):
-                return i + j
-            #
-            i = 0
-            while i < n:
-                i = f(f(i), j=1) # ID: call
-                a = 0
-            return i
-        #
-        log = self.run(main, [1000], threshold=400)
-        assert log.result == 1000
-        loop, = log.loops_by_id('call')
-        assert loop.match_by_id('call', """
-            i14 = force_token()
-            i16 = force_token()
-        """)
-
-    def test_kwargs(self):
-        # this is not a very precise test, could be improved
-        def main(x):
-            def g(**args):
-                return len(args)
-            #
-            s = 0
-            d = {}
-            for i in range(x):
-                s += g(**d)       # ID: call
-                d[str(i)] = i
-                if i % 100 == 99:
-                    d = {}
-            return s
-        #
-        log = self.run(main, [1000], threshold=400)
-        assert log.result == 49500
-        loop, = log.loops_by_id('call')
-        ops = log.opnames(loop.ops_by_id('call'))
-        guards = [ops for ops in ops if ops.startswith('guard')]
-        assert len(guards) <= 5
-
-    def test_stararg_virtual(self):
-        def main(x):
-            def g(*args):
-                return len(args)
-            def h(a, b, c):
-                return c
-            #
-            s = 0
-            for i in range(x):
-                l = [i, x, 2]
-                s += g(*l)       # ID: g1
-                s += h(*l)       # ID: h1
-                s += g(i, x, 2)  # ID: g2
-                a = 0
-            for i in range(x):
-                l = [x, 2]
-                s += g(i, *l)    # ID: g3
-                s += h(i, *l)    # ID: h2
-                a = 0
-            return s
-        #
-        log = self.run(main, [1000], threshold=400)
-        assert log.result == 13000
-        loop0, = log.loops_by_id('g1')
-        assert loop0.match_by_id('g1', """
-            i20 = force_token()
-            setfield_gc(p4, i19, descr=<.*W_AbstractSeqIterObject.inst_index .*>)
-            i22 = int_add_ovf(i8, 3)
-            guard_no_overflow(descr=<Guard4>)
-        """)
-        assert loop0.match_by_id('h1', """
-            i20 = force_token()
-            i22 = int_add_ovf(i8, 2)
-            guard_no_overflow(descr=<Guard5>)
-        """)
-        assert loop0.match_by_id('g2', """
-            i27 = force_token()
-            i29 = int_add_ovf(i26, 3)
-            guard_no_overflow(descr=<Guard6>)
-        """)
-        #
-        loop1, = log.loops_by_id('g3')
-        assert loop1.match_by_id('g3', """
-            i21 = force_token()
-            setfield_gc(p4, i20, descr=<.* .*W_AbstractSeqIterObject.inst_index .*>)
-            i23 = int_add_ovf(i9, 3)
-            guard_no_overflow(descr=<Guard37>)
-        """)
-        assert loop1.match_by_id('h2', """
-            i25 = force_token()
-            i27 = int_add_ovf(i23, 2)
-            guard_no_overflow(descr=<Guard38>)
-        """)
-
-    def test_stararg(self):
-        def main(x):
-            def g(*args):
-                return args[-1]
-            def h(*args):
-                return len(args)
-            #
-            s = 0
-            l = []
-            i = 0
-            while i < x:
-                l.append(1)
-                s += g(*l)     # ID: g
-                i = h(*l)      # ID: h
-                a = 0
-            return s
-        #
-        log = self.run(main, [1000], threshold=400)
-        assert log.result == 1000
-        loop, = log.loops_by_id('g')
-        ops_g = log.opnames(loop.ops_by_id('g'))
-        ops_h = log.opnames(loop.ops_by_id('h'))
-        ops = ops_g + ops_h
-        assert 'new_with_vtable' not in ops
-        assert 'call_may_force' not in ops
-
-    def test_virtual_instance(self):
-        def main(n):
-            class A(object):
-                pass
-            #
-            i = 0
-            while i < n:
-                a = A()
-                assert isinstance(a, A)
-                assert not isinstance(a, int)
-                a.x = 2
-                i = i + a.x
-            return i
-        #
-        log = self.run(main, [1000], threshold = 400)
-        assert log.result == 1000
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i7 = int_lt(i5, i6)
-            guard_true(i7, descr=<Guard3>)
-            guard_not_invalidated(descr=<Guard4>)
-            i9 = int_add_ovf(i5, 2)
-            guard_no_overflow(descr=<Guard5>)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, i9, i6, descr=<Loop0>)
-        """)
-
-    def test_load_attr(self):
-        src = '''
-            class A(object):
-                pass
-            a = A()
-            a.x = 2
-            def main(n):
-                i = 0
-                while i < n:
-                    i = i + a.x
-                return i
-        '''
-        log = self.run(src, [1000], threshold=400)
-        assert log.result == 1000
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i9 = int_lt(i5, i6)
-            guard_true(i9, descr=<Guard3>)
-            guard_not_invalidated(descr=<Guard4>)
-            i10 = int_add_ovf(i5, i7)
-            guard_no_overflow(descr=<Guard5>)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, i10, i6, p7, i7, p8, descr=<Loop0>)
-        """)
-
     def test_mixed_type_loop(self):
         def main(n):
             i = 0.0
@@ -455,40 +79,17 @@
                 i = j + i
             return i
         #
-        log = self.run(main, [1000], threshold=400)
+        log = self.run(main, [1000])
         assert log.result == 1000.0
         loop, = log.loops_by_filename(self.filepath)
         assert loop.match("""
             i9 = float_lt(f5, f7)
-            guard_true(i9, descr=<Guard3>)
+            guard_true(i9, descr=...)
             f10 = float_add(f8, f5)
             --TICK--
             jump(p0, p1, p2, p3, p4, f10, p6, f7, f8, descr=<Loop0>)
         """)
 
-    def test_call_builtin_function(self):
-        def main(n):
-            i = 2
-            l = []
-            while i < n:
-                i += 1
-                l.append(i)    # ID: append
-                a = 0
-            return i, len(l)
-        #
-        log = self.run(main, [1000], threshold=400)
-        assert log.result == (1000, 998)
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match_by_id('append', """
-            i13 = getfield_gc(p8, descr=<SignedFieldDescr list.length .*>)
-            i15 = int_add(i13, 1)
-            call(ConstClass(_ll_list_resize_ge__listPtr_Signed), p8, i15, descr=<VoidCallDescr>)
-            guard_no_exception(descr=<Guard4>)
-            p17 = getfield_gc(p8, descr=<GcPtrFieldDescr list.items .*>)
-            p19 = new_with_vtable(ConstClass(W_IntObject))
-            setfield_gc(p19, i12, descr=<SignedFieldDescr .*W_IntObject.inst_intval .*>)
-            setarrayitem_gc(p17, i13, p19, descr=<GcPtrArrayDescr>)
-        """)
 
     def test_range_iter(self):
         def main(n):
@@ -501,98 +102,28 @@
                 a = 0
             return s
         #
-        log = self.run(main, [1000], threshold=400)
+        log = self.run(main, [1000])
         assert log.result == 1000 * 999 / 2
         loop, = log.loops_by_filename(self.filepath)
         assert loop.match("""
             i16 = int_ge(i11, i12)
-            guard_false(i16, descr=<Guard3>)
+            guard_false(i16, descr=...)
             i17 = int_mul(i11, i14)
             i18 = int_add(i15, i17)
             i20 = int_add(i11, 1)
             i21 = force_token()
             setfield_gc(p4, i20, descr=<.* .*W_AbstractSeqIterObject.inst_index .*>)
-            guard_not_invalidated(descr=<Guard4>)
+            guard_not_invalidated(descr=...)
             i23 = int_lt(i18, 0)
-            guard_false(i23, descr=<Guard5>)
+            guard_false(i23, descr=...)
             i25 = int_ge(i18, i9)
-            guard_false(i25, descr=<Guard6>)
+            guard_false(i25, descr=...)
             i27 = int_add_ovf(i7, i18)
-            guard_no_overflow(descr=<Guard7>)
+            guard_no_overflow(descr=...)
             --TICK--
             jump(..., descr=<Loop0>)
         """)
 
-    def test_exception_inside_loop_1(self):
-        def main(n):
-            while n:
-                try:
-                    raise ValueError
-                except ValueError:
-                    pass
-                n -= 1
-            return n
-        #
-        log = self.run(main, [1000], threshold=400)
-        assert log.result == 0
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-        i5 = int_is_true(i3)
-        guard_true(i5, descr=<Guard3>)
-        guard_not_invalidated(descr=<Guard4>)
-        --EXC-TICK--
-        i12 = int_sub_ovf(i3, 1)
-        guard_no_overflow(descr=<Guard6>)
-        --TICK--
-        jump(..., descr=<Loop0>)
-        """)
-
-    def test_exception_inside_loop_2(self):
-        def main(n):
-            def g(n):
-                raise ValueError(n)  # ID: raise
-            def f(n):
-                g(n)
-            #
-            while n:
-                try:
-                    f(n)
-                except ValueError:
-                    pass
-                n -= 1
-            return n
-        #
-        log = self.run(main, [1000], threshold=400)
-        assert log.result == 0
-        loop, = log.loops_by_filename(self.filepath)
-        ops = log.opnames(loop.ops_by_id('raise'))
-        assert 'new' not in ops
-
-    def test_reraise(self):
-        def f(n):
-            i = 0
-            while i < n:
-                try:
-                    try:
-                        raise KeyError
-                    except KeyError:
-                        raise
-                except KeyError:
-                    i += 1
-            return i
-
-        log = self.run(f, [100000])
-        assert log.result == 100000
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i7 = int_lt(i4, i5)
-            guard_true(i7, descr=<Guard3>)
-            guard_not_invalidated(descr=<Guard4>)
-            --EXC-TICK--
-            i14 = int_add(i4, 1)
-            --TICK--
-            jump(..., descr=<Loop0>)
-        """)
 
     def test_chain_of_guards(self):
         src = """
@@ -612,445 +143,11 @@
                 i += 1
             return sum
         """
-        log = self.run(src, [0], threshold=400)
+        log = self.run(src, [0])
         assert log.result == 500*3
         loops = log.loops_by_filename(self.filepath)
         assert len(loops) == 1
 
-    def test_getattr_with_dynamic_attribute(self):
-        src = """
-        class A(object):
-            pass
-
-        l = ["x", "y"]
-
-        def main():
-            sum = 0
-            a = A()
-            a.a1 = 0
-            a.a2 = 0
-            a.a3 = 0
-            a.a4 = 0
-            a.a5 = 0 # workaround, because the first five attributes need a promotion
-            a.x = 1
-            a.y = 2
-            i = 0
-            while i < 500:
-                name = l[i % 2]
-                sum += getattr(a, name)
-                i += 1
-            return sum
-        """
-        log = self.run(src, [], threshold=400)
-        assert log.result == 250 + 250*2
-        loops = log.loops_by_filename(self.filepath)
-        assert len(loops) == 1
-
-    def test_blockstack_virtualizable(self):
-        def main(n):
-            from pypyjit import residual_call
-            i = 0
-            while i < n:
-                try:
-                    residual_call(len, [])   # ID: call
-                except:
-                    pass
-                i += 1
-            return i
-        #
-        log = self.run(main, [500], threshold=400)
-        assert log.result == 500
-        loop, = log.loops_by_id('call')
-        assert loop.match_by_id('call', opcode='CALL_FUNCTION', expected_src="""
-            # make sure that the "block" is not allocated
-            ...
-            i20 = force_token()
-            setfield_gc(p0, i20, descr=<SignedFieldDescr .*PyFrame.vable_token .*>)
-            p22 = new_with_vtable(19511408)
-            p24 = new_array(1, descr=<GcPtrArrayDescr>)
-            p26 = new_with_vtable(ConstClass(W_ListObject))
-            p27 = new(descr=<SizeDescr .*>)
-            p29 = new_array(0, descr=<GcPtrArrayDescr>)
-            setfield_gc(p27, p29, descr=<GcPtrFieldDescr list.items .*>)
-            setfield_gc(p26, p27, descr=<.* .*W_ListObject.inst_wrappeditems .*>)
-            setarrayitem_gc(p24, 0, p26, descr=<GcPtrArrayDescr>)
-            setfield_gc(p22, p24, descr=<GcPtrFieldDescr .*Arguments.inst_arguments_w .*>)
-            p32 = call_may_force(11376960, p18, p22, descr=<GcPtrCallDescr>)
-            ...
-        """)
-
-    def test_import_in_function(self):
-        def main(n):
-            i = 0
-            while i < n:
-                from sys import version  # ID: import
-                i += 1
-            return i
-        #
-        log = self.run(main, [500], threshold=400)
-        assert log.result == 500
-        loop, = log.loops_by_id('import')
-        assert loop.match_by_id('import', """
-            p11 = getfield_gc(ConstPtr(ptr10), descr=<GcPtrFieldDescr pypy.objspace.std.celldict.ModuleCell.inst_w_value 8>)
-            guard_value(p11, ConstPtr(ptr12), descr=<Guard4>)
-            guard_not_invalidated(descr=<Guard5>)
-            p14 = getfield_gc(ConstPtr(ptr13), descr=<GcPtrFieldDescr pypy.objspace.std.celldict.ModuleCell.inst_w_value 8>)
-            p16 = getfield_gc(ConstPtr(ptr15), descr=<GcPtrFieldDescr pypy.objspace.std.celldict.ModuleCell.inst_w_value 8>)
-            guard_value(p14, ConstPtr(ptr17), descr=<Guard6>)
-            guard_isnull(p16, descr=<Guard7>)
-        """)
-
-    def test_import_fast_path(self, tmpdir):
-        pkg = tmpdir.join('mypkg').ensure(dir=True)
-        pkg.join('__init__.py').write("")
-        pkg.join('mod.py').write(str(py.code.Source("""
-            def do_the_import():
-                import sys
-        """)))
-        def main(path, n):
-            import sys
-            sys.path.append(path)
-            from mypkg.mod import do_the_import
-            for i in range(n):
-                do_the_import()
-        #
-        log = self.run(main, [str(tmpdir), 300], threshold=200)
-        loop, = log.loops_by_filename(self.filepath)
-        # this is a check for a slow-down that introduced a
-        # call_may_force(absolute_import_with_lock).
-        for opname in log.opnames(loop.allops(opcode="IMPORT_NAME")):
-            assert 'call' not in opname    # no call-like opcode
-
-    def test_arraycopy_disappears(self):
-        def main(n):
-            i = 0
-            while i < n:
-                t = (1, 2, 3, i + 1)
-                t2 = t[:]
-                del t
-                i = t2[3]
-                del t2
-            return i
-        #
-        log = self.run(main, [500], threshold=400)
-        assert log.result == 500
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i7 = int_lt(i5, i6)
-            guard_true(i7, descr=<Guard3>)
-            i9 = int_add(i5, 1)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, i9, i6, descr=<Loop0>)
-        """)
-
-    def test_boolrewrite_inverse(self):
-        """
-        Test for this case::
-            guard(i < x)
-            ...
-            guard(i >= y)
-
-        where x and y can be either constants or variables. There are cases in
-        which the second guard is proven to be always true.
-        """
-
-        for a, b, res, opt_expected in (('2000', '2000', 20001000, True),
-                                        ( '500',  '500', 15001500, True),
-                                        ( '300',  '600', 16001700, False),
-                                        (   'a',    'b', 16001700, False),
-                                        (   'a',    'a', 13001700, True)):
-            src = """
-                def main():
-                    sa = 0
-                    a = 300
-                    b = 600
-                    for i in range(1000):
-                        if i < %s:         # ID: lt
-                            sa += 1
-                        else:
-                            sa += 2
-                        #
-                        if i >= %s:        # ID: ge
-                            sa += 10000
-                        else:
-                            sa += 20000
-                    return sa
-            """ % (a, b)
-            #
-            log = self.run(src, [], threshold=400)
-            assert log.result == res
-            loop, = log.loops_by_filename(self.filepath)
-            le_ops = log.opnames(loop.ops_by_id('lt'))
-            ge_ops = log.opnames(loop.ops_by_id('ge'))
-            assert le_ops.count('int_lt') == 1
-            #
-            if opt_expected:
-                assert ge_ops.count('int_ge') == 0
-            else:
-                # if this assert fails, it means that the optimization was
-                # applied even though we did not expect it to be. Check
-                # whether the optimization is valid, and either fix the code
-                # or fix the test :-)
-                assert ge_ops.count('int_ge') == 1
-
-    def test_boolrewrite_reflex(self):
-        """
-        Test for this case::
-            guard(i < x)
-            ...
-            guard(y > i)
-
-        where x and y can be either constants or variables. There are cases in
-        which the second guard is proven to be always true.
-        """
-        for a, b, res, opt_expected in (('2000', '2000', 10001000, True),
-                                        ( '500',  '500', 15001500, True),
-                                        ( '300',  '600', 14001700, False),
-                                        (   'a',    'b', 14001700, False),
-                                        (   'a',    'a', 17001700, True)):
-
-            src = """
-                def main():
-                    sa = 0
-                    a = 300
-                    b = 600
-                    for i in range(1000):
-                        if i < %s:        # ID: lt
-                            sa += 1
-                        else:
-                            sa += 2
-                        if %s > i:        # ID: gt
-                            sa += 10000
-                        else:
-                            sa += 20000
-                    return sa
-            """ % (a, b)
-            log = self.run(src, [], threshold=400)
-            assert log.result == res
-            loop, = log.loops_by_filename(self.filepath)
-            le_ops = log.opnames(loop.ops_by_id('lt'))
-            gt_ops = log.opnames(loop.ops_by_id('gt'))
-            assert le_ops.count('int_lt') == 1
-            #
-            if opt_expected:
-                assert gt_ops.count('int_gt') == 0
-            else:
-                # if this assert fails, it means that the optimization was
-                # applied even though we did not expect it to be. Check
-                # whether the optimization is valid, and either fix the code
-                # or fix the test :-)
-                assert gt_ops.count('int_gt') == 1
-
-
-    def test_boolrewrite_allcases_inverse(self):
-        """
-        Test for this case::
-            guard(i < x)
-            ...
-            guard(i > y)
-
-        with all possible combination of binary comparison operators.  This
-        test only checks that we get the expected result, not that any
-        optimization has been applied.
-        """
-        ops = ('<', '>', '<=', '>=', '==', '!=')
-        for op1 in ops:
-            for op2 in ops:
-                for a,b in ((500, 500), (300, 600)):
-                    src = """
-                        def main():
-                            sa = 0
-                            for i in range(300):
-                                if i %s %d:
-                                    sa += 1
-                                else:
-                                    sa += 2
-                                if i %s %d:
-                                    sa += 10000
-                                else:
-                                    sa += 20000
-                            return sa
-                    """ % (op1, a, op2, b)
-                    self.run_and_check(src, threshold=200)
-
-                    src = """
-                        def main():
-                            sa = 0
-                            i = 0.0
-                            while i < 250.0:
-                                if i %s %f:
-                                    sa += 1
-                                else:
-                                    sa += 2
-                                if i %s %f:
-                                    sa += 10000
-                                else:
-                                    sa += 20000
-                                i += 0.25
-                            return sa
-                    """ % (op1, float(a)/4.0, op2, float(b)/4.0)
-                    self.run_and_check(src, threshold=300)
-
-
-    def test_boolrewrite_allcases_reflex(self):
-        """
-        Test for this case::
-            guard(i < x)
-            ...
-            guard(x > i)
-
-        with all possible combination of binary comparison operators.  This
-        test only checks that we get the expected result, not that any
-        optimization has been applied.
-        """
-        ops = ('<', '>', '<=', '>=', '==', '!=')
-        for op1 in ops:
-            for op2 in ops:
-                for a,b in ((500, 500), (300, 600)):
-                    src = """
-                        def main():
-                            sa = 0
-                            for i in range(300):
-                                if i %s %d:
-                                    sa += 1
-                                else:
-                                    sa += 2
-                                if %d %s i:
-                                    sa += 10000
-                                else:
-                                    sa += 20000
-                            return sa
-                    """ % (op1, a, b, op2)
-                    self.run_and_check(src, threshold=200)
-
-                    src = """
-                        def main():
-                            sa = 0
-                            i = 0.0
-                            while i < 250.0:
-                                if i %s %f:
-                                    sa += 1
-                                else:
-                                    sa += 2
-                                if %f %s i:
-                                    sa += 10000
-                                else:
-                                    sa += 20000
-                                i += 0.25
-                            return sa
-                    """ % (op1, float(a)/4.0, float(b)/4.0, op2)
-                    self.run_and_check(src, threshold=300)
-
-    def test_boolrewrite_ptr(self):
-        """
-        This test only checks that we get the expected result, not that any
-        optimization has been applied.
-        """
-        compares = ('a == b', 'b == a', 'a != b', 'b != a', 'a == c', 'c != b')
-        for e1 in compares:
-            for e2 in compares:
-                src = """
-                    class tst(object):
-                        pass
-                    def main():
-                        a = tst()
-                        b = tst()
-                        c = tst()
-                        sa = 0
-                        for i in range(300):
-                            if %s:
-                                sa += 1
-                            else:
-                                sa += 2
-                            if %s:
-                                sa += 10000
-                            else:
-                                sa += 20000
-                            if i > 750:
-                                a = b
-                        return sa
-                """ % (e1, e2)
-                self.run_and_check(src, threshold=200)
-
-    def test_array_sum(self):
-        def main():
-            from array import array
-            img = array("i", range(128) * 5) * 480
-            l, i = 0, 0
-            while i < len(img):
-                l += img[i]
-                i += 1
-            return l
-        #
-        log = self.run(main, [])
-        assert log.result == 19507200
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i13 = int_lt(i7, i9)
-            guard_true(i13, descr=<Guard3>)
-            i15 = getarrayitem_raw(i10, i7, descr=<.*ArrayNoLengthDescr>)
-            i16 = int_add_ovf(i8, i15)
-            guard_no_overflow(descr=<Guard4>)
-            i18 = int_add(i7, 1)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, p5, p6, i18, i16, i9, i10, descr=<Loop0>)
-        """)
-
-    def test_array_intimg(self):
-        def main():
-            from array import array
-            img = array('i', range(3)) * (350 * 480)
-            intimg = array('i', (0,)) * (640 * 480)
-            l, i = 0, 640
-            while i < 640 * 480:
-                assert len(img) == 3*350*480
-                assert len(intimg) == 640*480
-                l = l + img[i]
-                intimg[i] = (intimg[i-640] + l)
-                i += 1
-            return intimg[i - 1]
-        #
-        log = self.run(main, [])
-        assert log.result == 73574560
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i13 = int_lt(i8, 307200)
-            guard_true(i13, descr=<Guard3>)
-        # the bound check guard on img has been killed (thanks to the asserts)
-            i14 = getarrayitem_raw(i10, i8, descr=<.*ArrayNoLengthDescr>)
-            i15 = int_add_ovf(i9, i14)
-            guard_no_overflow(descr=<Guard4>)
-            i17 = int_sub(i8, 640)
-        # the bound check guard on intimg has been killed (thanks to the asserts)
-            i18 = getarrayitem_raw(i11, i17, descr=<.*ArrayNoLengthDescr>)
-            i19 = int_add_ovf(i18, i15)
-            guard_no_overflow(descr=<Guard5>)
-        # on 64bit, there is a guard checking that i19 actually fits into 32bit
-            ...
-            setarrayitem_raw(i11, i8, _, descr=<.*ArrayNoLengthDescr>)
-            i28 = int_add(i8, 1)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, p5, p6, p7, i28, i15, i10, i11, descr=<Loop0>)
-        """)
-
-    def test_func_defaults(self):
-        def main(n):
-            i = 1
-            while i < n:
-                i += len(xrange(i+1)) - i
-            return i
-
-        log = self.run(main, [10000])
-        assert log.result == 10000
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i10 = int_lt(i5, i6)
-            guard_true(i10, descr=<Guard3>)
-            i120 = int_add(i5, 1)
-            guard_not_invalidated(descr=<Guard4>)
-            --TICK--
-            jump(..., descr=<Loop0>)
-        """)
 
     def test_unpack_iterable_non_list_tuple(self):
         def main(n):
@@ -1067,667 +164,71 @@
         loop, = log.loops_by_filename(self.filepath)
         assert loop.match("""
             i16 = int_ge(i12, i13)
-            guard_false(i16, descr=<Guard3>)
+            guard_false(i16, descr=...)
             p17 = getarrayitem_gc(p15, i12, descr=<GcPtrArrayDescr>)
             i19 = int_add(i12, 1)
-            setfield_gc(p4, i19, descr=<SignedFieldDescr .*W_AbstractSeqIterObject.inst_index .*>)
-            guard_nonnull_class(p17, 146982464, descr=<Guard4>)
+            setfield_gc(p9, i19, descr=<SignedFieldDescr .*W_AbstractSeqIterObject.inst_index .*>)
+            guard_nonnull_class(p17, 146982464, descr=...)
             i21 = getfield_gc(p17, descr=<SignedFieldDescr .*W_ArrayTypei.inst_len .*>)
             i23 = int_lt(0, i21)
-            guard_true(i23, descr=<Guard5>)
+            guard_true(i23, descr=...)
             i24 = getfield_gc(p17, descr=<NonGcPtrFieldDescr .*W_ArrayTypei.inst_buffer .*>)
             i25 = getarrayitem_raw(i24, 0, descr=<.*>)
             i27 = int_lt(1, i21)
-            guard_false(i27, descr=<Guard6>)
+            guard_false(i27, descr=...)
             i28 = int_add_ovf(i10, i25)
-            guard_no_overflow(descr=<Guard7>)
+            guard_no_overflow(descr=...)
             --TICK--
-            jump(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, i28, i25, i19, i13, p14, p15, descr=<Loop0>)
+            jump(p0, p1, p2, p3, p4, p5, p6, i28, i25, p9, p10, p11, i19, i13, p14, p15, descr=<Loop0>)
         """)
 
-    def test_mutate_class(self):
-        def fn(n):
-            class A(object):
-                count = 1
-                def __init__(self, a):
-                    self.a = a
-                def f(self):
-                    return self.count
-            i = 0
-            a = A(1)
-            while i < n:
-                A.count += 1 # ID: mutate
-                i = a.f()    # ID: meth1
-            return i
+
+    def test_dont_trace_every_iteration(self):
+        def main(a, b):
+            i = sa = 0
+            while i < 300:
+                if a > 0:
+                    pass
+                if 1 < b < 2:
+                    pass
+                sa += a % b
+                i += 1
+            return sa
         #
-        log = self.run(fn, [1000], threshold=10)
-        assert log.result == 1000
-        #
-        # first, we test the entry bridge
-        # -------------------------------
-        entry_bridge, = log.loops_by_filename(self.filepath, is_entry_bridge=True)
-        ops = entry_bridge.ops_by_id('mutate', opcode='LOAD_ATTR')
-        assert log.opnames(ops) == ['guard_value', 'guard_not_invalidated',
-                                    'getfield_gc', 'guard_nonnull_class']
-        # the STORE_ATTR is folded away
-        assert list(entry_bridge.ops_by_id('meth1', opcode='STORE_ATTR')) == []
-        #
-        # then, the actual loop
-        # ----------------------
+        log = self.run(main, [10, 20])
+        assert log.result == 300 * (10 % 20)
+        assert log.jit_summary.tracing_no == 1
         loop, = log.loops_by_filename(self.filepath)
         assert loop.match("""
-            i8 = getfield_gc_pure(p5, descr=<SignedFieldDescr .*W_IntObject.inst_intval.*>)
-            i9 = int_lt(i8, i7)
-            guard_true(i9, descr=.*)
-            guard_not_invalidated(descr=.*)
-            i11 = int_add(i8, 1)
-            i12 = force_token()
+            i11 = int_lt(i7, 300)
+            guard_true(i11, descr=...)
+            i12 = int_add_ovf(i8, i9)
+            guard_no_overflow(descr=...)
+            i14 = int_add(i7, 1)
             --TICK--
-            p20 = new_with_vtable(ConstClass(W_IntObject))
-            setfield_gc(p20, i11, descr=<SignedFieldDescr.*W_IntObject.inst_intval .*>)
-            setfield_gc(ConstPtr(ptr21), p20, descr=<GcPtrFieldDescr .*TypeCell.inst_w_value .*>)
-            jump(p0, p1, p2, p3, p4, p20, p6, i7, descr=<Loop.>)
+            jump(..., descr=...)
         """)
+        #
+        log = self.run(main, [-10, -20])
+        assert log.result == 300 * (-10 % -20)
+        assert log.jit_summary.tracing_no == 1
 
 
-    def test_intbound_simple(self):
+    def test_overflow_checking(self):
         """
         This test only checks that we get the expected result, not that any
         optimization has been applied.
         """
-        ops = ('<', '>', '<=', '>=', '==', '!=')
-        nbr = (3, 7)
-        for o1 in ops:
-            for o2 in ops:
-                for n1 in nbr:
-                    for n2 in nbr:
-                        src = '''
-                        def f(i):
-                            a, b = 3, 3
-                            if i %s %d:
-                                a = 0
-                            else:
-                                a = 1
-                            if i %s %d:
-                                b = 0
-                            else:
-                                b = 1
-                            return a + b * 2
-
-                        def main():
-                            res = [0] * 4
-                            idx = []
-                            for i in range(15):
-                                idx.extend([i] * 15)
-                            for i in idx:
-                                res[f(i)] += 1
-                            return res
-
-                        ''' % (o1, n1, o2, n2)
-                        self.run_and_check(src, threshold=200)
-
-    def test_intbound_addsub_mix(self):
-        """
-        This test only checks that we get the expected result, not that any
-        optimization has been applied.
-        """
-        tests = ('i > 4', 'i > 2', 'i + 1 > 2', '1 + i > 4',
-                 'i - 1 > 1', '1 - i > 1', '1 - i < -3',
-                 'i == 1', 'i == 5', 'i != 1', '-2 * i < -4')
-        for t1 in tests:
-            for t2 in tests:
-                src = '''
-                def f(i):
-                    a, b = 3, 3
-                    if %s:
-                        a = 0
-                    else:
-                        a = 1
-                    if %s:
-                        b = 0
-                    else:
-                        b = 1
-                    return a + b * 2
-
-                def main():
-                    res = [0] * 4
-                    idx = []
-                    for i in range(15):
-                        idx.extend([i] * 15)
-                    for i in idx:
-                        res[f(i)] += 1
-                    return res
-
-                ''' % (t1, t2)
-                self.run_and_check(src, threshold=200)
-
-    def test_intbound_gt(self):
-        def main(n):
-            i, a, b = 0, 0, 0
-            while i < n:
-                if i > -1:
-                    a += 1
-                if i > -2:
-                    b += 1
-                i += 1
-            return (a, b)
+        def main():
+            import sys
+            def f(a,b):
+                if a < 0: return -1
+                return a-b
+            #
+            total = sys.maxint - 2147483647
+            for i in range(100000):
+                total += f(i, 5)
+            #
+            return total
         #
-        log = self.run(main, [300], threshold=200)
-        assert log.result == (300, 300)
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i10 = int_lt(i8, i9)
-            guard_true(i10, descr=...)
-            i12 = int_add_ovf(i7, 1)
-            guard_no_overflow(descr=...)
-            i14 = int_add_ovf(i6, 1)
-            guard_no_overflow(descr=...)
-            i17 = int_add(i8, 1)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, p5, i14, i12, i17, i9, descr=<Loop0>)
-        """)
-
-    def test_intbound_sub_lt(self):
-        def main():
-            i, a = 0, 0
-            while i < 300:
-                if i - 10 < 295:
-                    a += 1
-                i += 1
-            return a
-        #
-        log = self.run(main, [], threshold=200)
-        assert log.result == 300
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i7 = int_lt(i5, 300)
-            guard_true(i7, descr=...)
-            i9 = int_sub_ovf(i5, 10)
-            guard_no_overflow(descr=...)
-            i11 = int_add_ovf(i4, 1)
-            guard_no_overflow(descr=...)
-            i13 = int_add(i5, 1)
-            --TICK--
-            jump(p0, p1, p2, p3, i11, i13, descr=<Loop0>)
-        """)
-
-    def test_intbound_addsub_ge(self):
-        def main(n):
-            i, a, b = 0, 0, 0
-            while i < n:
-                if i + 5 >= 5:
-                    a += 1
-                if i - 1 >= -1:
-                    b += 1
-                i += 1
-            return (a, b)
-        #
-        log = self.run(main, [300], threshold=200)
-        assert log.result == (300, 300)
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i10 = int_lt(i8, i9)
-            guard_true(i10, descr=...)
-            i12 = int_add_ovf(i8, 5)
-            guard_no_overflow(descr=...)
-            i14 = int_add_ovf(i7, 1)
-            guard_no_overflow(descr=...)
-            i16 = int_add_ovf(i6, 1)
-            guard_no_overflow(descr=...)
-            i19 = int_add(i8, 1)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, p5, i16, i14, i19, i9, descr=<Loop0>)
-        """)
-
-    def test_intbound_addmul_ge(self):
-        def main(n):
-            i, a, b = 0, 0, 0
-            while i < 300:
-                if i + 5 >= 5:
-                    a += 1
-                if 2 * i >= 0:
-                    b += 1
-                i += 1
-            return (a, b)
-        #
-        log = self.run(main, [300], threshold=200)
-        assert log.result == (300, 300)
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i10 = int_lt(i8, 300)
-            guard_true(i10, descr=...)
-            i12 = int_add(i8, 5)
-            i14 = int_add_ovf(i7, 1)
-            guard_no_overflow(descr=...)
-            i16 = int_lshift(i8, 1)
-            i18 = int_add_ovf(i6, 1)
-            guard_no_overflow(descr=...)
-            i21 = int_add(i8, 1)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, p5, i18, i14, i21, descr=<Loop0>)
-        """)
-
-    def test_intbound_eq(self):
-        def main(a, n):
-            i, s = 0, 0
-            while i < 300:
-                if a == 7:
-                    s += a + 1
-                elif i == 10:
-                    s += i
-                else:
-                    s += 1
-                i += 1
-            return s
-        #
-        log = self.run(main, [7, 300], threshold=200)
-        assert log.result == main(7, 300)
-        log = self.run(main, [10, 300], threshold=200)
-        assert log.result == main(10, 300)
-        log = self.run(main, [42, 300], threshold=200)
-        assert log.result == main(42, 300)
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i10 = int_lt(i8, 300)
-            guard_true(i10, descr=...)
-            i12 = int_eq(i8, 10)
-            guard_false(i12, descr=...)
-            i14 = int_add_ovf(i7, 1)
-            guard_no_overflow(descr=...)
-            i16 = int_add(i8, 1)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, p5, p6, i14, i16, descr=<Loop0>)
-        """)
-
-    def test_intbound_mul(self):
-        def main(a):
-            i, s = 0, 0
-            while i < 300:
-                assert i >= 0
-                if 2 * i < 30000:
-                    s += 1
-                else:
-                    s += a
-                i += 1
-            return s
-        #
-        log = self.run(main, [7], threshold=200)
-        assert log.result == 300
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i8 = int_lt(i6, 300)
-            guard_true(i8, descr=...)
-            i10 = int_lshift(i6, 1)
-            i12 = int_add_ovf(i5, 1)
-            guard_no_overflow(descr=...)
-            i14 = int_add(i6, 1)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, i12, i14, descr=<Loop0>)
-        """)
-
-    def test_assert(self):
-        def main(a):
-            i, s = 0, 0
-            while i < 300:
-                assert a == 7
-                s += a + 1
-                i += 1
-            return s
-        log = self.run(main, [7], threshold=200)
-        assert log.result == 300*8
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i8 = int_lt(i6, 300)
-            guard_true(i8, descr=...)
-            i10 = int_add_ovf(i5, 8)
-            guard_no_overflow(descr=...)
-            i12 = int_add(i6, 1)
-            --TICK--
-            jump(p0, p1, p2, p3, p4, i10, i12, descr=<Loop0>)
-        """)
-
-    def test_zeropadded(self):
-        def main():
-            from array import array
-            class ZeroPadded(array):
-                def __new__(cls, l):
-                    self = array.__new__(cls, 'd', range(l))
-                    return self
-
-                def __getitem__(self, i):
-                    if i < 0 or i >= len(self):
-                        return 0
-                    return array.__getitem__(self, i) # ID: get
-            #
-            buf = ZeroPadded(2000)
-            i = 10
-            sa = 0
-            while i < 2000 - 10:
-                sa += buf[i-2] + buf[i-1] + buf[i] + buf[i+1] + buf[i+2]
-                i += 1
-            return sa
-
-        log = self.run(main, [], threshold=200)
-        assert log.result == 9895050.0
-        loop, = log.loops_by_filename(self.filepath)
-        #
-        # check that the overloaded __getitem__ does not introduce double
-        # array bound checks.
-        #
-        # The force_token()s are still there, but will be eliminated by the
-        # backend regalloc, so they are harmless
-        assert loop.match(ignore_ops=['force_token'],
-                          expected_src="""
-            ...
-            i20 = int_ge(i18, i8)
-            guard_false(i20, descr=...)
-            f21 = getarrayitem_raw(i13, i18, descr=...)
-            f23 = getarrayitem_raw(i13, i14, descr=...)
-            f24 = float_add(f21, f23)
-            f26 = getarrayitem_raw(i13, i6, descr=...)
-            f27 = float_add(f24, f26)
-            i29 = int_add(i6, 1)
-            i31 = int_ge(i29, i8)
-            guard_false(i31, descr=...)
-            f33 = getarrayitem_raw(i13, i29, descr=...)
-            f34 = float_add(f27, f33)
-            i36 = int_add(i6, 2)
-            i38 = int_ge(i36, i8)
-            guard_false(i38, descr=...)
-            f39 = getarrayitem_raw(i13, i36, descr=...)
-            ...
-        """)
-
-
-    def test_circular(self):
-        def main():
-            from array import array
-            class Circular(array):
-                def __new__(cls):
-                    self = array.__new__(cls, 'd', range(256))
-                    return self
-                def __getitem__(self, i):
-                    assert len(self) == 256
-                    return array.__getitem__(self, i & 255)
-            #
-            buf = Circular()
-            i = 10
-            sa = 0
-            while i < 2000 - 10:
-                sa += buf[i-2] + buf[i-1] + buf[i] + buf[i+1] + buf[i+2]
-                i += 1
-            return sa
-        #
-        log = self.run(main, [], threshold=200)
-        assert log.result == 1239690.0
-        loop, = log.loops_by_filename(self.filepath)
-        #
-        # check that the array bound checks are removed
-        #
-        # The force_token()s are still there, but will be eliminated by the
-        # backend regalloc, so they are harmless
-        assert loop.match(ignore_ops=['force_token'],
-                          expected_src="""
-            ...
-            i17 = int_and(i14, 255)
-            f18 = getarrayitem_raw(i8, i17, descr=...)
-            f20 = getarrayitem_raw(i8, i9, descr=...)
-            f21 = float_add(f18, f20)
-            f23 = getarrayitem_raw(i8, i10, descr=...)
-            f24 = float_add(f21, f23)
-            i26 = int_add(i6, 1)
-            i29 = int_and(i26, 255)
-            f30 = getarrayitem_raw(i8, i29, descr=...)
-            f31 = float_add(f24, f30)
-            i33 = int_add(i6, 2)
-            i36 = int_and(i33, 255)
-            f37 = getarrayitem_raw(i8, i36, descr=...)
-            ...
-        """)
-
-    def test_min_max(self):
-        def main():
-            i=0
-            sa=0
-            while i < 300:
-                sa+=min(max(i, 3000), 4000)
-                i+=1
-            return sa
-        log = self.run(main, [], threshold=200)
-        assert log.result == 300*3000
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match("""
-            i7 = int_lt(i4, 300)
-            guard_true(i7, descr=...)
-            i9 = int_add_ovf(i5, 3000)
-            guard_no_overflow(descr=...)
-            i11 = int_add(i4, 1)
-            --TICK--
-            jump(p0, p1, p2, p3, i11, i9, descr=<Loop0>)
-        """)
-
-    def test_silly_max(self):
-        def main():
-            i = 2
-            sa = 0
-            while i < 300:
-                lst = range(i)
-                sa += max(*lst) # ID: max
-                i += 1
-            return sa
-        log = self.run(main, [], threshold=200)
-        assert log.result == main()
-        loop, = log.loops_by_filename(self.filepath)
-        # We don't want too many guards, but we do expect a residual call to min_max_loop
-        guards = [n for n in log.opnames(loop.ops_by_id("max")) if n.startswith('guard')]
-        assert len(guards) < 20
-        assert loop.match_by_id('max',"""
-            ...
-            p76 = call_may_force(ConstClass(min_max_loop__max), _, _, descr=...)
-            ...
-        """)
-
-    def test_iter_max(self):
-        def main():
-            i = 2
-            sa = 0
-            while i < 300:
-                lst = range(i)
-                sa += max(lst) # ID: max
-                i += 1
-            return sa
-        log = self.run(main, [], threshold=200)
-        assert log.result == main()
-        loop, = log.loops_by_filename(self.filepath)
-        # We don't want too many guards, but we do expect a residual call to min_max_loop
-        guards = [n for n in log.opnames(loop.ops_by_id("max")) if n.startswith('guard')]
-        assert len(guards) < 20
-        assert loop.match_by_id('max',"""
-            ...
-            p76 = call_may_force(ConstClass(min_max_loop__max), _, _, descr=...)
-            ...
-        """)
-
-    def test__ffi_call(self):
-        from pypy.rlib.test.test_libffi import get_libm_name
-        def main(libm_name):
-            try:
-                from _ffi import CDLL, types
-            except ImportError:
-                sys.stderr.write('SKIP: cannot import _ffi\n')
-                return 0
-
-            libm = CDLL(libm_name)
-            pow = libm.getfunc('pow', [types.double, types.double],
-                               types.double)
-            i = 0
-            res = 0
-            while i < 300:
-                res += pow(2, 3)
-                i += 1
-            return pow.getaddr(), res
-        #
-        libm_name = get_libm_name(sys.platform)
-        log = self.run(main, [libm_name], threshold=200)
-        pow_addr, res = log.result
-        assert res == 8.0 * 300
-        loop, = log.loops_by_filename(self.filepath)
-        # XXX: write the actual test when we merge this to jitypes2
-        ## ops = self.get_by_bytecode('CALL_FUNCTION')
-        ## assert len(ops) == 2 # we get two loops, because of specialization
-        ## call_function = ops[0]
-        ## last_ops = [op.getopname() for op in call_function[-5:]]
-        ## assert last_ops == ['force_token',
-        ##                     'setfield_gc',
-        ##                     'call_may_force',
-        ##                     'guard_not_forced',
-        ##                     'guard_no_exception']
-        ## call = call_function[-3]
-        ## assert call.getarg(0).value == pow_addr
-        ## assert call.getarg(1).value == 2.0
-        ## assert call.getarg(2).value == 3.0
-
-    def test_xor(self):
-        def main(b):
-            a = sa = 0
-            while a < 300:
-                if a > 0: # Specialises the loop
-                    pass
-                if b > 10:
-                    pass
-                if a^b >= 0:  # ID: guard
-                    sa += 1
-                sa += a^a     # ID: a_xor_a
-                a += 1
-            return sa
-
-        log = self.run(main, [11], threshold=200)
-        assert log.result == 300
-        loop, = log.loops_by_filename(self.filepath)
-        # if both are >=0, a^b is known to be >=0
-        # note that we know that b>10
-        assert loop.match_by_id('guard', """
-            i10 = int_xor(i5, i7)
-        """)
-        #
-        # x^x is always optimized to 0
-        assert loop.match_by_id('a_xor_a', "")
-
-        log = self.run(main, [9], threshold=200)
-        assert log.result == 300
-        loop, = log.loops_by_filename(self.filepath)
-        # we don't know that b>10, hence we cannot optimize it
-        assert loop.match_by_id('guard', """
-            i10 = int_xor(i5, i7)
-            i12 = int_ge(i10, 0)
-            guard_true(i12, descr=...)
-        """)
-
-    def test_shift_intbound(self):
-        def main(b):
-            res = 0
-            a = 0
-            while a < 300:
-                assert a >= 0
-                assert 0 <= b <= 10
-                val = a >> b
-                if val >= 0:    # ID: rshift
-                    res += 1
-                val = a << b
-                if val >= 0:    # ID: lshift
-                    res += 2
-                a += 1
-            return res
-        #
-        log = self.run(main, [2], threshold=200)
-        assert log.result == 300*3
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match_by_id('rshift', "")  # guard optimized away
-        assert loop.match_by_id('lshift', "")  # guard optimized away
-
-    def test_lshift_and_then_rshift(self):
-        py.test.skip('fixme, this optimization is disabled')
-        def main(b):
-            res = 0
-            a = 0
-            while res < 300:
-                assert a >= 0
-                assert 0 <= b <= 10
-                res = (a << b) >> b     # ID: shift
-                a += 1
-            return res
-        #
-        log = self.run(main, [2], threshold=200)
-        assert log.result == 300
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match_by_id('shift', "")  # optimized away
-
-    def test_division_to_rshift(self):
-        py.test.skip('in-progress')
-        def main(b):
-            res = 0
-            a = 0
-            while a < 300:
-                assert a >= 0
-                assert 0 <= b <= 10
-                res = a/b     # ID: div
-                a += 1
-            return res
-        #
-        log = self.run(main, [3], threshold=200)
-        #assert log.result == 149
-        loop, = log.loops_by_filename(self.filepath)
-        import pdb;pdb.set_trace()
-        assert loop.match_by_id('div', "")  # optimized away
-
-    def test_oldstyle_newstyle_mix(self):
-        def main():
-            class A:
-                pass
-
-            class B(object, A):
-                def __init__(self, x):
-                    self.x = x
-
-            i = 0
-            b = B(1)
-            while i < 100:
-                v = b.x # ID: loadattr
-                i += v
-            return i
-
-        log = self.run(main, [], threshold=80)
-        loop, = log.loops_by_filename(self.filepath)
-        loop.match_by_id('loadattr',
-        '''
-        guard_not_invalidated(descr=...)
-        i19 = call(ConstClass(ll_dict_lookup), _, _, _, descr=...)
-        guard_no_exception(descr=...)
-        i21 = int_and(i19, _)
-        i22 = int_is_true(i21)
-        guard_true(i22, descr=...)
-        i26 = call(ConstClass(ll_dict_lookup), _, _, _, descr=...)
-        guard_no_exception(descr=...)
-        i28 = int_and(i26, _)
-        i29 = int_is_true(i28)
-        guard_true(i29, descr=...)
-        ''')
-
-    def test_python_contains(self):
-        def main():
-            class A(object):
-                def __contains__(self, v):
-                    return True
-
-            i = 0
-            a = A()
-            while i < 100:
-                i += i in a # ID: contains
-
-            log = self.run(main, [], threshold=80)
-            loop, = log.loops_by_filename(self.filemath)
-            # XXX: haven't confirmed this is correct, it's probably missing a
-            # few instructions
-            loop.match_by_id("contains", """
-                i1 = int_add(i0, 1)
-            """)
+        self.run_and_check(main, [])
diff --git a/pypy/module/pypyjit/test_pypy_c/test_shift.py b/pypy/module/pypyjit/test_pypy_c/test_shift.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_shift.py
@@ -0,0 +1,166 @@
+import py
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestShift(BaseTestPyPyC):
+
+    def test_shift_intbound(self):
+        def main(b):
+            res = 0
+            a = 0
+            while a < 300:
+                assert a >= 0
+                assert 0 <= b <= 10
+                val = a >> b
+                if val >= 0:    # ID: rshift
+                    res += 1
+                val = a << b
+                if val >= 0:    # ID: lshift
+                    res += 2
+                a += 1
+            return res
+        #
+        log = self.run(main, [2])
+        assert log.result == 300*3
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id('rshift', "")  # guard optimized away
+        assert loop.match_by_id('lshift', "")  # guard optimized away
+
+    def test_lshift_and_then_rshift(self):
+        py.test.skip('fixme, this optimization is disabled')
+        def main(b):
+            res = 0
+            a = 0
+            while res < 300:
+                assert a >= 0
+                assert 0 <= b <= 10
+                res = (a << b) >> b     # ID: shift
+                a += 1
+            return res
+        #
+        log = self.run(main, [2])
+        assert log.result == 300
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id('shift', "")  # optimized away
+
+    def test_division_to_rshift(self):
+        def main(b):
+            res = 0
+            a = 0
+            while a < 300:
+                assert a >= 0
+                assert 0 <= b <= 10
+                res = a/b     # ID: div
+                a += 1
+            return res
+        #
+        log = self.run(main, [3])
+        assert log.result == 99
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id('div', """
+            i10 = int_floordiv(i6, i7)
+            i11 = int_mul(i10, i7)
+            i12 = int_sub(i6, i11)
+            i14 = int_rshift(i12, 63)
+            i15 = int_add(i10, i14)
+        """)
+
+    def test_division_to_rshift_allcases(self):
+        """
+        This test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        avalues = ('a', 'b', 7, -42, 8)
+        bvalues = ['b'] + range(-10, 0) + range(1,10)
+        code = ''
+        for a in avalues:
+            for b in bvalues:
+                code += '                sa += %s / %s\n' % (a, b)
+        src = """
+        def main(a, b):
+            i = sa = 0
+            while i < 300:
+%s
+                i += 1
+            return sa
+        """ % code
+        self.run_and_check(src, [ 10,  20])
+        self.run_and_check(src, [ 10, -20])
+        self.run_and_check(src, [-10, -20])
+
+    def test_mod(self):
+        """
+        This test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        avalues = ('a', 'b', 7, -42, 8)
+        bvalues = ['b'] + range(-10, 0) + range(1,10)
+        code = ''
+        for a in avalues:
+            for b in bvalues:
+                code += '                sa += %s %% %s\n' % (a, b)
+        src = """
+        def main(a, b):
+            i = sa = 0
+            while i < 2000:
+                if a > 0: pass
+                if 1 < b < 2: pass
+%s
+                i += 1
+            return sa
+        """ % code
+        self.run_and_check(src, [ 10,  20])
+        self.run_and_check(src, [ 10, -20])
+        self.run_and_check(src, [-10, -20])
+
+    def test_shift_allcases(self):
+        """
+        This test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        from sys import maxint
+        def main(a, b):
+            i = sa = 0
+            while i < 300:
+                if a > 0: # Specialises the loop
+                    pass
+                if b < 2 and b > 0:
+                    pass
+                if (a >> b) >= 0:
+                    sa += 1
+                if (a << b) > 2:
+                    sa += 10000
+                i += 1
+            return sa
+        #
+        maxvals = (-maxint-1, -maxint, maxint-1, maxint)
+        for a in (-4, -3, -2, -1, 0, 1, 2, 3, 4) + maxvals:
+            for b in (0, 1, 2, 31, 32, 33, 61, 62, 63):
+                yield self.run_and_check, main, [a, b]
+
+    def test_revert_shift_allcases(self):
+        """
+        This test only checks that we get the expected result, not that any
+        optimization has been applied.
+        """
+        from sys import maxint
+
+        def main(a, b, c):
+            from sys import maxint
+            i = sa = 0
+            while i < 300:
+                if 0 < a < 10: pass
+                if -100 < b < 100: pass
+                if -maxint/2 < c < maxint/2: pass
+                sa += (a<<a)>>a
+                sa += (b<<a)>>a
+                sa += (c<<a)>>a
+                sa += (a<<100)>>100
+                sa += (b<<100)>>100
+                sa += (c<<100)>>100
+                i += 1
+            return long(sa)
+
+        for a in (1, 4, 8, 100):
+            for b in (-10, 10, -201, 201, -maxint/3, maxint/3):
+                for c in (-10, 10, -maxint/3, maxint/3):
+                    yield self.run_and_check, main, [a, b, c]
diff --git a/pypy/module/pypyjit/test_pypy_c/test_string.py b/pypy/module/pypyjit/test_pypy_c/test_string.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_string.py
@@ -0,0 +1,107 @@
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestString(BaseTestPyPyC):
+    def test_lookup_default_encoding(self):
+        def main(n):
+            import string
+            i = 0
+            letters = string.letters
+            uletters = unicode(string.letters)
+            while i < n:
+                i += letters[i % len(letters)] == uletters[i % len(letters)]
+            return i
+
+        log = self.run(main, [300])
+        assert log.result == 300
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i14 = int_lt(i6, i9)
+            guard_true(i14, descr=...)
+            guard_not_invalidated(descr=...)
+            i15 = int_mod(i6, i10)
+            i17 = int_rshift(i15, 63)
+            i18 = int_and(i10, i17)
+            i19 = int_add(i15, i18)
+            i21 = int_lt(i19, 0)
+            guard_false(i21, descr=...)
+            i22 = int_ge(i19, i10)
+            guard_false(i22, descr=...)
+            i23 = strgetitem(p11, i19)
+            i24 = int_ge(i19, i12)
+            guard_false(i24, descr=...)
+            i25 = unicodegetitem(p13, i19)
+            p27 = newstr(1)
+            strsetitem(p27, 0, i23)
+            p30 = call(ConstClass(ll_str2unicode__rpy_stringPtr), p27, descr=<GcPtrCallDescr>)
+            guard_no_exception(descr=...)
+            i32 = call(ConstClass(_ll_2_str_eq_checknull_char__rpy_unicodePtr_UniChar), p30, i25, descr=<SignedCallDescr>)
+            guard_true(i32, descr=...)
+            i34 = int_add(i6, 1)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, p5, i34, p7, p8, i9, i10, p11, i12, p13, descr=<Loop4>)
+        """)
+
+    def test_long(self):
+        def main(n):
+            import string
+            i = 1
+            while i < n:
+                i += int(long(string.digits[i % len(string.digits)], 16))
+            return i
+
+        log = self.run(main, [1000])
+        assert log.result == main(1000)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i11 = int_lt(i6, i7)
+            guard_true(i11, descr=...)
+            guard_not_invalidated(descr=...)
+            i13 = int_eq(i6, -9223372036854775808)
+            guard_false(i13, descr=...)
+            i15 = int_mod(i6, i8)
+            i17 = int_rshift(i15, 63)
+            i18 = int_and(i8, i17)
+            i19 = int_add(i15, i18)
+            i21 = int_lt(i19, 0)
+            guard_false(i21, descr=...)
+            i22 = int_ge(i19, i8)
+            guard_false(i22, descr=...)
+            i23 = strgetitem(p10, i19)
+            p25 = newstr(1)
+            strsetitem(p25, 0, i23)
+            p28 = call(ConstClass(strip_spaces), p25, descr=<GcPtrCallDescr>)
+            guard_no_exception(descr=...)
+            i29 = strlen(p28)
+            i30 = int_is_true(i29)
+            guard_true(i30, descr=...)
+            i32 = int_sub(i29, 1)
+            i33 = strgetitem(p28, i32)
+            i35 = int_eq(i33, 108)
+            guard_false(i35, descr=...)
+            i37 = int_eq(i33, 76)
+            guard_false(i37, descr=...)
+            i39 = strgetitem(p28, 0)
+            i41 = int_eq(i39, 45)
+            guard_false(i41, descr=...)
+            i43 = int_eq(i39, 43)
+            guard_false(i43, descr=...)
+            i43 = call(ConstClass(ll_startswith__rpy_stringPtr_rpy_stringPtr), p28, ConstPtr(ptr42), descr=<BoolCallDescr>)
+            guard_false(i43, descr=...)
+            i46 = call(ConstClass(ll_startswith__rpy_stringPtr_rpy_stringPtr), p28, ConstPtr(ptr45), descr=<BoolCallDescr>)
+            guard_false(i46, descr=...)
+            p51 = new_with_vtable(21136408)
+            setfield_gc(p51, p28, descr=<GcPtrFieldDescr .*NumberStringParser.inst_literal .*>)
+            setfield_gc(p51, ConstPtr(ptr51), descr=<GcPtrFieldDescr pypy.objspace.std.strutil.NumberStringParser.inst_fname .*>)
+            setfield_gc(p51, i29, descr=<SignedFieldDescr .*NumberStringParser.inst_n .*>)
+            setfield_gc(p51, 1, descr=<SignedFieldDescr .*NumberStringParser.inst_sign .*>)
+            setfield_gc(p51, 16, descr=<SignedFieldDescr .*NumberStringParser.inst_base .*>)
+            setfield_gc(p51, p28, descr=<GcPtrFieldDescr .*NumberStringParser.inst_s .*>)
+            p55 = call(ConstClass(parse_digit_string), p51, descr=<GcPtrCallDescr>)
+            guard_no_exception(descr=...)
+            i57 = call(ConstClass(rbigint.toint), p55, descr=<SignedCallDescr>)
+            guard_no_exception(descr=...)
+            i58 = int_add_ovf(i6, i57)
+            guard_no_overflow(descr=...)
+            --TICK--
+            jump(p0, p1, p2, p3, p4, p5, i58, i7, i8, p9, p10, descr=<Loop4>)
+        """)
\ No newline at end of file
diff --git a/pypy/module/signal/interp_signal.py b/pypy/module/signal/interp_signal.py
--- a/pypy/module/signal/interp_signal.py
+++ b/pypy/module/signal/interp_signal.py
@@ -80,7 +80,7 @@
 
 pypysig_getaddr_occurred = external('pypysig_getaddr_occurred', [],
                                     lltype.Ptr(LONG_STRUCT), _nowrapper=True,
-                                    pure_function=True)
+                                    elidable_function=True)
 c_alarm = external('alarm', [rffi.INT], rffi.INT)
 c_pause = external('pause', [], rffi.INT)
 c_siginterrupt = external('siginterrupt', [rffi.INT, rffi.INT], rffi.INT)
diff --git a/pypy/module/sys/__init__.py b/pypy/module/sys/__init__.py
--- a/pypy/module/sys/__init__.py
+++ b/pypy/module/sys/__init__.py
@@ -7,6 +7,8 @@
 
 class Module(MixedModule):
     """Sys Builtin Module. """
+    _immutable_fields_ = ["defaultencoding?"]
+
     def __init__(self, space, w_name):
         """NOT_RPYTHON""" # because parent __init__ isn't
         if space.config.translating:
diff --git a/pypy/module/sys/vm.py b/pypy/module/sys/vm.py
--- a/pypy/module/sys/vm.py
+++ b/pypy/module/sys/vm.py
@@ -57,7 +57,8 @@
         raise OperationError(space.w_ValueError,
                              space.wrap("recursion limit must be positive"))
     space.sys.recursionlimit = new_limit
-    _stack_set_length_fraction(new_limit * 0.001)
+    if space.config.translation.type_system == 'lltype':
+        _stack_set_length_fraction(new_limit * 0.001)
 
 def getrecursionlimit(space):
     """Return the last value set by setrecursionlimit().
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c b/pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c
--- a/pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c
+++ b/pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c
@@ -43,6 +43,12 @@
 	qsort(base, num, width, compare);
 }
 
+EXPORT(char) deref_LP_c_char_p(char** argv)
+{
+    char* s = *argv;
+    return s[0];
+}
+
 EXPORT(int *) _testfunc_ai8(int a[8])
 {
 	return a;
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/support.py b/pypy/module/test_lib_pypy/ctypes_tests/support.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/support.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/support.py
@@ -1,4 +1,5 @@
 import py
+import sys
 import ctypes
 
 py.test.importorskip("ctypes", "1.0.2")
@@ -14,6 +15,16 @@
         if _rawffi:
             py.test.skip("white-box tests for pypy _rawffi based ctypes impl")
 
+def del_funcptr_refs_maybe(obj, attrname):
+    dll = getattr(obj, attrname, None)
+    if not dll:
+        return
+    _FuncPtr = dll._FuncPtr
+    for name in dir(dll):
+        obj = getattr(dll, name, None)
+        if isinstance(obj, _FuncPtr):
+            delattr(dll, name)
+
 class BaseCTypesTestChecker:
     def setup_class(cls):
         if _rawffi:
@@ -21,8 +32,21 @@
             for _ in range(4):
                 gc.collect()
             cls.old_num = _rawffi._num_of_allocated_objects()
-    
+
+
     def teardown_class(cls):
+        if sys.pypy_translation_info['translation.gc'] == 'boehm':
+            return # it seems that boehm has problems with __del__, so not
+                   # everything is freed
+        #
+        mod = sys.modules[cls.__module__]
+        del_funcptr_refs_maybe(mod, 'dll')
+        del_funcptr_refs_maybe(mod, 'dll2')
+        del_funcptr_refs_maybe(mod, 'lib')
+        del_funcptr_refs_maybe(mod, 'testdll')
+        del_funcptr_refs_maybe(mod, 'ctdll')
+        del_funcptr_refs_maybe(cls, '_dll')
+        #
         if hasattr(cls, 'old_num'):
             import gc
             for _ in range(4):
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_commethods.py b/pypy/module/test_lib_pypy/ctypes_tests/test_commethods.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_commethods.py
@@ -0,0 +1,82 @@
+# unittest for SOME ctypes COM function calls.
+# Can't resist implementing some kind of mini-comtypes
+# theller ;-)
+
+import py
+import sys
+if sys.platform != "win32":
+    py.test.skip('windows only test')
+
+import ctypes, new, unittest
+from ctypes.wintypes import HRESULT
+from _ctypes import COMError
+
+oleaut32 = ctypes.OleDLL("oleaut32")
+
+class UnboundMethod(object):
+    def __init__(self, func, index, name):
+        self.func = func
+        self.index = index
+        self.name = name
+        self.__doc__ = func.__doc__
+
+    def __repr__(self):
+        return "<Unbound COM method index %d: %s at %x>" % (self.index, self.name, id(self))
+
+    def __get__(self, instance, owner):
+        if instance is None:
+            return self
+        return new.instancemethod(self.func, instance, owner)
+    
+def commethod(index, restype, *argtypes):
+    """A decorator that generates COM methods.  The decorated function
+    itself is not used except for its name."""
+    def make_commethod(func):
+        comfunc = ctypes.WINFUNCTYPE(restype, *argtypes)(index, func.__name__)
+        comfunc.__name__ = func.__name__
+        comfunc.__doc__ = func.__doc__
+        return UnboundMethod(comfunc, index, func.__name__)
+    return make_commethod
+
+class ICreateTypeLib2(ctypes.c_void_p):
+
+    @commethod(1, ctypes.c_long)
+    def AddRef(self):
+        pass
+
+    @commethod(2, ctypes.c_long)
+    def Release(self):
+        pass
+
+    @commethod(4, HRESULT, ctypes.c_wchar_p)
+    def SetName(self):
+        """Set the name of the library."""
+
+    @commethod(12, HRESULT)
+    def SaveAllChanges(self):
+        pass
+
+
+CreateTypeLib2 = oleaut32.CreateTypeLib2
+CreateTypeLib2.argtypes = (ctypes.c_int, ctypes.c_wchar_p, ctypes.POINTER(ICreateTypeLib2))
+
+################################################################
+
+def test_basic_comtypes():
+    punk = ICreateTypeLib2()
+    hr = CreateTypeLib2(0, "foobar.tlb", punk)
+    assert hr == 0
+
+    assert 2 == punk.AddRef()
+    assert 3 == punk.AddRef()
+    assert 4 == punk.AddRef()
+
+    punk.SetName("TypeLib_ByPYPY")
+    py.test.raises(COMError, lambda: punk.SetName(None))
+
+    # This would save the typelib to disk.
+    ## punk.SaveAllChanges()
+
+    assert 3 == punk.Release()
+    assert 2 == punk.Release()
+    assert 1 == punk.Release()
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_fastpath.py b/pypy/module/test_lib_pypy/ctypes_tests/test_fastpath.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_fastpath.py
@@ -0,0 +1,103 @@
+from ctypes import CDLL, POINTER, pointer, c_byte, c_int, c_char_p
+import sys
+import py
+from support import BaseCTypesTestChecker
+
+class MyCDLL(CDLL):
+    def __getattr__(self, attr):
+        fn = self[attr] # this way it's not cached as an attribute
+        fn._slowpath_allowed = False
+        return fn
+
+def setup_module(mod):
+    import conftest
+    _ctypes_test = str(conftest.sofile)
+    mod.dll = MyCDLL(_ctypes_test)  # slowpath not allowed
+    mod.dll2 = CDLL(_ctypes_test)   # slowpath allowed
+
+
+class TestFastpath(BaseCTypesTestChecker):
+
+    def test_fastpath_forbidden(self):
+        def myfunc():
+            pass
+        #
+        tf_b = dll.tf_b
+        tf_b.restype = c_byte
+        #
+        # so far, it's still using the slowpath
+        assert not tf_b._is_fastpath
+        tf_b.callable = myfunc
+        tf_b.argtypes = (c_byte,)
+        # setting 'callable' prevents the fastpath from kicking in
+        assert not tf_b._is_fastpath
+        #
+        del tf_b.callable
+        tf_b.argtypes = (c_byte,) # try to re-enable the fastpath
+        assert tf_b._is_fastpath
+        #
+        assert not tf_b._slowpath_allowed
+        py.test.raises(AssertionError, "tf_b.callable = myfunc")
+        py.test.raises(AssertionError, "tf_b('aaa')") # force a TypeError
+
+    def test_simple_args(self):
+        tf_b = dll.tf_b
+        tf_b.restype = c_byte
+        tf_b.argtypes = (c_byte,)
+        assert tf_b(-126) == -42
+
+    def test_pointer_args(self):
+        f = dll._testfunc_p_p
+        f.restype = POINTER(c_int)
+        f.argtypes = [POINTER(c_int)]
+        v = c_int(42)
+        result = f(pointer(v))
+        assert type(result) == POINTER(c_int)
+        assert result.contents.value == 42
+
+    def test_simple_pointer_args(self):
+        f = dll.my_strchr
+        f.argtypes = [c_char_p, c_int]
+        f.restype = c_char_p
+        mystr = c_char_p("abcd")
+        result = f(mystr, ord("b"))
+        assert result == "bcd"
+
+    @py.test.mark.xfail
+    def test_strings(self):
+        f = dll.my_strchr
+        f.argtypes = [c_char_p, c_int]
+        f.restype = c_char_p
+        # python strings need to be converted to c_char_p, but this is
+        # supported only in the slow path so far
+        result = f("abcd", ord("b"))
+        assert result == "bcd"
+
+    def test_errcheck(self):
+        def errcheck(result, func, args):
+            return 'hello'
+        tf_b = dll.tf_b
+        tf_b.restype = c_byte
+        tf_b.argtypes = (c_byte,)
+        tf_b.errcheck = errcheck
+        assert tf_b(-126) == 'hello'
+
+
+class TestFallbackToSlowpath(BaseCTypesTestChecker):
+
+    def test_argtypes_is_None(self):
+        tf_b = dll2.tf_b
+        tf_b.restype = c_byte
+        tf_b.argtypes = (c_char_p,)  # this is intentionally wrong
+        tf_b.argtypes = None # kill the fast path
+        assert not tf_b._is_fastpath
+        assert tf_b(-126) == -42
+
+    def test_callable_is_None(self):
+        tf_b = dll2.tf_b
+        tf_b.restype = c_byte
+        tf_b.argtypes = (c_byte,)
+        tf_b.callable = lambda x: x+1
+        assert not tf_b._is_fastpath
+        assert tf_b(-126) == -125
+        tf_b.callable = None
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py b/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_functions.py
@@ -91,6 +91,13 @@
         result = f(0, 0, 0, 0, 0, 0)
         assert result == u'\x00'
 
+    def test_char_result(self):
+        f = dll._testfunc_i_bhilfd
+        f.argtypes = [c_byte, c_short, c_int, c_long, c_float, c_double]
+        f.restype = c_char
+        result = f(0, 0, 0, 0, 0, 0)
+        assert result == '\x00'
+
     def test_voidresult(self):
         f = dll._testfunc_v
         f.restype = None
@@ -125,6 +132,16 @@
         # You cannot assign character format codes as restype any longer
         raises(TypeError, setattr, f, "restype", "i")
 
+
+    def test_truncate_python_longs(self):
+        f = dll._testfunc_i_bhilfd
+        f.argtypes = [c_byte, c_short, c_int, c_long, c_float, c_double]
+        f.restype = c_int
+        x = sys.maxint * 2
+        result = f(x, x, x, x, 0, 0)
+        assert result == -8
+
+
     def test_floatresult(self):
         f = dll._testfunc_f_bhilfd
         f.argtypes = [c_byte, c_short, c_int, c_long, c_float, c_double]
@@ -211,8 +228,19 @@
         result = f(byref(c_int(99)))
         assert not result.contents == 99
 
+    def test_convert_pointers(self):
+        f = dll.deref_LP_c_char_p
+        f.restype = c_char
+        f.argtypes = [POINTER(c_char_p)]
+        #
+        s = c_char_p('hello world')
+        ps = pointer(s)
+        assert f(ps) == 'h'
+        assert f(s) == 'h'  # automatic conversion from char** to char*
+
     def test_errors_1(self):
         f = dll._testfunc_p_p
+        f.argtypes = [POINTER(c_int)]
         f.restype = c_int
 
         class X(Structure):
@@ -393,6 +421,23 @@
         result = f("abcd", ord("b"))
         assert result == "bcd"
 
+    def test_keepalive_buffers(self, monkeypatch):
+        import gc
+        f = dll.my_strchr
+        f.argtypes = [c_char_p]
+        f.restype = c_char_p
+        #
+        orig__call_funcptr = f._call_funcptr
+        def _call_funcptr(funcptr, *newargs):
+            gc.collect()
+            gc.collect()
+            gc.collect()
+            return orig__call_funcptr(funcptr, *newargs)
+        monkeypatch.setattr(f, '_call_funcptr', _call_funcptr)
+        #
+        result = f("abcd", ord("b"))
+        assert result == "bcd"
+
     def test_caching_bug_1(self):
         # the same test as test_call_some_args, with two extra lines
         # in the middle that trigger caching in f._ptr, which then
@@ -428,6 +473,16 @@
         u = dll.ret_un_func(a[1])
         assert u.y == 33*10000
 
+    def test_cache_funcptr(self):
+        tf_b = dll.tf_b
+        tf_b.restype = c_byte
+        tf_b.argtypes = (c_byte,)
+        assert tf_b(-126) == -42
+        ptr = tf_b._ptr
+        assert ptr is not None
+        assert tf_b(-126) == -42
+        assert tf_b._ptr is ptr
+
     def test_warnings(self):
         import warnings
         warnings.simplefilter("always")
@@ -439,6 +494,22 @@
             assert "C function without declared arguments called" in str(w[0].message)
             assert "C function without declared return type called" in str(w[1].message)
 
+    def test_errcheck(self):
+        py.test.skip('fixme')
+        def errcheck(result, func, args):
+            assert result == -42
+            assert type(result) is int
+            arg, = args
+            assert arg == -126
+            assert type(arg) is int
+            return result
+        #
+        tf_b = dll.tf_b
+        tf_b.restype = c_byte
+        tf_b.argtypes = (c_byte,)
+        tf_b.errcheck = errcheck
+        assert tf_b(-126) == -42
+        del tf_b.errcheck
         with warnings.catch_warnings(record=True) as w:
             dll.get_an_integer.argtypes = []
             dll.get_an_integer()
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_guess_argtypes.py b/pypy/module/test_lib_pypy/ctypes_tests/test_guess_argtypes.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/test_guess_argtypes.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_guess_argtypes.py
@@ -12,8 +12,10 @@
     from _ctypes.function import CFuncPtr
 
     def guess(value):
-        cobj = CFuncPtr._conv_param(None, value)
-        return type(cobj)
+        _, cobj, ctype = CFuncPtr._conv_param(None, value)
+        return ctype
+        ## cobj = CFuncPtr._conv_param(None, value)
+        ## return type(cobj)
 
     assert guess(13) == c_int
     assert guess(0) == c_int
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_numbers.py b/pypy/module/test_lib_pypy/ctypes_tests/test_numbers.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/test_numbers.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_numbers.py
@@ -125,6 +125,9 @@
             if t is c_longdouble:   # no support for 'g' in the struct module
                 continue
             code = t._type_ # the typecode
+            if code == 'g':
+                # typecode not supported by "struct"
+                continue
             align = struct.calcsize("c%c" % code) - struct.calcsize(code)
 
             # alignment of the type...
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_pointers.py b/pypy/module/test_lib_pypy/ctypes_tests/test_pointers.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/test_pointers.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_pointers.py
@@ -12,6 +12,13 @@
     mod._ctypes_test = str(conftest.sofile)
 
 class TestPointers(BaseCTypesTestChecker):
+
+    def test_get_ffi_argtype(self):
+        P = POINTER(c_int)
+        ffitype = P.get_ffi_argtype()
+        assert P.get_ffi_argtype() is ffitype
+        assert ffitype.deref_pointer() is c_int.get_ffi_argtype()
+    
     def test_pointer_crash(self):
 
         class A(POINTER(c_ulong)):
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_unicode.py b/pypy/module/test_lib_pypy/ctypes_tests/test_unicode.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/test_unicode.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_unicode.py
@@ -15,6 +15,10 @@
         mod.wcslen.argtypes = [ctypes.c_wchar_p]
         mod.func = dll._testfunc_p_p
 
+    def teardown_module(mod):
+        del mod.func
+        del mod.wcslen
+
     class TestUnicode(BaseCTypesTestChecker):
         def setup_method(self, method):
             self.prev_conv_mode = ctypes.set_conversion_mode("ascii", "strict")
diff --git a/pypy/module/test_lib_pypy/test_pwd.py b/pypy/module/test_lib_pypy/test_pwd.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/test_lib_pypy/test_pwd.py
@@ -0,0 +1,12 @@
+from pypy.conftest import gettestobjspace
+
+class AppTestPwd:
+    def setup_class(cls):
+        cls.space = gettestobjspace(usemodules=('_ffi', '_rawffi'))
+        cls.space.appexec((), "(): import pwd")
+
+    def test_getpwuid(self):
+        import os, pwd
+        passwd_info = pwd.getpwuid(os.getuid())
+        assert type(passwd_info).__name__ == 'struct_passwd'
+        assert repr(passwd_info).startswith("pwd.struct_passwd(pw_name=")
diff --git a/pypy/objspace/descroperation.py b/pypy/objspace/descroperation.py
--- a/pypy/objspace/descroperation.py
+++ b/pypy/objspace/descroperation.py
@@ -416,7 +416,7 @@
             # obscure circumstances.
             return default_identity_hash(space, w_obj)
         if space.is_w(w_hash, space.w_None):
-            typename = space.type(w_obj).getname(space, '?')
+            typename = space.type(w_obj).getname(space)
             raise operationerrfmt(space.w_TypeError,
                                   "'%s' objects are unhashable", typename)
         w_result = space.get_and_call_function(w_hash, w_obj)
diff --git a/pypy/objspace/flow/flowcontext.py b/pypy/objspace/flow/flowcontext.py
--- a/pypy/objspace/flow/flowcontext.py
+++ b/pypy/objspace/flow/flowcontext.py
@@ -384,8 +384,9 @@
     # hack for unrolling iterables, don't use this
     def replace_in_stack(self, oldvalue, newvalue):
         w_new = Constant(newvalue)
-        stack_items_w = self.crnt_frame.valuestack_w
-        for i in range(self.crnt_frame.valuestackdepth-1, -1, -1):
+        f = self.crnt_frame
+        stack_items_w = f.locals_stack_w
+        for i in range(f.valuestackdepth-1, f.nlocals-1, -1):
             w_v = stack_items_w[i]
             if isinstance(w_v, Constant):
                 if w_v.value is oldvalue:
diff --git a/pypy/objspace/flow/framestate.py b/pypy/objspace/flow/framestate.py
--- a/pypy/objspace/flow/framestate.py
+++ b/pypy/objspace/flow/framestate.py
@@ -10,7 +10,7 @@
     def __init__(self, state):
         if isinstance(state, PyFrame):
             # getfastscope() can return real None, for undefined locals
-            data = state.getfastscope() + state.savevaluestack()
+            data = state.save_locals_stack()
             if state.last_exception is None:
                 data.append(Constant(None))
                 data.append(Constant(None))
@@ -36,11 +36,9 @@
 
     def restoreframe(self, frame):
         if isinstance(frame, PyFrame):
-            fastlocals = len(frame.fastlocals_w)
             data = self.mergeable[:]
             recursively_unflatten(frame.space, data)
-            frame.setfastscope(data[:fastlocals])  # Nones == undefined locals
-            frame.restorevaluestack(data[fastlocals:-2])
+            frame.restore_locals_stack(data[:-2])  # Nones == undefined locals
             if data[-2] == Constant(None):
                 assert data[-1] == Constant(None)
                 frame.last_exception = None
diff --git a/pypy/objspace/flow/operation.py b/pypy/objspace/flow/operation.py
--- a/pypy/objspace/flow/operation.py
+++ b/pypy/objspace/flow/operation.py
@@ -143,9 +143,6 @@
 def mod_ovf(x, y):
     return ovfcheck(x % y)
 
-##def pow_ovf(*two_or_three_args):
-##    return ovfcheck(pow(*two_or_three_args))
-
 def lshift_ovf(x, y):
     return ovfcheck_lshift(x, y)
 
diff --git a/pypy/objspace/flow/test/test_framestate.py b/pypy/objspace/flow/test/test_framestate.py
--- a/pypy/objspace/flow/test/test_framestate.py
+++ b/pypy/objspace/flow/test/test_framestate.py
@@ -25,7 +25,7 @@
         dummy = Constant(None)
         #dummy.dummy = True
         arg_list = ([Variable() for i in range(formalargcount)] +
-                    [dummy] * (len(frame.fastlocals_w) - formalargcount))
+                    [dummy] * (frame.nlocals - formalargcount))
         frame.setfastscope(arg_list)
         return frame
 
@@ -42,7 +42,7 @@
     def test_neq_hacked_framestate(self):
         frame = self.getframe(self.func_simple)
         fs1 = FrameState(frame)
-        frame.fastlocals_w[-1] = Variable()
+        frame.locals_stack_w[frame.nlocals-1] = Variable()
         fs2 = FrameState(frame)
         assert fs1 != fs2
 
@@ -55,7 +55,7 @@
     def test_union_on_hacked_framestates(self):
         frame = self.getframe(self.func_simple)
         fs1 = FrameState(frame)
-        frame.fastlocals_w[-1] = Variable()
+        frame.locals_stack_w[frame.nlocals-1] = Variable()
         fs2 = FrameState(frame)
         assert fs1.union(fs2) == fs2  # fs2 is more general
         assert fs2.union(fs1) == fs2  # fs2 is more general
@@ -63,7 +63,7 @@
     def test_restore_frame(self):
         frame = self.getframe(self.func_simple)
         fs1 = FrameState(frame)
-        frame.fastlocals_w[-1] = Variable()
+        frame.locals_stack_w[frame.nlocals-1] = Variable()
         fs1.restoreframe(frame)
         assert fs1 == FrameState(frame)
 
@@ -82,25 +82,26 @@
     def test_getoutputargs(self):
         frame = self.getframe(self.func_simple)
         fs1 = FrameState(frame)
-        frame.fastlocals_w[-1] = Variable()
+        frame.locals_stack_w[frame.nlocals-1] = Variable()
         fs2 = FrameState(frame)
         outputargs = fs1.getoutputargs(fs2)
         # 'x' -> 'x' is a Variable
-        # fastlocals_w[-1] -> fastlocals_w[-1] is Constant(None)
-        assert outputargs == [frame.fastlocals_w[0], Constant(None)]
+        # locals_w[n-1] -> locals_w[n-1] is Constant(None)
+        assert outputargs == [frame.locals_stack_w[0], Constant(None)]
 
     def test_union_different_constants(self):
         frame = self.getframe(self.func_simple)
         fs1 = FrameState(frame)
-        frame.fastlocals_w[-1] = Constant(42)
+        frame.locals_stack_w[frame.nlocals-1] = Constant(42)
         fs2 = FrameState(frame)
         fs3 = fs1.union(fs2)
         fs3.restoreframe(frame)
-        assert isinstance(frame.fastlocals_w[-1], Variable) # generalized
+        assert isinstance(frame.locals_stack_w[frame.nlocals-1], Variable)
+                                 # ^^^ generalized
 
     def test_union_spectag(self):
         frame = self.getframe(self.func_simple)
         fs1 = FrameState(frame)
-        frame.fastlocals_w[-1] = Constant(SpecTag())
+        frame.locals_stack_w[frame.nlocals-1] = Constant(SpecTag())
         fs2 = FrameState(frame)
         assert fs1.union(fs2) is None   # UnionError
diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py
--- a/pypy/objspace/std/bytearrayobject.py
+++ b/pypy/objspace/std/bytearrayobject.py
@@ -374,7 +374,7 @@
             raise operationerrfmt(
                 space.w_TypeError,
                 "sequence item %d: expected string, %s "
-                "found", i, space.type(w_s).getname(space, '?'))
+                "found", i, space.type(w_s).getname(space))
 
         if data and i != 0:
             newdata.extend(data)
diff --git a/pypy/objspace/std/celldict.py b/pypy/objspace/std/celldict.py
--- a/pypy/objspace/std/celldict.py
+++ b/pypy/objspace/std/celldict.py
@@ -4,8 +4,9 @@
 speed up global lookups a lot."""
 
 from pypy.objspace.std.dictmultiobject import IteratorImplementation
-from pypy.objspace.std.dictmultiobject import W_DictMultiObject, _is_sane_hash
-from pypy.rlib import jit
+from pypy.objspace.std.dictmultiobject import DictStrategy, _never_equal_to_string
+from pypy.objspace.std.dictmultiobject import ObjectDictStrategy
+from pypy.rlib import jit, rerased
 
 class ModuleCell(object):
     def __init__(self, w_value=None):
@@ -19,49 +20,59 @@
     def __repr__(self):
         return "<ModuleCell: %s>" % (self.w_value, )
 
-class ModuleDictImplementation(W_DictMultiObject):
+class ModuleDictStrategy(DictStrategy):
+
+    erase, unerase = rerased.new_erasing_pair("modulecell")
+    erase = staticmethod(erase)
+    unerase = staticmethod(unerase)
+
     def __init__(self, space):
         self.space = space
-        self.content = {}
 
-    def getcell(self, key, makenew):
+    def get_empty_storage(self):
+        return self.erase({})
+
+    def getcell(self, w_dict, key, makenew):
         if makenew or jit.we_are_jitted():
             # when we are jitting, we always go through the pure function
             # below, to ensure that we have no residual dict lookup
-            self = jit.hint(self, promote=True)
-            return self._getcell_makenew(key)
-        return self.content.get(key, None)
+            w_dict = jit.promote(w_dict)
+            self = jit.promote(self)
+            return self._getcell_makenew(w_dict, key)
+        return self.unerase(w_dict.dstorage).get(key, None)
 
-    @jit.purefunction
-    def _getcell_makenew(self, key):
-        return self.content.setdefault(key, ModuleCell())
+    @jit.elidable
+    def _getcell_makenew(self, w_dict, key):
+        return self.unerase(w_dict.dstorage).setdefault(key, ModuleCell())
 
-    def impl_setitem(self, w_key, w_value):
+    def setitem(self, w_dict, w_key, w_value):
         space = self.space
         if space.is_w(space.type(w_key), space.w_str):
-            self.impl_setitem_str(self.space.str_w(w_key), w_value)
+            self.setitem_str(w_dict, self.space.str_w(w_key), w_value)
         else:
-            self._as_rdict().impl_fallback_setitem(w_key, w_value)
+            self.switch_to_object_strategy(w_dict)
+            w_dict.setitem(w_key, w_value)
 
-    def impl_setitem_str(self, name, w_value):
-        self.getcell(name, True).w_value = w_value
+    def setitem_str(self, w_dict, key, w_value):
+        self.getcell(w_dict, key, True).w_value = w_value
 
-    def impl_setdefault(self, w_key, w_default):
+    def setdefault(self, w_dict, w_key, w_default):
         space = self.space
         if space.is_w(space.type(w_key), space.w_str):
-            cell = self.getcell(space.str_w(w_key), True)
+            cell = self.getcell(w_dict, space.str_w(w_key), True)
             if cell.w_value is None:
                 cell.w_value = w_default
             return cell.w_value
         else:
-            return self._as_rdict().impl_fallback_setdefault(w_key, w_default)
+            self.switch_to_object_strategy(w_dict)
+            return w_dict.setdefault(w_key, w_default)
 
-    def impl_delitem(self, w_key):
+    def delitem(self, w_dict, w_key):
         space = self.space
         w_key_type = space.type(w_key)
         if space.is_w(w_key_type, space.w_str):
             key = space.str_w(w_key)
-            cell = self.getcell(key, False)
+            cell = self.getcell(w_dict, key, False)
             if cell is None or cell.w_value is None:
                 raise KeyError
             # note that we don't remove the cell from self.content, to make
@@ -69,75 +80,91 @@
             # maps to the same cell later (even if this cell no longer
             # represents a key)
             cell.invalidate()
-        elif _is_sane_hash(space, w_key_type):
+        elif _never_equal_to_string(space, w_key_type):
             raise KeyError
         else:
-            self._as_rdict().impl_fallback_delitem(w_key)
-        
-    def impl_length(self):
+            self.switch_to_object_strategy(w_dict)
+            w_dict.delitem(w_key)
+
+    def length(self, w_dict):
         # inefficient, but do we care?
         res = 0
-        for cell in self.content.itervalues():
+        for cell in self.unerase(w_dict.dstorage).itervalues():
             if cell.w_value is not None:
                 res += 1
         return res
 
-    def impl_getitem(self, w_lookup):
+    def getitem(self, w_dict, w_key):
         space = self.space
-        w_lookup_type = space.type(w_lookup)
+        w_lookup_type = space.type(w_key)
         if space.is_w(w_lookup_type, space.w_str):
-            return self.impl_getitem_str(space.str_w(w_lookup))
+            return self.getitem_str(w_dict, space.str_w(w_key))
 
-        elif _is_sane_hash(space, w_lookup_type):
+        elif _never_equal_to_string(space, w_lookup_type):
             return None
         else:
-            return self._as_rdict().impl_fallback_getitem(w_lookup)
+            self.switch_to_object_strategy(w_dict)
+            return w_dict.getitem(w_key)
 
-    def impl_getitem_str(self, lookup):
-        res = self.getcell(lookup, False)
+    def getitem_str(self, w_dict, key):
+        res = self.getcell(w_dict, key, False)
         if res is None:
             return None
         # note that even if the res.w_value is None, the next line is fine
         return res.w_value
 
-    def impl_iter(self):
-        return ModuleDictIteratorImplementation(self.space, self)
+    def iter(self, w_dict):
+        return ModuleDictIteratorImplementation(self.space, self, w_dict)
 
-    def impl_keys(self):
+    def keys(self, w_dict):
         space = self.space
-        return [space.wrap(key) for key, cell in self.content.iteritems()
+        iterator = self.unerase(w_dict.dstorage).iteritems
+        return [space.wrap(key) for key, cell in iterator()
                     if cell.w_value is not None]
 
-    def impl_values(self):
-        return [cell.w_value for cell in self.content.itervalues()
+    def values(self, w_dict):
+        iterator = self.unerase(w_dict.dstorage).itervalues
+        return [cell.w_value for cell in iterator()
                     if cell.w_value is not None]
 
-    def impl_items(self):
+    def items(self, w_dict):
         space = self.space
+        iterator = self.unerase(w_dict.dstorage).iteritems
         return [space.newtuple([space.wrap(key), cell.w_value])
-                    for (key, cell) in self.content.iteritems()
+                    for (key, cell) in iterator()
                         if cell.w_value is not None]
 
-    def impl_clear(self):
-        for k, cell in self.content.iteritems():
+    def clear(self, w_dict):
+        iterator = self.unerase(w_dict.dstorage).iteritems
+        for k, cell in iterator():
             cell.invalidate()
 
-    def _as_rdict(self):
-        r_dict_content = self.initialize_as_rdict()
-        for k, cell in self.content.iteritems():
+    def popitem(self, w_dict):
+        # This is O(n) if called repeatedly; you probably shouldn't be
+        # calling popitem on a module's dict, though
+        for k, cell in self.unerase(w_dict.dstorage).iteritems():
             if cell.w_value is not None:
-                r_dict_content[self.space.wrap(k)] = cell.w_value
-            cell.invalidate()
-        self._clear_fields()
-        return self
+                w_value = cell.w_value
+                cell.invalidate()
+                return self.space.wrap(k), w_value
+        else:
+            raise KeyError
 
-    def _clear_fields(self):
-        self.content = None
+    def switch_to_object_strategy(self, w_dict):
+        d = self.unerase(w_dict.dstorage)
+        strategy = self.space.fromcache(ObjectDictStrategy)
+        d_new = strategy.unerase(strategy.get_empty_storage())
+        for key, cell in d.iteritems():
+            if cell.w_value is not None:
+                d_new[self.space.wrap(key)] = cell.w_value
+        w_dict.strategy = strategy
+        w_dict.dstorage = strategy.erase(d_new)
 
 class ModuleDictIteratorImplementation(IteratorImplementation):
-    def __init__(self, space, dictimplementation):
+    def __init__(self, space, strategy, dictimplementation):
         IteratorImplementation.__init__(self, space, dictimplementation)
-        self.iterator = dictimplementation.content.iteritems()
+        dict_w = strategy.unerase(dictimplementation.dstorage)
+        self.iterator = dict_w.iteritems()
 
     def next_entry(self):
         for key, cell in self.iterator:
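
ModuleDictStrategy keeps one ModuleCell per key and only invalidates cells on
deletion, so the elidable _getcell_makenew can be folded away by the JIT while the
current value is still read through the cell. A minimal pure-Python sketch of the
cell indirection, with hypothetical names and no JIT or erased storage:

    class Cell(object):
        def __init__(self, value=None):
            self.value = value

    class ModuleDictSketch(object):
        def __init__(self):
            self.cells = {}                 # name -> Cell; cells are never removed

        def setitem(self, key, value):
            self.cells.setdefault(key, Cell()).value = value

        def getitem(self, key):
            cell = self.cells.get(key)
            if cell is None or cell.value is None:
                raise KeyError(key)
            return cell.value

        def delitem(self, key):
            # invalidate instead of removing: a lookup that was folded to this
            # exact cell stays valid and only needs to re-check cell.value
            self.getitem(key)               # KeyError if absent or already deleted
            self.cells[key].value = None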
diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py
--- a/pypy/objspace/std/dictmultiobject.py
+++ b/pypy/objspace/std/dictmultiobject.py
@@ -1,18 +1,20 @@
 import py, sys
 from pypy.objspace.std.model import registerimplementation, W_Object
 from pypy.objspace.std.register_all import register_all
+from pypy.objspace.std.settype import set_typedef as settypedef
 from pypy.interpreter import gateway
 from pypy.interpreter.argument import Signature
 from pypy.interpreter.error import OperationError, operationerrfmt
-from pypy.module.__builtin__.__init__ import BUILTIN_TO_INDEX, OPTIMIZED_BUILTINS
 
-from pypy.rlib.objectmodel import r_dict, we_are_translated
-from pypy.objspace.std.settype import set_typedef as settypedef
+from pypy.rlib.objectmodel import r_dict, we_are_translated, specialize
+from pypy.rlib.debug import mark_dict_non_null
+
+from pypy.rlib import rerased
 
 def _is_str(space, w_key):
     return space.is_w(space.type(w_key), space.w_str)
 
-def _is_sane_hash(space, w_lookup_type):
+def _never_equal_to_string(space, w_lookup_type):
     """ Handles the case of a non string key lookup.
     Types that have a sane hash/eq function should allow us to return True
     directly to signal that the key is not in the dict in any case.
@@ -28,48 +30,38 @@
 class W_DictMultiObject(W_Object):
     from pypy.objspace.std.dicttype import dict_typedef as typedef
 
-    r_dict_content = None
-
     @staticmethod
     def allocate_and_init_instance(space, w_type=None, module=False,
                                    instance=False, classofinstance=None,
                                    strdict=False):
+
         if space.config.objspace.std.withcelldict and module:
-            from pypy.objspace.std.celldict import ModuleDictImplementation
+            from pypy.objspace.std.celldict import ModuleDictStrategy
             assert w_type is None
-            return ModuleDictImplementation(space)
-        elif space.config.objspace.opcodes.CALL_LIKELY_BUILTIN and module:
-            assert w_type is None
-            return WaryDictImplementation(space)
-        elif space.config.objspace.std.withdictmeasurement:
-            assert w_type is None
-            return MeasuringDictImplementation(space)
+            strategy = space.fromcache(ModuleDictStrategy)
+
         elif instance or strdict or module:
             assert w_type is None
-            return StrDictImplementation(space)
+            strategy = space.fromcache(StringDictStrategy)
+
         else:
-            if w_type is None:
-                w_type = space.w_dict
-            w_self = space.allocate_instance(W_DictMultiObject, w_type)
-            W_DictMultiObject.__init__(w_self, space)
-            return w_self
+            strategy = space.fromcache(EmptyDictStrategy)
 
-    def __init__(self, space):
+        if w_type is None:
+            w_type = space.w_dict
+        storage = strategy.get_empty_storage()
+        w_self = space.allocate_instance(W_DictMultiObject, w_type)
+        W_DictMultiObject.__init__(w_self, space, strategy, storage)
+        return w_self
+
+    def __init__(self, space, strategy, storage):
         self.space = space
-
-    def initialize_as_rdict(self):
-        assert self.r_dict_content is None
-        self.r_dict_content = r_dict(self.space.eq_w, self.space.hash_w)
-        return self.r_dict_content
-
-
-    def initialize_content(w_self, list_pairs_w):
-        for w_k, w_v in list_pairs_w:
-            w_self.setitem(w_k, w_v)
+        self.strategy = strategy
+        self.dstorage = storage
 
     def __repr__(w_self):
         """ representation for debugging purposes """
-        return "%s()" % (w_self.__class__.__name__, )
+        return "%s(%s)" % (w_self.__class__.__name__, w_self.strategy)
 
     def unwrap(w_dict, space):
         result = {}
@@ -88,51 +80,38 @@
         else:
             return None
 
-    # _________________________________________________________________
-    # implementation methods
-    def impl_getitem(self, w_key):
-        #return w_value or None
-        # in case the key is unhashable, try to hash it
-        self.space.hash(w_key)
-        # return None anyway
-        return None
+    def initialize_content(w_self, list_pairs_w):
+        for w_k, w_v in list_pairs_w:
+            w_self.setitem(w_k, w_v)
 
-    def impl_getitem_str(self, key):
-        #return w_value or None
-        return None
+def _add_indirections():
+    dict_methods = "setitem setitem_str getitem \
+                    getitem_str delitem length \
+                    clear keys values \
+                    items iter setdefault \
+                    popitem".split()
 
-    def impl_setdefault(self, w_key, w_default):
-        # here the dict is always empty
-        self._as_rdict().impl_fallback_setitem(w_key, w_default)
-        return w_default
+    def make_method(method):
+        def f(self, *args):
+            return getattr(self.strategy, method)(self, *args)
+        f.func_name = method
+        return f
 
-    def impl_setitem(self, w_key, w_value):
-        self._as_rdict().impl_fallback_setitem(w_key, w_value)
+    for method in dict_methods:
+        setattr(W_DictMultiObject, method, make_method(method))
 
-    def impl_setitem_str(self, key, w_value):
-        self._as_rdict().impl_fallback_setitem_str(key, w_value)
+_add_indirections()
 
-    def impl_delitem(self, w_key):
-        # in case the key is unhashable, try to hash it
-        self.space.hash(w_key)
-        raise KeyError
+class DictStrategy(object):
 
-    def impl_length(self):
-        return 0
+    def __init__(self, space):
+        self.space = space
 
-    def impl_iter(self):
-        # XXX I guess it's not important to be fast in this case?
-        return self._as_rdict().impl_fallback_iter()
+    def get_empty_storage(self):
+        raise NotImplementedError
 
-    def impl_clear(self):
-        self.r_dict_content = None
-
-    def _as_rdict(self):
-        r_dict_content = self.initialize_as_rdict()
-        return self
-
-    def impl_keys(self):
-        iterator = self.impl_iter()
+    def keys(self, w_dict):
+        iterator = self.iter(w_dict)
         result = []
         while 1:
             w_key, w_value = iterator.next()
@@ -140,8 +119,9 @@
                 result.append(w_key)
             else:
                 return result
-    def impl_values(self):
-        iterator = self.impl_iter()
+
+    def values(self, w_dict):
+        iterator = self.iter(w_dict)
         result = []
         while 1:
             w_key, w_value = iterator.next()
@@ -149,8 +129,9 @@
                 result.append(w_value)
             else:
                 return result
-    def impl_items(self):
-        iterator = self.impl_iter()
+
+    def items(self, w_dict):
+        iterator = self.iter(w_dict)
         result = []
         while 1:
             w_key, w_value = iterator.next()
@@ -159,106 +140,90 @@
             else:
                 return result
 
-    # the following method only makes sense when the option to use the
-    # CALL_LIKELY_BUILTIN opcode is set. Otherwise it won't even be seen
-    # by the annotator
-    def impl_get_builtin_indexed(self, i):
-        key = OPTIMIZED_BUILTINS[i]
-        return self.impl_getitem_str(key)
+    def clear(self, w_dict):
+        strategy = self.space.fromcache(EmptyDictStrategy)
+        storage = strategy.get_empty_storage()
+        w_dict.strategy = strategy
+        w_dict.dstorage = storage
 
-    def impl_popitem(self):
-        # default implementation
-        space = self.space
-        iterator = self.impl_iter()
-        w_key, w_value = iterator.next()
-        if w_key is None:
-            raise KeyError
-        self.impl_delitem(w_key)
-        return w_key, w_value
 
-    # _________________________________________________________________
-    # fallback implementation methods
+class EmptyDictStrategy(DictStrategy):
 
-    def impl_fallback_setdefault(self, w_key, w_default):
-        return self.r_dict_content.setdefault(w_key, w_default)
+    erase, unerase = rerased.new_erasing_pair("empty")
+    erase = staticmethod(erase)
+    unerase = staticmethod(unerase)
 
-    def impl_fallback_setitem(self, w_key, w_value):
-        self.r_dict_content[w_key] = w_value
+    def get_empty_storage(self):
+        return self.erase(None)
 
-    def impl_fallback_setitem_str(self, key, w_value):
-        return self.impl_fallback_setitem(self.space.wrap(key), w_value)
+    def switch_to_correct_strategy(self, w_dict, w_key):
+        #XXX implement other strategies later
+        if type(w_key) is self.space.StringObjectCls:
+            self.switch_to_string_strategy(w_dict)
+        elif self.space.is_w(self.space.type(w_key), self.space.w_int):
+            self.switch_to_int_strategy(w_dict)
+        else:
+            self.switch_to_object_strategy(w_dict)
 
-    def impl_fallback_delitem(self, w_key):
-        del self.r_dict_content[w_key]
+    def switch_to_string_strategy(self, w_dict):
+        strategy = self.space.fromcache(StringDictStrategy)
+        storage = strategy.get_empty_storage()
+        w_dict.strategy = strategy
+        w_dict.dstorage = storage
 
-    def impl_fallback_length(self):
-        return len(self.r_dict_content)
+    def switch_to_int_strategy(self, w_dict):
+        strategy = self.space.fromcache(IntDictStrategy)
+        storage = strategy.get_empty_storage()
+        w_dict.strategy = strategy
+        w_dict.dstorage = storage
 
-    def impl_fallback_getitem(self, w_key):
-        return self.r_dict_content.get(w_key, None)
+    def switch_to_object_strategy(self, w_dict):
+        strategy = self.space.fromcache(ObjectDictStrategy)
+        storage = strategy.get_empty_storage()
+        w_dict.strategy = strategy
+        w_dict.dstorage = storage
 
-    def impl_fallback_getitem_str(self, key):
-        return self.r_dict_content.get(self.space.wrap(key), None)
+    def getitem(self, w_dict, w_key):
+        #return w_value or None
+        # in case the key is unhashable, try to hash it
+        self.space.hash(w_key)
+        # return None anyway
+        return None
 
-    def impl_fallback_iter(self):
-        return RDictIteratorImplementation(self.space, self)
+    def getitem_str(self, w_dict, key):
+        #return w_value or None
+        return None
 
-    def impl_fallback_keys(self):
-        return self.r_dict_content.keys()
-    def impl_fallback_values(self):
-        return self.r_dict_content.values()
-    def impl_fallback_items(self):
-        return [self.space.newtuple([w_key, w_val])
-                    for w_key, w_val in self.r_dict_content.iteritems()]
+    def setdefault(self, w_dict, w_key, w_default):
+        # here the dict is always empty
+        self.switch_to_correct_strategy(w_dict, w_key)
+        w_dict.setitem(w_key, w_default)
+        return w_default
 
-    def impl_fallback_clear(self):
-        self.r_dict_content.clear()
+    def setitem(self, w_dict, w_key, w_value):
+        self.switch_to_correct_strategy(w_dict, w_key)
+        w_dict.setitem(w_key, w_value)
 
-    def impl_fallback_get_builtin_indexed(self, i):
-        key = OPTIMIZED_BUILTINS[i]
-        return self.impl_fallback_getitem_str(key)
+    def setitem_str(self, w_dict, key, w_value):
+        self.switch_to_string_strategy(w_dict)
+        w_dict.setitem_str(key, w_value)
 
-    def impl_fallback_popitem(self):
-        return self.r_dict_content.popitem()
+    def delitem(self, w_dict, w_key):
+        # in case the key is unhashable, try to hash it
+        self.space.hash(w_key)
+        raise KeyError
 
+    def length(self, w_dict):
+        return 0
 
-implementation_methods = [
-    ("getitem", 1),
-    ("getitem_str", 1),
-    ("length", 0),
-    ("setitem_str", 2),
-    ("setitem", 2),
-    ("setdefault", 2),
-    ("delitem", 1),
-    ("iter", 0),
-    ("items", 0),
-    ("values", 0),
-    ("keys", 0),
-    ("clear", 0),
-    ("get_builtin_indexed", 1),
-    ("popitem", 0),
-]
+    def iter(self, w_dict):
+        return EmptyIteratorImplementation(self.space, w_dict)
 
+    def clear(self, w_dict):
+        return
 
-def _make_method(name, implname, fallback, numargs):
-    args = ", ".join(["a" + str(i) for i in range(numargs)])
-    code = """def %s(self, %s):
-        if self.r_dict_content is not None:
-            return self.%s(%s)
-        return self.%s(%s)""" % (name, args, fallback, args, implname, args)
-    d = {}
-    exec py.code.Source(code).compile() in d
-    implementation_method = d[name]
-    implementation_method.func_defaults = getattr(W_DictMultiObject, implname).func_defaults
-    return implementation_method
-
-def _install_methods():
-    for name, numargs in implementation_methods:
-        implname = "impl_" + name
-        fallbackname = "impl_fallback_" + name
-        func = _make_method(name, implname, fallbackname, numargs)
-        setattr(W_DictMultiObject, name, func)
-_install_methods()
+    def popitem(self, w_dict):
+        raise KeyError
 
 registerimplementation(W_DictMultiObject)
 
@@ -300,319 +265,255 @@
             return self.len - self.pos
         return 0
 
+class EmptyIteratorImplementation(IteratorImplementation):
+    def next(self):
+        return (None, None)
+
 
 
 # concrete subclasses of the above
 
-class StrDictImplementation(W_DictMultiObject):
-    def __init__(self, space):
-        self.space = space
-        self.content = {}
+class AbstractTypedStrategy(object):
+    _mixin_ = True
 
-    def impl_setitem(self, w_key, w_value):
+    @staticmethod
+    def erase(storage):
+        raise NotImplementedError("abstract base class")
+
+    @staticmethod
+    def unerase(obj):
+        raise NotImplementedError("abstract base class")
+
+    def wrap(self, unwrapped):
+        raise NotImplementedError
+
+    def unwrap(self, wrapped):
+        raise NotImplementedError
+
+    def is_correct_type(self, w_obj):
+        raise NotImplementedError("abstract base class")
+
+    def get_empty_storage(self):
+        raise NotImplementedError("abstract base class")
+
+    def _never_equal_to(self, w_lookup_type):
+        raise NotImplementedError("abstract base class")
+
+    def setitem(self, w_dict, w_key, w_value):
         space = self.space
-        if space.is_w(space.type(w_key), space.w_str):
-            self.impl_setitem_str(self.space.str_w(w_key), w_value)
+        if self.is_correct_type(w_key):
+            self.unerase(w_dict.dstorage)[self.unwrap(w_key)] = w_value
+            return
         else:
-            self._as_rdict().impl_fallback_setitem(w_key, w_value)
+            self.switch_to_object_strategy(w_dict)
+            w_dict.setitem(w_key, w_value)
 
-    def impl_setitem_str(self, key, w_value):
-        self.content[key] = w_value
+    def setitem_str(self, w_dict, key, w_value):
+        self.switch_to_object_strategy(w_dict)
+        w_dict.setitem(self.space.wrap(key), w_value)
 
-    def impl_setdefault(self, w_key, w_default):
+    def setdefault(self, w_dict, w_key, w_default):
         space = self.space
-        if space.is_w(space.type(w_key), space.w_str):
-            return self.content.setdefault(space.str_w(w_key), w_default)
+        if self.is_correct_type(w_key):
+            return self.unerase(w_dict.dstorage).setdefault(self.unwrap(w_key), w_default)
         else:
-            return self._as_rdict().impl_fallback_setdefault(w_key, w_default)
+            self.switch_to_object_strategy(w_dict)
+            return w_dict.setdefault(w_key, w_default)
 
-
-    def impl_delitem(self, w_key):
+    def delitem(self, w_dict, w_key):
         space = self.space
         w_key_type = space.type(w_key)
-        if space.is_w(w_key_type, space.w_str):
-            del self.content[space.str_w(w_key)]
+        if self.is_correct_type(w_key):
+            del self.unerase(w_dict.dstorage)[self.unwrap(w_key)]
             return
-        elif _is_sane_hash(space, w_key_type):
-            raise KeyError
         else:
-            self._as_rdict().impl_fallback_delitem(w_key)
+            self.switch_to_object_strategy(w_dict)
+            return w_dict.delitem(w_key)
 
-    def impl_length(self):
-        return len(self.content)
+    def length(self, w_dict):
+        return len(self.unerase(w_dict.dstorage))
 
-    def impl_getitem_str(self, key):
-        return self.content.get(key, None)
+    def getitem_str(self, w_dict, key):
+        return self.getitem(w_dict, self.space.wrap(key))
 
-    def impl_getitem(self, w_key):
+    def getitem(self, w_dict, w_key):
+        space = self.space
+
+        if self.is_correct_type(w_key):
+            return self.unerase(w_dict.dstorage).get(self.unwrap(w_key), None)
+        elif self._never_equal_to(space.type(w_key)):
+            return None
+        else:
+            self.switch_to_object_strategy(w_dict)
+            return w_dict.getitem(w_key)
+
+    def keys(self, w_dict):
+        return [self.wrap(key) for key in self.unerase(w_dict.dstorage).iterkeys()]
+
+    def values(self, w_dict):
+        return self.unerase(w_dict.dstorage).values()
+
+    def items(self, w_dict):
+        space = self.space
+        dict_w = self.unerase(w_dict.dstorage)
+        return [space.newtuple([self.wrap(key), w_value])
+                    for (key, w_value) in dict_w.iteritems()]
+
+    def popitem(self, w_dict):
+        key, value = self.unerase(w_dict.dstorage).popitem()
+        return (self.wrap(key), value)
+
+    def clear(self, w_dict):
+        self.unerase(w_dict.dstorage).clear()
+
+    def switch_to_object_strategy(self, w_dict):
+        d = self.unerase(w_dict.dstorage)
+        strategy = self.space.fromcache(ObjectDictStrategy)
+        d_new = strategy.unerase(strategy.get_empty_storage())
+        for key, value in d.iteritems():
+            d_new[self.wrap(key)] = value
+        w_dict.strategy = strategy
+        w_dict.dstorage = strategy.erase(d_new)
+
+class ObjectDictStrategy(AbstractTypedStrategy, DictStrategy):
+
+    erase, unerase = rerased.new_erasing_pair("object")
+    erase = staticmethod(erase)
+    unerase = staticmethod(unerase)
+
+    def wrap(self, unwrapped):
+        return unwrapped
+
+    def unwrap(self, wrapped):
+        return wrapped
+
+    def is_correct_type(self, w_obj):
+        return True
+
+    def get_empty_storage(self):
+        new_dict = r_dict(self.space.eq_w, self.space.hash_w,
+                          force_non_null=True)
+        return self.erase(new_dict)
+
+    def _never_equal_to(self, w_lookup_type):
+        return False
+
+    def iter(self, w_dict):
+        return ObjectIteratorImplementation(self.space, self, w_dict)
+
+    def keys(self, w_dict):
+        return self.unerase(w_dict.dstorage).keys()
+
+class StringDictStrategy(AbstractTypedStrategy, DictStrategy):
+
+    erase, unerase = rerased.new_erasing_pair("string")
+    erase = staticmethod(erase)
+    unerase = staticmethod(unerase)
+
+    def wrap(self, unwrapped):
+        return self.space.wrap(unwrapped)
+
+    def unwrap(self, wrapped):
+        return self.space.str_w(wrapped)
+
+    def is_correct_type(self, w_obj):
+        space = self.space
+        return space.is_w(space.type(w_obj), space.w_str)
+
+    def get_empty_storage(self):
+        res = {}
+        mark_dict_non_null(res)
+        return self.erase(res)
+
+    def _never_equal_to(self, w_lookup_type):
+        return _never_equal_to_string(self.space, w_lookup_type)
+
+    def setitem_str(self, w_dict, key, w_value):
+        assert key is not None
+        self.unerase(w_dict.dstorage)[key] = w_value
+
+    def getitem(self, w_dict, w_key):
         space = self.space
         # -- This is called extremely often.  Hack for performance --
         if type(w_key) is space.StringObjectCls:
-            return self.impl_getitem_str(w_key.unwrap(space))
+            return self.getitem_str(w_dict, w_key.unwrap(space))
         # -- End of performance hack --
-        w_lookup_type = space.type(w_key)
-        if space.is_w(w_lookup_type, space.w_str):
-            return self.impl_getitem_str(space.str_w(w_key))
-        elif _is_sane_hash(space, w_lookup_type):
-            return None
-        else:
-            return self._as_rdict().impl_fallback_getitem(w_key)
+        return AbstractTypedStrategy.getitem(self, w_dict, w_key)
 
-    def impl_iter(self):
-        return StrIteratorImplementation(self.space, self)
+    def getitem_str(self, w_dict, key):
+        assert key is not None
+        return self.unerase(w_dict.dstorage).get(key, None)
 
-    def impl_keys(self):
-        space = self.space
-        return [space.wrap(key) for key in self.content.iterkeys()]
+    def iter(self, w_dict):
+        return StrIteratorImplementation(self.space, self, w_dict)
 
-    def impl_values(self):
-        return self.content.values()
-
-    def impl_items(self):
-        space = self.space
-        return [space.newtuple([space.wrap(key), w_value])
-                    for (key, w_value) in self.content.iteritems()]
-
-    def impl_clear(self):
-        self.content.clear()
-
-
-    def _as_rdict(self):
-        r_dict_content = self.initialize_as_rdict()
-        for k, w_v in self.content.items():
-            r_dict_content[self.space.wrap(k)] = w_v
-        self._clear_fields()
-        return self
-
-    def _clear_fields(self):
-        self.content = None
 
 class StrIteratorImplementation(IteratorImplementation):
-    def __init__(self, space, dictimplementation):
+    def __init__(self, space, strategy, dictimplementation):
         IteratorImplementation.__init__(self, space, dictimplementation)
-        self.iterator = dictimplementation.content.iteritems()
+        self.iterator = strategy.unerase(dictimplementation.dstorage).iteritems()
 
     def next_entry(self):
         # note that this 'for' loop only runs once, at most
-        for str, w_value in self.iterator:
-            return self.space.wrap(str), w_value
+        for key, w_value in self.iterator:
+            return self.space.wrap(key), w_value
         else:
             return None, None
 
 
-class WaryDictImplementation(StrDictImplementation):
-    def __init__(self, space):
-        StrDictImplementation.__init__(self, space)
-        self.shadowed = [None] * len(BUILTIN_TO_INDEX)
+class IntDictStrategy(AbstractTypedStrategy, DictStrategy):
+    erase, unerase = rerased.new_erasing_pair("int")
+    erase = staticmethod(erase)
+    unerase = staticmethod(unerase)
 
-    def impl_setitem_str(self, key, w_value):
-        i = BUILTIN_TO_INDEX.get(key, -1)
-        if i != -1:
-            self.shadowed[i] = w_value
-        self.content[key] = w_value
+    def wrap(self, unwrapped):
+        return self.space.wrap(unwrapped)
 
-    def impl_delitem(self, w_key):
+    def unwrap(self, wrapped):
+        return self.space.int_w(wrapped)
+
+    def get_empty_storage(self):
+        return self.erase({})
+
+    def is_correct_type(self, w_obj):
         space = self.space
-        w_key_type = space.type(w_key)
-        if space.is_w(w_key_type, space.w_str):
-            key = space.str_w(w_key)
-            del self.content[key]
-            i = BUILTIN_TO_INDEX.get(key, -1)
-            if i != -1:
-                self.shadowed[i] = None
-        elif _is_sane_hash(space, w_key_type):
-            raise KeyError
-        else:
-            self._as_rdict().impl_fallback_delitem(w_key)
+        return space.is_w(space.type(w_obj), space.w_int)
 
-    def impl_get_builtin_indexed(self, i):
-        return self.shadowed[i]
+    def _never_equal_to(self, w_lookup_type):
+        space = self.space
+        # XXX there are many more types
+        return (space.is_w(w_lookup_type, space.w_NoneType) or
+                space.is_w(w_lookup_type, space.w_str) or
+                space.is_w(w_lookup_type, space.w_unicode)
+                )
 
+    def iter(self, w_dict):
+        return IntIteratorImplementation(self.space, self, w_dict)
 
-class RDictIteratorImplementation(IteratorImplementation):
-    def __init__(self, space, dictimplementation):
+class IntIteratorImplementation(IteratorImplementation):
+    def __init__(self, space, strategy, dictimplementation):
         IteratorImplementation.__init__(self, space, dictimplementation)
-        self.iterator = dictimplementation.r_dict_content.iteritems()
+        self.iterator = strategy.unerase(dictimplementation.dstorage).iteritems()
 
     def next_entry(self):
         # note that this 'for' loop only runs once, at most
-        for item in self.iterator:
-            return item
+        for key, w_value in self.iterator:
+            return self.space.wrap(key), w_value
         else:
             return None, None
 
 
+class ObjectIteratorImplementation(IteratorImplementation):
+    def __init__(self, space, strategy, dictimplementation):
+        IteratorImplementation.__init__(self, space, dictimplementation)
+        self.iterator = strategy.unerase(dictimplementation.dstorage).iteritems()
 
-# XXX fix this thing
-import time
-
-class DictInfo(object):
-    _dict_infos = []
-    def __init__(self):
-        self.id = len(self._dict_infos)
-
-        self.setitem_strs = 0; self.setitems = 0;  self.delitems = 0
-        self.lengths = 0;   self.gets = 0
-        self.iteritems = 0; self.iterkeys = 0; self.itervalues = 0
-        self.keys = 0;      self.values = 0;   self.items = 0
-
-        self.maxcontents = 0
-
-        self.reads = 0
-        self.hits = self.misses = 0
-        self.writes = 0
-        self.iterations = 0
-        self.listings = 0
-
-        self.seen_non_string_in_write = 0
-        self.seen_non_string_in_read_first = 0
-        self.size_on_non_string_seen_in_read = -1
-        self.size_on_non_string_seen_in_write = -1
-
-        self.createtime = time.time()
-        self.lifetime = -1.0
-
-        if not we_are_translated():
-            # very probable stack from here:
-            # 0 - us
-            # 1 - MeasuringDictImplementation.__init__
-            # 2 - W_DictMultiObject.__init__
-            # 3 - space.newdict
-            # 4 - newdict's caller.  let's look at that
-            try:
-                frame = sys._getframe(4)
-            except ValueError:
-                pass # might be at import time
-            else:
-                self.sig = '(%s:%s)%s'%(frame.f_code.co_filename, frame.f_lineno, frame.f_code.co_name)
-
-        self._dict_infos.append(self)
-    def __repr__(self):
-        args = []
-        for k in sorted(self.__dict__):
-            v = self.__dict__[k]
-            if v != 0:
-                args.append('%s=%r'%(k, v))
-        return '<DictInfo %s>'%(', '.join(args),)
-
-class OnTheWayOut:
-    def __init__(self, info):
-        self.info = info
-    def __del__(self):
-        self.info.lifetime = time.time() - self.info.createtime
-
-class MeasuringDictImplementation(W_DictMultiObject):
-    def __init__(self, space):
-        self.space = space
-        self.content = r_dict(space.eq_w, space.hash_w)
-        self.info = DictInfo()
-        self.thing_with_del = OnTheWayOut(self.info)
-
-    def __repr__(self):
-        return "%s<%s>" % (self.__class__.__name__, self.content)
-
-    def _is_str(self, w_key):
-        space = self.space
-        return space.is_true(space.isinstance(w_key, space.w_str))
-    def _read(self, w_key):
-        self.info.reads += 1
-        if not self.info.seen_non_string_in_write \
-               and not self.info.seen_non_string_in_read_first \
-               and not self._is_str(w_key):
-            self.info.seen_non_string_in_read_first = True
-            self.info.size_on_non_string_seen_in_read = len(self.content)
-        hit = w_key in self.content
-        if hit:
-            self.info.hits += 1
+    def next_entry(self):
+        # note that this 'for' loop only runs once, at most
+        for w_key, w_value in self.iterator:
+            return w_key, w_value
         else:
-            self.info.misses += 1
-
-    def impl_setitem(self, w_key, w_value):
-        if not self.info.seen_non_string_in_write and not self._is_str(w_key):
-            self.info.seen_non_string_in_write = True
-            self.info.size_on_non_string_seen_in_write = len(self.content)
-        self.info.setitems += 1
-        self.info.writes += 1
-        self.content[w_key] = w_value
-        self.info.maxcontents = max(self.info.maxcontents, len(self.content))
-    def impl_setitem_str(self, key, w_value):
-        self.info.setitem_strs += 1
-        self.impl_setitem(self.space.wrap(key), w_value)
-    def impl_delitem(self, w_key):
-        if not self.info.seen_non_string_in_write \
-               and not self.info.seen_non_string_in_read_first \
-               and not self._is_str(w_key):
-            self.info.seen_non_string_in_read_first = True
-            self.info.size_on_non_string_seen_in_read = len(self.content)
-        self.info.delitems += 1
-        self.info.writes += 1
-        del self.content[w_key]
-
-    def impl_length(self):
-        self.info.lengths += 1
-        return len(self.content)
-    def impl_getitem_str(self, key):
-        return self.impl_getitem(self.space.wrap(key))
-    def impl_getitem(self, w_key):
-        self.info.gets += 1
-        self._read(w_key)
-        return self.content.get(w_key, None)
-
-    def impl_iteritems(self):
-        self.info.iteritems += 1
-        self.info.iterations += 1
-        return RDictItemIteratorImplementation(self.space, self)
-    def impl_iterkeys(self):
-        self.info.iterkeys += 1
-        self.info.iterations += 1
-        return RDictKeyIteratorImplementation(self.space, self)
-    def impl_itervalues(self):
-        self.info.itervalues += 1
-        self.info.iterations += 1
-        return RDictValueIteratorImplementation(self.space, self)
-
-    def impl_keys(self):
-        self.info.keys += 1
-        self.info.listings += 1
-        return self.content.keys()
-    def impl_values(self):
-        self.info.values += 1
-        self.info.listings += 1
-        return self.content.values()
-    def impl_items(self):
-        self.info.items += 1
-        self.info.listings += 1
-        return [self.space.newtuple([w_key, w_val])
-                    for w_key, w_val in self.content.iteritems()]
-
-
-_example = DictInfo()
-del DictInfo._dict_infos[-1]
-tmpl = 'os.write(fd, "%(attr)s" + ": " + str(info.%(attr)s) + "\\n")'
-bodySrc = []
-for attr in sorted(_example.__dict__):
-    if attr == 'sig':
-        continue
-    bodySrc.append(tmpl%locals())
-exec py.code.Source('''
-from pypy.rlib.objectmodel import current_object_addr_as_int
-def _report_one(fd, info):
-    os.write(fd, "_address" + ": " + str(current_object_addr_as_int(info))
-                 + "\\n")
-    %s
-'''%'\n    '.join(bodySrc)).compile()
-
-def report():
-    if not DictInfo._dict_infos:
-        return
-    os.write(2, "Starting multidict report.\n")
-    fd = os.open('dictinfo.txt', os.O_CREAT|os.O_WRONLY|os.O_TRUNC, 0644)
-    for info in DictInfo._dict_infos:
-        os.write(fd, '------------------\n')
-        _report_one(fd, info)
-    os.close(fd)
-    os.write(2, "Reporting done.\n")
-
+            return None, None
 
 
 init_signature = Signature(['seq_or_map'], None, 'kwargs')
@@ -835,6 +736,8 @@
 class W_DictMultiIterObject(W_Object):
     from pypy.objspace.std.dicttype import dictiter_typedef as typedef
 
+    _immutable_fields_ = ["iteratorimplementation", "itertype"]
+
     def __init__(w_self, space, iteratorimplementation, itertype):
         w_self.space = space
         w_self.iteratorimplementation = iteratorimplementation
@@ -919,7 +822,7 @@
 def repr__DictViewKeys(space, w_dictview):
     w_seq = space.call_function(space.w_list, w_dictview)
     w_repr = space.repr(w_seq)
-    return space.wrap("%s(%s)" % (space.type(w_dictview).getname(space, "?"),
+    return space.wrap("%s(%s)" % (space.type(w_dictview).getname(space),
                                   space.str_w(w_repr)))
 repr__DictViewItems  = repr__DictViewKeys
 repr__DictViewValues = repr__DictViewKeys
diff --git a/pypy/objspace/std/dictproxyobject.py b/pypy/objspace/std/dictproxyobject.py
--- a/pypy/objspace/std/dictproxyobject.py
+++ b/pypy/objspace/std/dictproxyobject.py
@@ -1,96 +1,98 @@
 from pypy.objspace.std.model import registerimplementation, W_Object
 from pypy.objspace.std.register_all import register_all
 from pypy.objspace.std.dictmultiobject import W_DictMultiObject, IteratorImplementation
+from pypy.objspace.std.dictmultiobject import DictStrategy
 from pypy.objspace.std.typeobject import unwrap_cell
 from pypy.interpreter.error import OperationError
 
+from pypy.rlib import rerased
 
-class W_DictProxyObject(W_DictMultiObject):
-    def __init__(w_self, space, w_type):
-        W_DictMultiObject.__init__(w_self, space)
-        w_self.w_type = w_type
 
-    def impl_getitem(self, w_lookup):
+class DictProxyStrategy(DictStrategy):
+
+    erase, unerase = rerased.new_erasing_pair("dictproxy")
+    erase = staticmethod(erase)
+    unerase = staticmethod(unerase)
+
+    def __init__(w_self, space):
+        DictStrategy.__init__(w_self, space)
+
+    def getitem(self, w_dict, w_key):
         space = self.space
-        w_lookup_type = space.type(w_lookup)
+        w_lookup_type = space.type(w_key)
         if space.is_w(w_lookup_type, space.w_str):
-            return self.impl_getitem_str(space.str_w(w_lookup))
+            return self.getitem_str(w_dict, space.str_w(w_key))
         else:
             return None
 
-    def impl_getitem_str(self, lookup):
-        return self.w_type.getdictvalue(self.space, lookup)
+    def getitem_str(self, w_dict, key):
+        return self.unerase(w_dict.dstorage).getdictvalue(self.space, key)
 
-    def impl_setitem(self, w_key, w_value):
+    def setitem(self, w_dict, w_key, w_value):
         space = self.space
         if space.is_w(space.type(w_key), space.w_str):
-            self.impl_setitem_str(self.space.str_w(w_key), w_value)
+            self.setitem_str(w_dict, self.space.str_w(w_key), w_value)
         else:
             raise OperationError(space.w_TypeError, space.wrap("cannot add non-string keys to dict of a type"))
 
-    def impl_setitem_str(self, name, w_value):
+    def setitem_str(self, w_dict, key, w_value):
+        w_type = self.unerase(w_dict.dstorage)
         try:
-            self.w_type.setdictvalue(self.space, name, w_value)
+            w_type.setdictvalue(self.space, key, w_value)
         except OperationError, e:
             if not e.match(self.space, self.space.w_TypeError):
                 raise
-            w_type = self.w_type
             if not w_type.is_cpytype():
                 raise
             # xxx obscure workaround: allow cpyext to write to type->tp_dict.
             # xxx like CPython, we assume that this is only done early after
             # xxx the type is created, and we don't invalidate any cache.
-            w_type.dict_w[name] = w_value
+            w_type.dict_w[key] = w_value
 
-    def impl_setdefault(self, w_key, w_default):
+    def setdefault(self, w_dict, w_key, w_default):
         space = self.space
-        w_result = self.impl_getitem(w_key)
+        w_result = self.getitem(w_dict, w_key)
         if w_result is not None:
             return w_result
-        self.impl_setitem(w_key, w_default)
+        self.setitem(w_dict, w_key, w_default)
         return w_default
 
-    def impl_delitem(self, w_key):
+    def delitem(self, w_dict, w_key):
         space = self.space
         w_key_type = space.type(w_key)
         if space.is_w(w_key_type, space.w_str):
-            if not self.w_type.deldictvalue(space, w_key):
+            if not self.unerase(w_dict.dstorage).deldictvalue(space, w_key):
                 raise KeyError
         else:
             raise KeyError
 
-    def impl_length(self):
-        return len(self.w_type.dict_w)
+    def length(self, w_dict):
+        return len(self.unerase(w_dict.dstorage).dict_w)
 
-    def impl_iter(self):
-        return DictProxyIteratorImplementation(self.space, self)
+    def iter(self, w_dict):
+        return DictProxyIteratorImplementation(self.space, self, w_dict)
 
-    def impl_keys(self):
+    def keys(self, w_dict):
         space = self.space
-        return [space.wrap(key) for key in self.w_type.dict_w.iterkeys()]
+        return [space.wrap(key) for key in self.unerase(w_dict.dstorage).dict_w.iterkeys()]
 
-    def impl_values(self):
-        return [unwrap_cell(self.space, w_value) for w_value in self.w_type.dict_w.itervalues()]
+    def values(self, w_dict):
+        return [unwrap_cell(self.space, w_value) for w_value in self.unerase(w_dict.dstorage).dict_w.itervalues()]
 
-    def impl_items(self):
+    def items(self, w_dict):
         space = self.space
         return [space.newtuple([space.wrap(key), unwrap_cell(self.space, w_value)])
-                    for (key, w_value) in self.w_type.dict_w.iteritems()]
+                    for (key, w_value) in self.unerase(w_dict.dstorage).dict_w.iteritems()]
 
-    def impl_clear(self):
-        self.w_type.dict_w.clear()
-        self.w_type.mutated()
-
-    def _as_rdict(self):
-        assert 0, "should be unreachable"
-
-    def _clear_fields(self):
-        assert 0, "should be unreachable"
+    def clear(self, w_dict):
+        self.unerase(w_dict.dstorage).dict_w.clear()
+        self.unerase(w_dict.dstorage).mutated()
 
 class DictProxyIteratorImplementation(IteratorImplementation):
-    def __init__(self, space, dictimplementation):
+    def __init__(self, space, strategy, dictimplementation):
         IteratorImplementation.__init__(self, space, dictimplementation)
-        self.iterator = dictimplementation.w_type.dict_w.iteritems()
+        w_type = strategy.unerase(dictimplementation.dstorage)
+        self.iterator = w_type.dict_w.iteritems()
 
     def next_entry(self):
         for key, w_value in self.iterator:
diff --git a/pypy/objspace/std/frame.py b/pypy/objspace/std/frame.py
--- a/pypy/objspace/std/frame.py
+++ b/pypy/objspace/std/frame.py
@@ -6,7 +6,7 @@
 from pypy.interpreter import pyopcode, function
 from pypy.interpreter.pyframe import PyFrame
 from pypy.interpreter.error import OperationError, operationerrfmt
-from pypy.module.__builtin__ import OPTIMIZED_BUILTINS, Module
+from pypy.module.__builtin__ import Module
 from pypy.objspace.std import intobject, smallintobject
 from pypy.objspace.std.multimethod import FailedToImplement
 from pypy.objspace.std.dictmultiobject import W_DictMultiObject
@@ -66,41 +66,6 @@
         w_result = f.space.getitem(w_1, w_2)
     f.pushvalue(w_result)
 
-def CALL_LIKELY_BUILTIN(f, oparg, next_instr):
-    w_globals = f.w_globals
-    num = oparg >> 8
-    assert isinstance(w_globals, W_DictMultiObject)
-    w_value = w_globals.get_builtin_indexed(num)
-    if w_value is None:
-        builtins = f.get_builtin()
-        assert isinstance(builtins, Module)
-        w_builtin_dict = builtins.getdict(f.space)
-        assert isinstance(w_builtin_dict, W_DictMultiObject)
-        w_value = w_builtin_dict.get_builtin_indexed(num)
-    if w_value is None:
-        varname = OPTIMIZED_BUILTINS[num]
-        message = "global name '%s' is not defined"
-        raise operationerrfmt(f.space.w_NameError,
-                              message, varname)
-    nargs = oparg & 0xff
-    w_function = w_value
-    try:
-        w_result = call_likely_builtin(f, w_function, nargs)
-    finally:
-        f.dropvalues(nargs)
-    f.pushvalue(w_result)
-
-def call_likely_builtin(f, w_function, nargs):
-    if isinstance(w_function, function.Function):
-        executioncontext = f.space.getexecutioncontext()
-        executioncontext.c_call_trace(f, w_function)
-        res = w_function.funccall_valuestack(nargs, f)
-        executioncontext.c_return_trace(f, w_function)
-        return res
-    args = f.make_arguments(nargs)
-    return f.space.call_args(w_function, args)
-
-
 compare_table = [
     "lt",   # "<"
     "le",   # "<="
@@ -145,8 +110,6 @@
             StdObjSpaceFrame.BINARY_ADD = int_BINARY_ADD
     if space.config.objspace.std.optimized_list_getitem:
         StdObjSpaceFrame.BINARY_SUBSCR = list_BINARY_SUBSCR
-    if space.config.objspace.opcodes.CALL_LIKELY_BUILTIN:
-        StdObjSpaceFrame.CALL_LIKELY_BUILTIN = CALL_LIKELY_BUILTIN
     if space.config.objspace.opcodes.CALL_METHOD:
         from pypy.objspace.std.callmethod import LOOKUP_METHOD, CALL_METHOD
         StdObjSpaceFrame.LOOKUP_METHOD = LOOKUP_METHOD
diff --git a/pypy/objspace/std/mapdict.py b/pypy/objspace/std/mapdict.py
--- a/pypy/objspace/std/mapdict.py
+++ b/pypy/objspace/std/mapdict.py
@@ -4,9 +4,9 @@
 from pypy.rlib import rerased
 
 from pypy.interpreter.baseobjspace import W_Root
-from pypy.objspace.std.dictmultiobject import W_DictMultiObject
+from pypy.objspace.std.dictmultiobject import W_DictMultiObject, DictStrategy, ObjectDictStrategy
 from pypy.objspace.std.dictmultiobject import IteratorImplementation
-from pypy.objspace.std.dictmultiobject import _is_sane_hash
+from pypy.objspace.std.dictmultiobject import _never_equal_to_string
 from pypy.objspace.std.objectobject import W_ObjectObject
 from pypy.objspace.std.typeobject import TypeCell
 
@@ -53,7 +53,7 @@
         else:
             return self._index_indirection(selector)
 
-    @jit.purefunction
+    @jit.elidable
     def _index_jit_pure(self, name, index):
         return self._index_indirection((name, index))
 
@@ -113,14 +113,14 @@
     def set_terminator(self, obj, terminator):
         raise NotImplementedError("abstract base class")
 
-    @jit.purefunction
+    @jit.elidable
     def size_estimate(self):
         return self._size_estimate >> NUM_DIGITS
 
     def search(self, attrtype):
         return None
 
-    @jit.purefunction
+    @jit.elidable
     def _get_new_attr(self, name, index):
         selector = name, index
         cache = self.cache_attrs
@@ -154,7 +154,7 @@
         obj._set_mapdict_map(attr)
         obj._mapdict_write_storage(attr.position, w_value)
 
-    def materialize_r_dict(self, space, obj, w_d):
+    def materialize_r_dict(self, space, obj, dict_w):
         raise NotImplementedError("abstract base class")
 
     def remove_dict_entries(self, obj):
@@ -205,7 +205,7 @@
         Terminator.__init__(self, space, w_cls)
         self.devolved_dict_terminator = DevolvedDictTerminator(space, w_cls)
 
-    def materialize_r_dict(self, space, obj, w_d):
+    def materialize_r_dict(self, space, obj, dict_w):
         result = Object()
         result.space = space
         result._init_empty(self.devolved_dict_terminator)
@@ -297,11 +297,11 @@
             return self
         return self.back.search(attrtype)
 
-    def materialize_r_dict(self, space, obj, w_d):
-        new_obj = self.back.materialize_r_dict(space, obj, w_d)
+    def materialize_r_dict(self, space, obj, dict_w):
+        new_obj = self.back.materialize_r_dict(space, obj, dict_w)
         if self.selector[1] == DICT:
             w_attr = space.wrap(self.selector[0])
-            w_d.r_dict_content[w_attr] = obj._mapdict_read_storage(self.position)
+            dict_w[w_attr] = obj._mapdict_read_storage(self.position)
         else:
             self._copy_attr(obj, new_obj)
         return new_obj
@@ -357,7 +357,7 @@
         self._set_mapdict_storage_and_map(new_obj.storage, new_obj.map)
 
     def _get_mapdict_map(self):
-        return jit.hint(self.map, promote=True)
+        return jit.promote(self.map)
     def _set_mapdict_map(self, map):
         self.map = map
     # _____________________________________________
@@ -382,7 +382,10 @@
         if w_dict is not None:
             assert isinstance(w_dict, W_DictMultiObject)
             return w_dict
-        w_dict = MapDictImplementation(space, self)
+
+        strategy = space.fromcache(MapDictStrategy)
+        storage = strategy.erase(self)
+        w_dict = W_DictMultiObject(space, strategy, storage)
         flag = self._get_mapdict_map().write(self, ("dict", SPECIAL), w_dict)
         assert flag
         return w_dict
@@ -392,8 +395,8 @@
         w_dict = check_new_dictionary(space, w_dict)
         w_olddict = self.getdict(space)
         assert isinstance(w_dict, W_DictMultiObject)
-        if w_olddict.r_dict_content is None:
-            w_olddict._as_rdict()
+        if type(w_olddict.strategy) is not ObjectDictStrategy:
+            w_olddict.strategy.switch_to_object_strategy(w_olddict)
         flag = self._get_mapdict_map().write(self, ("dict", SPECIAL), w_dict)
         assert flag
 
@@ -575,105 +578,121 @@
 # ____________________________________________________________
 # dict implementation
 
+class MapDictStrategy(DictStrategy):
 
-class MapDictImplementation(W_DictMultiObject):
-    def __init__(self, space, w_obj):
+    erase, unerase = rerased.new_erasing_pair("map")
+    erase = staticmethod(erase)
+    unerase = staticmethod(unerase)
+
+    def __init__(self, space):
         self.space = space
-        self.w_obj = w_obj
 
-    def impl_getitem(self, w_lookup):
+    def switch_to_object_strategy(self, w_dict):
+        w_obj = self.unerase(w_dict.dstorage)
+        strategy = self.space.fromcache(ObjectDictStrategy)
+        dict_w = strategy.unerase(strategy.get_empty_storage())
+        w_dict.strategy = strategy
+        w_dict.dstorage = strategy.erase(dict_w)
+        assert w_obj.getdict(self.space) is w_dict
+        materialize_r_dict(self.space, w_obj, dict_w)
+
+    def getitem(self, w_dict, w_key):
         space = self.space
-        w_lookup_type = space.type(w_lookup)
+        w_lookup_type = space.type(w_key)
         if space.is_w(w_lookup_type, space.w_str):
-            return self.impl_getitem_str(space.str_w(w_lookup))
-        elif _is_sane_hash(space, w_lookup_type):
+            return self.getitem_str(w_dict, space.str_w(w_key))
+        elif _never_equal_to_string(space, w_lookup_type):
             return None
         else:
-            return self._as_rdict().impl_fallback_getitem(w_lookup)
+            self.switch_to_object_strategy(w_dict)
+            return w_dict.getitem(w_key)
 
-    def impl_getitem_str(self, key):
-        return self.w_obj.getdictvalue(self.space, key)
+    def getitem_str(self, w_dict, key):
+        w_obj = self.unerase(w_dict.dstorage)
+        return w_obj.getdictvalue(self.space, key)
 
-    def impl_setitem_str(self,  key, w_value):
-        flag = self.w_obj.setdictvalue(self.space, key, w_value)
+    def setitem_str(self, w_dict, key, w_value):
+        w_obj = self.unerase(w_dict.dstorage)
+        flag = w_obj.setdictvalue(self.space, key, w_value)
         assert flag
 
-    def impl_setitem(self,  w_key, w_value):
+    def setitem(self, w_dict, w_key, w_value):
         space = self.space
         if space.is_w(space.type(w_key), space.w_str):
-            self.impl_setitem_str(self.space.str_w(w_key), w_value)
+            self.setitem_str(w_dict, self.space.str_w(w_key), w_value)
         else:
-            self._as_rdict().impl_fallback_setitem(w_key, w_value)
+            self.switch_to_object_strategy(w_dict)
+            w_dict.setitem(w_key, w_value)
 
-    def impl_setdefault(self,  w_key, w_default):
+    def setdefault(self, w_dict, w_key, w_default):
         space = self.space
         if space.is_w(space.type(w_key), space.w_str):
             key = space.str_w(w_key)
-            w_result = self.impl_getitem_str(key)
+            w_result = self.getitem_str(w_dict, key)
             if w_result is not None:
                 return w_result
-            self.impl_setitem_str(key, w_default)
+            self.setitem_str(w_dict, key, w_default)
             return w_default
         else:
-            return self._as_rdict().impl_fallback_setdefault(w_key, w_default)
+            self.switch_to_object_strategy(w_dict)
+            return w_dict.setdefault(w_key, w_default)
 
-    def impl_delitem(self, w_key):
+    def delitem(self, w_dict, w_key):
         space = self.space
         w_key_type = space.type(w_key)
+        w_obj = self.unerase(w_dict.dstorage)
         if space.is_w(w_key_type, space.w_str):
-            flag = self.w_obj.deldictvalue(space, w_key)
+            flag = w_obj.deldictvalue(space, w_key)
             if not flag:
                 raise KeyError
-        elif _is_sane_hash(space, w_key_type):
+        elif _never_equal_to_string(space, w_key_type):
             raise KeyError
         else:
-            self._as_rdict().impl_fallback_delitem(w_key)
+            self.switch_to_object_strategy(w_dict)
+            w_dict.delitem(w_key)
 
-    def impl_length(self):
+    def length(self, w_dict):
         res = 0
-        curr = self.w_obj._get_mapdict_map().search(DICT)
+        curr = self.unerase(w_dict.dstorage)._get_mapdict_map().search(DICT)
         while curr is not None:
             curr = curr.back
             curr = curr.search(DICT)
             res += 1
         return res
 
-    def impl_iter(self):
-        return MapDictIteratorImplementation(self.space, self)
+    def iter(self, w_dict):
+        return MapDictIteratorImplementation(self.space, self, w_dict)
 
-    def impl_clear(self):
-        w_obj = self.w_obj
+    def clear(self, w_dict):
+        w_obj = self.unerase(w_dict.dstorage)
         new_obj = w_obj._get_mapdict_map().remove_dict_entries(w_obj)
         _become(w_obj, new_obj)
 
-    def _clear_fields(self):
-        self.w_obj = None
+    def popitem(self, w_dict):
+        curr = self.unerase(w_dict.dstorage)._get_mapdict_map().search(DICT)
+        if curr is None:
+            raise KeyError
+        key = curr.selector[0]
+        w_value = self.getitem_str(w_dict, key)
+        w_key = self.space.wrap(key)
+        self.delitem(w_dict, w_key)
+        return (w_key, w_value)
 
-    def _as_rdict(self):
-        self.initialize_as_rdict()
-        space = self.space
-        w_obj = self.w_obj
-        materialize_r_dict(space, w_obj, self)
-        self._clear_fields()
-        return self
-
-
-def materialize_r_dict(space, obj, w_d):
+def materialize_r_dict(space, obj, dict_w):
     map = obj._get_mapdict_map()
-    assert obj.getdict(space) is w_d
-    new_obj = map.materialize_r_dict(space, obj, w_d)
+    new_obj = map.materialize_r_dict(space, obj, dict_w)
     _become(obj, new_obj)
 
 class MapDictIteratorImplementation(IteratorImplementation):
-    def __init__(self, space, dictimplementation):
+    def __init__(self, space, strategy, dictimplementation):
         IteratorImplementation.__init__(self, space, dictimplementation)
-        w_obj = dictimplementation.w_obj
+        w_obj = strategy.unerase(dictimplementation.dstorage)
         self.w_obj = w_obj
         self.orig_map = self.curr_map = w_obj._get_mapdict_map()
 
     def next_entry(self):
         implementation = self.dictimplementation
-        assert isinstance(implementation, MapDictImplementation)
+        assert isinstance(implementation.strategy, MapDictStrategy)
         if self.orig_map is not self.w_obj._get_mapdict_map():
             return None, None
         if self.curr_map:
diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py
--- a/pypy/objspace/std/objspace.py
+++ b/pypy/objspace/std/objspace.py
@@ -11,7 +11,7 @@
 from pypy.rlib.debug import make_sure_not_resized
 from pypy.rlib.rarithmetic import base_int, widen
 from pypy.rlib.objectmodel import we_are_translated
-from pypy.rlib.jit import hint
+from pypy.rlib import jit
 from pypy.rlib.rbigint import rbigint
 from pypy.tool.sourcetools import func_with_new_name
 
@@ -255,7 +255,7 @@
             w_result = self.wrap_exception_cls(x)
             if w_result is not None:
                 return w_result
-        from fake import fake_object
+        from pypy.objspace.std.fake import fake_object
         return fake_object(self, x)
 
     def wrap_exception_cls(self, x):
@@ -311,6 +311,10 @@
                 classofinstance=classofinstance,
                 strdict=strdict)
 
+    def newset(self):
+        from pypy.objspace.std.setobject import newset
+        return W_SetObject(self, newset(self))
+
     def newslice(self, w_start, w_end, w_step):
         return W_SliceObject(w_start, w_end, w_step)
 
@@ -318,7 +322,7 @@
         return W_SeqIterObject(w_obj)
 
     def type(self, w_obj):
-        hint(w_obj.__class__, promote=True)
+        jit.promote(w_obj.__class__)
         return w_obj.getclass(self)
 
     def lookup(self, w_obj, name):
diff --git a/pypy/objspace/std/ropeunicodeobject.py b/pypy/objspace/std/ropeunicodeobject.py
--- a/pypy/objspace/std/ropeunicodeobject.py
+++ b/pypy/objspace/std/ropeunicodeobject.py
@@ -986,7 +986,7 @@
 ##        return space.wrap(0)
 ##    return space.wrap(result)
 
-import unicodetype
+from pypy.objspace.std import unicodetype
 register_all(vars(), unicodetype)
 
 # str.strip(unicode) needs to convert self to unicode and call unicode.strip we
@@ -997,7 +997,7 @@
 # methods?
 
 class str_methods:
-    import stringtype
+    from pypy.objspace.std import stringtype
     W_RopeUnicodeObject = W_RopeUnicodeObject
     from pypy.objspace.std.ropeobject import W_RopeObject
     def str_strip__Rope_RopeUnicode(space, w_self, w_chars):
diff --git a/pypy/objspace/std/setobject.py b/pypy/objspace/std/setobject.py
--- a/pypy/objspace/std/setobject.py
+++ b/pypy/objspace/std/setobject.py
@@ -112,7 +112,7 @@
 # some helper functions
 
 def newset(space):
-    return r_dict(space.eq_w, space.hash_w)
+    return r_dict(space.eq_w, space.hash_w, force_non_null=True)
 
 def make_setdata_from_w_iterable(space, w_iterable=None):
     """Return a new r_dict with the content of w_iterable."""
@@ -466,12 +466,11 @@
     return space.wrap(hash)
 
 def set_pop__Set(space, w_left):
-    for w_key in w_left.setdata:
-        break
-    else:
+    try:
+        w_key, _ = w_left.setdata.popitem()
+    except KeyError:
         raise OperationError(space.w_KeyError,
                                 space.wrap('pop from an empty set'))
-    del w_left.setdata[w_key]
     return w_key
 
 def and__Set_Set(space, w_left, w_other):
diff --git a/pypy/objspace/std/test/test_celldict.py b/pypy/objspace/std/test/test_celldict.py
--- a/pypy/objspace/std/test/test_celldict.py
+++ b/pypy/objspace/std/test/test_celldict.py
@@ -1,6 +1,7 @@
 import py
 from pypy.conftest import gettestobjspace, option
-from pypy.objspace.std.celldict import ModuleCell, ModuleDictImplementation
+from pypy.objspace.std.dictmultiobject import W_DictMultiObject
+from pypy.objspace.std.celldict import ModuleCell, ModuleDictStrategy
 from pypy.objspace.std.test.test_dictmultiobject import FakeSpace
 from pypy.interpreter import gateway
 
@@ -8,7 +9,15 @@
 
 class TestCellDict(object):
     def test_basic_property(self):
-        d = ModuleDictImplementation(space)
+        strategy = ModuleDictStrategy(space)
+        storage = strategy.get_empty_storage()
+        d = W_DictMultiObject(space, strategy, storage)
+
+        # replace getcell with getcell from strategy
+        def f(key, makenew):
+            return strategy.getcell(d, key, makenew)
+        d.getcell = f
+
         d.setitem("a", 1)
         assert d.getcell("a", False) is d.getcell("a", False)
         acell = d.getcell("a", False)
@@ -29,3 +38,33 @@
         assert d.getitem("a") is None
         assert d.getcell("a", False) is acell
         assert d.length() == 0
+
+class AppTestCellDict(object):
+    OPTIONS = {"objspace.std.withcelldict": True}
+
+    def setup_class(cls):
+        if option.runappdirect:
+            py.test.skip("__repr__ doesn't work on appdirect")
+        strategy = ModuleDictStrategy(cls.space)
+        storage = strategy.get_empty_storage()
+        cls.w_d = W_DictMultiObject(cls.space, strategy, storage)
+
+    def test_popitem(self):
+        import __pypy__
+
+        d = self.d
+        assert "ModuleDict" in __pypy__.internal_repr(d)
+        raises(KeyError, d.popitem)
+        d["a"] = 3
+        x = d.popitem()
+        assert x == ("a", 3)
+
+    def test_degenerate(self):
+        import __pypy__
+
+        d = self.d
+        assert "ModuleDict" in __pypy__.internal_repr(d)
+        d["a"] = 3
+        del d["a"]
+        d[object()] = 5
+        assert d.values() == [5]
\ No newline at end of file
diff --git a/pypy/objspace/std/test/test_dictmultiobject.py b/pypy/objspace/std/test/test_dictmultiobject.py
--- a/pypy/objspace/std/test/test_dictmultiobject.py
+++ b/pypy/objspace/std/test/test_dictmultiobject.py
@@ -1,12 +1,13 @@
+import py
 import sys
 from pypy.interpreter.error import OperationError
 from pypy.objspace.std.dictmultiobject import \
      W_DictMultiObject, setitem__DictMulti_ANY_ANY, getitem__DictMulti_ANY, \
-     StrDictImplementation
+     StringDictStrategy, ObjectDictStrategy
 
-from pypy.objspace.std.celldict import ModuleDictImplementation
+from pypy.objspace.std.celldict import ModuleDictStrategy
 from pypy.conftest import gettestobjspace
-
+from pypy.conftest import option
 
 class TestW_DictObject:
 
@@ -17,7 +18,7 @@
         space = self.space
         d = self.space.newdict()
         assert not self.space.is_true(d)
-        assert d.r_dict_content is None
+        assert type(d.strategy) is not ObjectDictStrategy
 
     def test_nonempty(self):
         space = self.space
@@ -137,31 +138,31 @@
         cls.w_on_pypy = cls.space.wrap("__pypy__" in sys.builtin_module_names)
 
     def test_equality(self):
-        d = {1:2}
-        f = {1:2}
+        d = {1: 2}
+        f = {1: 2}
         assert d == f
-        assert d != {1:3}
+        assert d != {1: 3}
 
     def test_clear(self):
-        d = {1:2, 3:4}
+        d = {1: 2, 3: 4}
         d.clear()
         assert len(d) == 0
 
     def test_copy(self):
-        d = {1:2, 3:4}
+        d = {1: 2, 3: 4}
         dd = d.copy()
         assert d == dd
         assert not d is dd
 
     def test_get(self):
-        d = {1:2, 3:4}
+        d = {1: 2, 3: 4}
         assert d.get(1) == 2
-        assert d.get(1,44) == 2
+        assert d.get(1, 44) == 2
         assert d.get(33) == None
-        assert d.get(33,44) == 44
+        assert d.get(33, 44) == 44
 
     def test_pop(self):
-        d = {1:2, 3:4}
+        d = {1: 2, 3: 4}
         dd = d.copy()
         result = dd.pop(1)
         assert result == 2
@@ -176,18 +177,18 @@
         raises(KeyError, dd.pop, 33)
 
     def test_has_key(self):
-        d = {1:2, 3:4}
+        d = {1: 2, 3: 4}
         assert d.has_key(1)
         assert not d.has_key(33)
 
     def test_items(self):
-        d = {1:2, 3:4}
+        d = {1: 2, 3: 4}
         its = d.items()
         its.sort()
-        assert its == [(1,2),(3,4)]
+        assert its == [(1, 2), (3, 4)]
 
     def test_iteritems(self):
-        d = {1:2, 3:4}
+        d = {1: 2, 3: 4}
         dd = d.copy()
         for k, v in d.iteritems():
             assert v == dd[k]
@@ -195,33 +196,33 @@
         assert not dd
 
     def test_iterkeys(self):
-        d = {1:2, 3:4}
+        d = {1: 2, 3: 4}
         dd = d.copy()
         for k in d.iterkeys():
             del dd[k]
         assert not dd
 
     def test_itervalues(self):
-        d = {1:2, 3:4}
+        d = {1: 2, 3: 4}
         values = []
         for k in d.itervalues():
             values.append(k)
         assert values == d.values()
 
     def test_keys(self):
-        d = {1:2, 3:4}
+        d = {1: 2, 3: 4}
         kys = d.keys()
         kys.sort()
-        assert kys == [1,3]
+        assert kys == [1, 3]
 
     def test_popitem(self):
-        d = {1:2, 3:4}
+        d = {1: 2, 3: 4}
         it = d.popitem()
         assert len(d) == 1
-        assert it==(1,2) or it==(3,4)
+        assert it == (1, 2) or it == (3, 4)
         it1 = d.popitem()
         assert len(d) == 0
-        assert (it!=it1) and (it1==(1,2) or it1==(3,4))
+        assert (it != it1) and (it1 == (1, 2) or it1 == (3, 4))
         raises(KeyError, d.popitem)
 
     def test_popitem_2(self):
@@ -233,8 +234,33 @@
         assert it1 == ('x', 5)
         raises(KeyError, d.popitem)
 
+    def test_popitem3(self):
+        #object
+        d = {"a": 1, 2: 2, "c": 3}
+        l = []
+        while True:
+            try:
+                l.append(d.popitem())
+            except KeyError:
+                break;
+        assert ("a", 1) in l
+        assert (2, 2) in l
+        assert ("c", 3) in l
+
+        #string
+        d = {"a": 1, "b":2, "c":3}
+        l = []
+        while True:
+            try:
+                l.append(d.popitem())
+            except KeyError:
+                break;
+        assert ("a", 1) in l
+        assert ("b", 2) in l
+        assert ("c", 3) in l
+
     def test_setdefault(self):
-        d = {1:2, 3:4}
+        d = {1: 2, 3: 4}
         dd = d.copy()
         x = dd.setdefault(1, 99)
         assert d == dd
@@ -267,12 +293,12 @@
             assert k.calls == 1
 
     def test_update(self):
-        d = {1:2, 3:4}
+        d = {1: 2, 3: 4}
         dd = d.copy()
         d.update({})
         assert d == dd
-        d.update({3:5, 6:7})
-        assert d == {1:2, 3:5, 6:7}
+        d.update({3: 5, 6: 7})
+        assert d == {1: 2, 3: 5, 6: 7}
 
     def test_update_iterable(self):
         d = {}
@@ -297,15 +323,15 @@
         assert d == {'foo': 'bar', 'baz': 1}
 
     def test_values(self):
-        d = {1:2, 3:4}
+        d = {1: 2, 3: 4}
         vals = d.values()
         vals.sort()
         assert vals == [2,4]
 
     def test_eq(self):
-        d1 = {1:2, 3:4}
-        d2 = {1:2, 3:4}
-        d3 = {1:2}
+        d1 = {1: 2, 3: 4}
+        d2 = {1: 2, 3: 4}
+        d3 = {1: 2}
         bool = d1 == d2
         assert bool == True
         bool = d1 == d3
@@ -316,10 +342,10 @@
         assert bool == True
 
     def test_lt(self):
-        d1 = {1:2, 3:4}
-        d2 = {1:2, 3:4}
-        d3 = {1:2, 3:5}
-        d4 = {1:2}
+        d1 = {1: 2, 3: 4}
+        d2 = {1: 2, 3: 4}
+        d3 = {1: 2, 3: 5}
+        d4 = {1: 2}
         bool = d1 < d2
         assert bool == False
         bool = d1 < d3
@@ -366,21 +392,17 @@
     def test_new(self):
         d = dict()
         assert d == {}
-        args = [['a',2], [23,45]]
+        args = [['a', 2], [23, 45]]
         d = dict(args)
-        assert d == {'a':2, 23:45}
+        assert d == {'a': 2, 23: 45}
         d = dict(args, a=33, b=44)
-        assert d == {'a':33, 'b':44, 23:45}
+        assert d == {'a': 33, 'b': 44, 23: 45}
         d = dict(a=33, b=44)
-        assert d == {'a':33, 'b':44}
-        d = dict({'a':33, 'b':44})
-        assert d == {'a':33, 'b':44}
-        try: d = dict(23)
-        except (TypeError, ValueError): pass
-        else: self.fail("dict(23) should raise!")
-        try: d = dict([[1,2,3]])
-        except (TypeError, ValueError): pass
-        else: self.fail("dict([[1,2,3]]) should raise!")
+        assert d == {'a': 33, 'b': 44}
+        d = dict({'a': 33, 'b': 44})
+        assert d == {'a': 33, 'b': 44}
+        raises((TypeError, ValueError), dict, 23)
+        raises((TypeError, ValueError), dict, [[1, 2, 3]])
 
     def test_fromkeys(self):
         assert {}.fromkeys([1, 2], 1) == {1: 1, 2: 1}
@@ -527,6 +549,12 @@
             __missing__ = SpecialDescr(missing)
         assert X()['hi'] == 42
 
+    def test_empty_dict(self):
+        d = {}
+        raises(KeyError, d.popitem)
+        assert d.items() == []
+        assert d.values() == []
+        assert d.keys() == []
 
 class AppTest_DictMultiObject(AppTest_DictObject):
 
@@ -706,10 +734,12 @@
 class AppTestModuleDict(object):
     def setup_class(cls):
         cls.space = gettestobjspace(**{"objspace.std.withcelldict": True})
+        if option.runappdirect:
+            py.test.skip("__repr__ doesn't work on appdirect")
 
     def w_impl_used(self, obj):
         import __pypy__
-        assert "ModuleDictImplementation" in __pypy__.internal_repr(obj)
+        assert "ModuleDictStrategy" in __pypy__.internal_repr(obj)
 
     def test_check_module_uses_module_dict(self):
         m = type(__builtins__)("abc")
@@ -719,6 +749,64 @@
         d = type(__builtins__)("abc").__dict__
         raises(KeyError, "d['def']")
 
+    def test_fallback_evil_key(self):
+        class F(object):
+            def __hash__(self):
+                return hash("s")
+            def __eq__(self, other):
+                return other == "s"
+        d = type(__builtins__)("abc").__dict__
+        d["s"] = 12
+        assert d["s"] == 12
+        assert d[F()] == d["s"]
+
+        d = type(__builtins__)("abc").__dict__
+        x = d.setdefault("s", 12)
+        assert x == 12
+        x = d.setdefault(F(), 12)
+        assert x == 12
+
+        d = type(__builtins__)("abc").__dict__
+        x = d.setdefault(F(), 12)
+        assert x == 12
+
+        d = type(__builtins__)("abc").__dict__
+        d["s"] = 12
+        del d[F()]
+
+        assert "s" not in d
+        assert F() not in d
+
+class AppTestStrategies(object):
+    def setup_class(cls):
+        if option.runappdirect:
+            py.test.skip("__repr__ doesn't work on appdirect")
+
+    def w_get_strategy(self, obj):
+        import __pypy__
+        r = __pypy__.internal_repr(obj)
+        return r[r.find("(") + 1: r.find(")")]
+
+    def test_empty_to_string(self):
+        d = {}
+        assert "EmptyDictStrategy" in self.get_strategy(d)
+        d["a"] = 1
+        assert "StringDictStrategy" in self.get_strategy(d)
+
+        class O(object):
+            pass
+        o = O()
+        d = o.__dict__ = {}
+        assert "EmptyDictStrategy" in self.get_strategy(d)
+        o.a = 1
+        assert "StringDictStrategy" in self.get_strategy(d)
+
+    def test_empty_to_int(self):
+        import sys
+        d = {}
+        d[1] = "hi"
+        assert "IntDictStrategy" in self.get_strategy(d)
+        assert d[1L] == "hi"
 
 
 class FakeString(str):
@@ -759,6 +847,10 @@
         assert isinstance(string, str)
         return string
 
+    def int_w(self, integer):
+        assert isinstance(integer, int)
+        return integer
+
     def wrap(self, obj):
         return obj
 
@@ -790,6 +882,10 @@
 
     w_StopIteration = StopIteration
     w_None = None
+    w_NoneType = type(None)
+    w_int = int
+    w_bool = bool
+    w_float = float
     StringObjectCls = FakeString
     w_dict = W_DictMultiObject
     iter = iter
@@ -799,12 +895,9 @@
 class Config:
     class objspace:
         class std:
-            withdictmeasurement = False
             withsmalldicts = False
             withcelldict = False
             withmethodcache = False
-        class opcodes:
-            CALL_LIKELY_BUILTIN = False
 
 FakeSpace.config = Config()
 
@@ -834,14 +927,20 @@
         self.impl = self.get_impl()
 
     def get_impl(self):
-        return self.ImplementionClass(self.fakespace)
+        strategy = self.StrategyClass(self.fakespace)
+        storage = strategy.get_empty_storage()
+        w_dict = self.fakespace.allocate_instance(W_DictMultiObject, None)
+        W_DictMultiObject.__init__(w_dict, self.fakespace, strategy, storage)
+        return w_dict
 
     def fill_impl(self):
         self.impl.setitem(self.string, 1000)
         self.impl.setitem(self.string2, 2000)
 
     def check_not_devolved(self):
-        assert self.impl.r_dict_content is None
+        #XXX check if strategy changed!?
+        assert type(self.impl.strategy) is self.StrategyClass
+        #assert self.impl.r_dict_content is None
 
     def test_setitem(self):
         self.impl.setitem(self.string, 1000)
@@ -913,7 +1012,7 @@
         for x in xrange(100):
             impl.setitem(self.fakespace.str_w(str(x)), x)
             impl.setitem(x, x)
-        assert impl.r_dict_content is not None
+        assert type(impl.strategy) is ObjectDictStrategy
 
     def test_setdefault_fast(self):
         on_pypy = "__pypy__" in sys.builtin_module_names
@@ -928,8 +1027,38 @@
         if on_pypy:
             assert key.hash_count == 2
 
+    def test_fallback_evil_key(self):
+        class F(object):
+            def __hash__(self):
+                return hash("s")
+            def __eq__(self, other):
+                return other == "s"
+
+        d = self.get_impl()
+        d.setitem("s", 12)
+        assert d.getitem("s") == 12
+        assert d.getitem(F()) == d.getitem("s")
+
+        d = self.get_impl()
+        x = d.setdefault("s", 12)
+        assert x == 12
+        x = d.setdefault(F(), 12)
+        assert x == 12
+
+        d = self.get_impl()
+        x = d.setdefault(F(), 12)
+        assert x == 12
+
+        d = self.get_impl()
+        d.setitem("s", 12)
+        d.delitem(F())
+
+        assert "s" not in d.keys()
+        assert F() not in d.keys()
+
 class TestStrDictImplementation(BaseTestRDictImplementation):
-    ImplementionClass = StrDictImplementation
+    StrategyClass = StringDictStrategy
+    #ImplementionClass = StrDictImplementation
 
     def test_str_shortcut(self):
         self.fill_impl()
@@ -942,10 +1071,10 @@
 ##     DevolvedClass = MeasuringDictImplementation
 
 class TestModuleDictImplementation(BaseTestRDictImplementation):
-    ImplementionClass = ModuleDictImplementation
+    StrategyClass = ModuleDictStrategy
 
 class TestModuleDictImplementationWithBuiltinNames(BaseTestRDictImplementation):
-    ImplementionClass = ModuleDictImplementation
+    StrategyClass = ModuleDictStrategy
 
     string = "int"
     string2 = "isinstance"
@@ -954,19 +1083,19 @@
 class BaseTestDevolvedDictImplementation(BaseTestRDictImplementation):
     def fill_impl(self):
         BaseTestRDictImplementation.fill_impl(self)
-        self.impl._as_rdict()
+        self.impl.strategy.switch_to_object_strategy(self.impl)
 
     def check_not_devolved(self):
         pass
 
 class TestDevolvedStrDictImplementation(BaseTestDevolvedDictImplementation):
-    ImplementionClass = StrDictImplementation
+    StrategyClass = StringDictStrategy
 
 class TestDevolvedModuleDictImplementation(BaseTestDevolvedDictImplementation):
-    ImplementionClass = ModuleDictImplementation
+    StrategyClass = ModuleDictStrategy
 
 class TestDevolvedModuleDictImplementationWithBuiltinNames(BaseTestDevolvedDictImplementation):
-    ImplementionClass = ModuleDictImplementation
+    StrategyClass = ModuleDictStrategy
 
     string = "int"
     string2 = "isinstance"
@@ -975,5 +1104,4 @@
 def test_module_uses_strdict():
     fakespace = FakeSpace()
     d = fakespace.newdict(module=True)
-    assert isinstance(d, StrDictImplementation)
-
+    assert type(d.strategy) is StringDictStrategy
diff --git a/pypy/objspace/std/test/test_mapdict.py b/pypy/objspace/std/test/test_mapdict.py
--- a/pypy/objspace/std/test/test_mapdict.py
+++ b/pypy/objspace/std/test/test_mapdict.py
@@ -250,13 +250,18 @@
 
     class FakeDict(W_DictMultiObject):
         def __init__(self, d):
-            self.r_dict_content = d
+            self.dstorage = d
+
+        class strategy:
+            def unerase(self, x):
+                return d
+        strategy = strategy()
 
     d = {}
     w_d = FakeDict(d)
     flag = obj.map.write(obj, ("dict", SPECIAL), w_d)
     assert flag
-    materialize_r_dict(space, obj, w_d)
+    materialize_r_dict(space, obj, d)
     assert d == {"a": 5, "b": 6, "c": 7}
     assert obj.storage == [50, 60, 70, w_d]
 
@@ -291,18 +296,18 @@
     w_obj = cls.instantiate(self.fakespace)
     return w_obj.getdict(self.fakespace)
 class TestMapDictImplementation(BaseTestRDictImplementation):
-    ImplementionClass = MapDictImplementation
+    StrategyClass = MapDictStrategy
     get_impl = get_impl
 class TestDevolvedMapDictImplementation(BaseTestDevolvedDictImplementation):
     get_impl = get_impl
-    ImplementionClass = MapDictImplementation
+    StrategyClass = MapDictStrategy
 
 # ___________________________________________________________
 # tests that check the obj interface after the dict has devolved
 
 def devolve_dict(space, obj):
     w_d = obj.getdict(space)
-    w_d._as_rdict()
+    w_d.strategy.switch_to_object_strategy(w_d)
 
 def test_get_setdictvalue_after_devolve():
     cls = Class()
@@ -463,6 +468,20 @@
         d['dd'] = 43
         assert a.dd == 41
 
+    def test_popitem(self):
+        class A(object):
+            pass
+        a = A()
+        a.x = 5
+        a.y = 6
+        it1 = a.__dict__.popitem()
+        assert it1 == ("y", 6)
+        it2 = a.__dict__.popitem()
+        assert it2 == ("x", 5)
+        assert a.__dict__ == {}
+        raises(KeyError, a.__dict__.popitem)
+
+
 
     def test_slot_name_conflict(self):
         class A(object):
@@ -604,6 +623,14 @@
         assert a.__dict__ is d
         assert isinstance(a, B)
 
+    def test_setdict(self):
+        class A(object):
+            pass
+
+        a = A()
+        a.__dict__ = {}
+        a.__dict__ = {}
+
 
 class AppTestWithMapDictAndCounters(object):
     def setup_class(cls):
diff --git a/pypy/objspace/std/test/test_setobject.py b/pypy/objspace/std/test/test_setobject.py
--- a/pypy/objspace/std/test/test_setobject.py
+++ b/pypy/objspace/std/test/test_setobject.py
@@ -50,6 +50,10 @@
         u = self.space.wrap(set('simsalabim'))
         assert self.space.eq_w(s,u)
 
+    def test_space_newset(self):
+        s = self.space.newset()
+        assert self.space.str_w(self.space.repr(s)) == 'set([])'
+
 class AppTestAppSetTest:
     def test_subtype(self):
         class subset(set):pass
diff --git a/pypy/objspace/std/typeobject.py b/pypy/objspace/std/typeobject.py
--- a/pypy/objspace/std/typeobject.py
+++ b/pypy/objspace/std/typeobject.py
@@ -9,8 +9,8 @@
 from pypy.objspace.std.objecttype import object_typedef
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.rlib.objectmodel import current_object_addr_as_int, compute_hash
-from pypy.rlib.jit import hint, purefunction_promote, we_are_jitted
-from pypy.rlib.jit import purefunction, dont_look_inside, unroll_safe
+from pypy.rlib.jit import promote, elidable_promote, we_are_jitted
+from pypy.rlib.jit import elidable, dont_look_inside, unroll_safe
 from pypy.rlib.rarithmetic import intmask, r_uint
 
 class TypeCell(W_Root):
@@ -177,7 +177,7 @@
         # prebuilt objects cannot get their version_tag changed
         return w_self._pure_version_tag()
 
-    @purefunction_promote()
+    @elidable_promote()
     def _pure_version_tag(w_self):
         return w_self._version_tag
 
@@ -247,7 +247,7 @@
                     return w_value
         return w_value
 
-    @purefunction
+    @elidable
     def _pure_getdictvalue_no_unwrapping(w_self, space, version_tag, attr):
         return w_self._getdictvalue_no_unwrapping(space, attr)
 
@@ -351,16 +351,16 @@
 
     def lookup_where_with_method_cache(w_self, name):
         space = w_self.space
-        w_self = hint(w_self, promote=True)
+        promote(w_self)
         assert space.config.objspace.std.withmethodcache
-        version_tag = hint(w_self.version_tag(), promote=True)
+        version_tag = promote(w_self.version_tag())
         if version_tag is None:
             tup = w_self._lookup_where(name)
             return tup
         w_class, w_value = w_self._pure_lookup_where_with_method_cache(name, version_tag)
         return w_class, unwrap_cell(space, w_value)
 
-    @purefunction
+    @elidable
     def _pure_lookup_where_with_method_cache(w_self, name, version_tag):
         space = w_self.space
         cache = space.fromcache(MethodCache)
@@ -423,10 +423,13 @@
         return False
 
     def getdict(w_self, space): # returning a dict-proxy!
-        from pypy.objspace.std.dictproxyobject import W_DictProxyObject
+        from pypy.objspace.std.dictproxyobject import DictProxyStrategy
+        from pypy.objspace.std.dictmultiobject import W_DictMultiObject
         if w_self.lazyloaders:
             w_self._freeze_()    # force un-lazification
-        return W_DictProxyObject(space, w_self)
+        strategy = space.fromcache(DictProxyStrategy)
+        storage = strategy.erase(w_self)
+        return W_DictMultiObject(space, strategy, storage)
 
     def unwrap(w_self, space):
         if w_self.instancetypedef.fakedcpytype is not None:
@@ -447,8 +450,8 @@
         w_self.flag_abstract = bool(abstract)
 
     def issubtype(w_self, w_type):
-        w_self = hint(w_self, promote=True)
-        w_type = hint(w_type, promote=True)
+        promote(w_self)
+        promote(w_type)
         if w_self.space.config.objspace.std.withtypeversion and we_are_jitted():
             version_tag1 = w_self.version_tag()
             version_tag2 = w_type.version_tag()
@@ -774,7 +777,7 @@
 # ____________________________________________________________
 
 def call__Type(space, w_type, __args__):
-    w_type = hint(w_type, promote=True)
+    promote(w_type)
     # special case for type(x)
     if space.is_w(w_type, space.w_type):
         try:
@@ -820,7 +823,7 @@
 def _issubtype(w_sub, w_type):
     return w_type in w_sub.mro_w
 
-@purefunction_promote()
+@elidable_promote()
 def _pure_issubtype(w_sub, w_type, version_tag1, version_tag2):
     return _issubtype(w_sub, w_type)
 
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -937,7 +937,7 @@
     return formatter.format_string(space.unicode_w(w_unicode))
 
 
-import unicodetype
+from pypy.objspace.std import unicodetype
 register_all(vars(), unicodetype)
 
 # str.strip(unicode) needs to convert self to unicode and call unicode.strip we
@@ -948,7 +948,7 @@
 # methods?
 
 class str_methods:
-    import stringtype
+    from pypy.objspace.std import stringtype
     W_UnicodeObject = W_UnicodeObject
     from pypy.objspace.std.stringobject import W_StringObject
     from pypy.objspace.std.ropeobject import W_RopeObject
diff --git a/pypy/objspace/taint.py b/pypy/objspace/taint.py
--- a/pypy/objspace/taint.py
+++ b/pypy/objspace/taint.py
@@ -92,8 +92,8 @@
     w_realtype = space.type(w_obj)
     if not space.is_w(w_realtype, w_expectedtype):
         #msg = "expected an object of type '%s'" % (
-        #    w_expectedtype.getname(space, '?'),)
-        #    #w_realtype.getname(space, '?'))
+        #    w_expectedtype.getname(space),)
+        #    #w_realtype.getname(space))
         raise OperationError(space.w_TaintError, space.w_None)
     return w_obj
 app_untaint = gateway.interp2app(untaint)
diff --git a/pypy/pytest.ini b/pypy/pytest.ini
new file mode 100644
--- /dev/null
+++ b/pypy/pytest.ini
@@ -0,0 +1,2 @@
+[pytest]
+addopts = --assertmode=old
\ No newline at end of file
diff --git a/pypy/rlib/clibffi.py b/pypy/rlib/clibffi.py
--- a/pypy/rlib/clibffi.py
+++ b/pypy/rlib/clibffi.py
@@ -10,6 +10,7 @@
 from pypy.rlib.rmmap import alloc
 from pypy.rlib.rdynload import dlopen, dlclose, dlsym, dlsym_byordinal
 from pypy.rlib.rdynload import DLOpenError, DLLHANDLE
+from pypy.rlib import jit
 from pypy.tool.autopath import pypydir
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
 from pypy.translator.platform import platform
@@ -18,6 +19,10 @@
 import sys
 import ctypes.util
 
+from pypy.tool.ansi_print import ansi_log
+log = py.log.Producer("libffi")
+py.log.setconsumer("libffi", ansi_log)
+
 # maaaybe isinstance here would be better. Think
 _MSVC = platform.name == "msvc"
 _MINGW = platform.name == "mingw32"
@@ -67,12 +72,17 @@
             result = os.path.join(dir, 'libffi.a')
             if os.path.exists(result):
                 return result
-        raise ImportError("'libffi.a' not found in %s" % (dirlist,))
+        log.WARNING("'libffi.a' not found in %s" % (dirlist,))
+        log.WARNING("trying to use the dynamic library instead...")
+        return None
 
+    path_libffi_a = None
     if hasattr(platform, 'library_dirs_for_libffi_a'):
+        path_libffi_a = find_libffi_a()
+    if path_libffi_a is not None:
         # platforms on which we want static linking
         libraries = []
-        link_files = [find_libffi_a()]
+        link_files = [path_libffi_a]
     else:
         # platforms on which we want dynamic linking
         libraries = ['ffi']
@@ -261,6 +271,7 @@
 elif _MSVC:
     get_libc_handle = external('pypy_get_libc_handle', [], DLLHANDLE)
 
+    @jit.dont_look_inside
     def get_libc_name():
         return rwin32.GetModuleFileName(get_libc_handle())
 
diff --git a/pypy/rlib/debug.py b/pypy/rlib/debug.py
--- a/pypy/rlib/debug.py
+++ b/pypy/rlib/debug.py
@@ -262,6 +262,28 @@
         return hop.inputarg(hop.args_r[0], arg=0)
 
 
+def mark_dict_non_null(d):
+    """ Mark dictionary as having non-null keys and values. A warning would
+    be emitted (not an error!) in case annotation disagrees.
+    """
+    assert isinstance(d, dict)
+    return d
+
+
+class DictMarkEntry(ExtRegistryEntry):
+    _about_ = mark_dict_non_null
+    
+    def compute_result_annotation(self, s_dict):
+        from pypy.annotation.model import SomeDict, s_None
+
+        assert isinstance(s_dict, SomeDict)
+        s_dict.dictdef.force_non_null = True
+        return s_dict
+
+    def specialize_call(self, hop):
+        hop.exception_cannot_occur()
+        return hop.inputarg(hop.args_r[0], arg=0)
+
 class IntegerCanBeNegative(Exception):
     pass
 
diff --git a/pypy/rlib/jit.py b/pypy/rlib/jit.py
--- a/pypy/rlib/jit.py
+++ b/pypy/rlib/jit.py
@@ -6,21 +6,26 @@
 from pypy.rlib.unroll import unrolling_iterable
 from pypy.rlib.nonconst import NonConstant
 
-def purefunction(func):
-    """ Decorate a function as pure. Pure means precisely that:
+def elidable(func):
+    """ Decorate a function as "trace-elidable". This means precisely that:
 
     (1) the result of the call should not change if the arguments are
         the same (same numbers or same pointers)
     (2) it's fine to remove the call completely if we can guess the result
     according to rule 1
 
-    Most importantly it doesn't mean that pure function has no observable
-    side effect, but those side effects can be ommited (ie caching).
+    Most importantly it doesn't mean that an elidable function has no observable
+    side effect, but those side effects are idempotent (ie caching).
     For now, such a function should never raise an exception.
     """
-    func._pure_function_ = True
+    func._elidable_function_ = True
     return func
 
+def purefunction(*args, **kwargs):
+    import warnings
+    warnings.warn("purefunction is deprecated, use elidable instead", DeprecationWarning)
+    return elidable(*args, **kwargs)
+
 def hint(x, **kwds):
     """ Hint for the JIT
 
@@ -36,6 +41,10 @@
     """
     return x
 
+@specialize.argtype(0)
+def promote(x):
+    return hint(x, promote=True)
+
 def dont_look_inside(func):
     """ Make sure the JIT does not trace inside decorated function
     (it becomes a call instead)
@@ -60,13 +69,13 @@
     func._jit_loop_invariant_ = True
     return func
 
-def purefunction_promote(promote_args='all'):
+def elidable_promote(promote_args='all'):
     """ A decorator that promotes all arguments and then calls the supplied
     function
     """
     def decorator(func):
         import inspect
-        purefunction(func)
+        elidable(func)
         args, varargs, varkw, defaults = inspect.getargspec(func)
         args = ["v%s" % (i, ) for i in range(len(args))]
         assert varargs is None and varkw is None
@@ -85,6 +94,12 @@
         return result
     return decorator
 
+def purefunction_promote(*args, **kwargs):
+    import warnings
+    warnings.warn("purefunction_promote is deprecated, use elidable_promote instead", DeprecationWarning)
+    return elidable_promote(*args, **kwargs)
+
+
 def oopspec(spec):
     def decorator(func):
         func.oopspec = spec
@@ -273,15 +288,17 @@
 class JitHintError(Exception):
     """Inconsistency in the JIT hints."""
 
-PARAMETERS = {'threshold': 1000,
+PARAMETERS = {'threshold': 1032, # just above 1024
+              'function_threshold': 1617, # slightly more than one above 
               'trace_eagerness': 200,
               'trace_limit': 12000,
-              'inlining': 0,
+              'inlining': 1,
               'loop_longevity': 1000,
               'retrace_limit': 5,
-              'enable_opts': None, # patched later by optimizeopt/__init__.py
+              'enable_opts': 'all',
               }
 unroll_parameters = unrolling_iterable(PARAMETERS.items())
+DEFAULT = object()
 
 # ____________________________________________________________
 
@@ -336,22 +353,33 @@
     def _set_param(self, name, value):
         # special-cased by ExtRegistryEntry
         # (internal, must receive a constant 'name')
+        # if value is DEFAULT, sets the default value.
         assert name in PARAMETERS
 
     @specialize.arg(0, 1)
     def set_param(self, name, value):
         """Set one of the tunable JIT parameter."""
-        for name1, _ in unroll_parameters:
-            if name1 == name:
-                self._set_param(name1, value)
-                return
-        raise ValueError("no such parameter")
+        self._set_param(name, value)
+
+    @specialize.arg(0, 1)
+    def set_param_to_default(self, name):
+        """Reset one of the tunable JIT parameters to its default value."""
+        self._set_param(name, DEFAULT)
 
     def set_user_param(self, text):
         """Set the tunable JIT parameters from a user-supplied string
-        following the format 'param=value,param=value'.  For programmatic
-        setting of parameters, use directly JitDriver.set_param().
+        following the format 'param=value,param=value', or 'off' to
+        disable the JIT.  For programmatic setting of parameters, use
+        directly JitDriver.set_param().
         """
+        if text == 'off':
+            self.set_param('threshold', -1)
+            self.set_param('function_threshold', -1)
+            return
+        if text == 'default':
+            for name1, _ in unroll_parameters:
+                self.set_param_to_default(name1)
+            return
         for s in text.split(','):
             s = s.strip(' ')
             parts = s.split('=')
@@ -574,15 +602,17 @@
     def compute_result_annotation(self, s_name, s_value):
         from pypy.annotation import model as annmodel
         assert s_name.is_constant()
-        if s_name.const == 'enable_opts':
-            assert annmodel.SomeString(can_be_None=True).contains(s_value)
-        else:
-            assert annmodel.SomeInteger().contains(s_value)
+        if not self.bookkeeper.immutablevalue(DEFAULT).contains(s_value):
+            if s_name.const == 'enable_opts':
+                assert annmodel.SomeString(can_be_None=True).contains(s_value)
+            else:
+                assert annmodel.SomeInteger().contains(s_value)
         return annmodel.s_None
 
     def specialize_call(self, hop):
         from pypy.rpython.lltypesystem import lltype
         from pypy.rpython.lltypesystem.rstr import string_repr
+        from pypy.objspace.flow.model import Constant
 
         hop.exception_cannot_occur()
         driver = self.instance.im_self
@@ -591,7 +621,12 @@
             repr = string_repr
         else:
             repr = lltype.Signed
-        v_value = hop.inputarg(repr, arg=1)
+        if (isinstance(hop.args_v[1], Constant) and
+            hop.args_v[1].value is DEFAULT):
+            value = PARAMETERS[name]
+            v_value = hop.inputconst(repr, value)
+        else:
+            v_value = hop.inputarg(repr, arg=1)
         vlist = [hop.inputconst(lltype.Void, "set_param"),
                  hop.inputconst(lltype.Void, driver),
                  hop.inputconst(lltype.Void, name),
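A hedged sketch of how an interpreter might drive the new parameter handling; the driver and its greens/reds are invented for the example:

    from pypy.rlib import jit

    driver = jit.JitDriver(greens=['pc'], reds=['frame'])

    # 'off' disables the JIT, 'default' restores every parameter,
    # anything else is parsed as 'param=value,param=value'
    driver.set_user_param('threshold=1032,inlining=1')
    driver.set_user_param('default')
    driver.set_user_param('off')

    # a single parameter can also be reset programmatically
    driver.set_param_to_default('trace_limit')
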
diff --git a/pypy/rlib/libffi.py b/pypy/rlib/libffi.py
--- a/pypy/rlib/libffi.py
+++ b/pypy/rlib/libffi.py
@@ -1,12 +1,15 @@
+from __future__ import with_statement
+
 from pypy.rpython.lltypesystem import rffi, lltype
 from pypy.rlib.objectmodel import specialize, enforceargs, we_are_translated
-from pypy.rlib.rarithmetic import intmask, r_uint
+from pypy.rlib.rarithmetic import intmask, r_uint, r_singlefloat
 from pypy.rlib import jit
 from pypy.rlib import clibffi
 from pypy.rlib.clibffi import get_libc_name, FUNCFLAG_CDECL, AbstractFuncPtr, \
-    push_arg_as_ffiptr, c_ffi_call
+    push_arg_as_ffiptr, c_ffi_call, FFI_TYPE_STRUCT
 from pypy.rlib.rdynload import dlopen, dlclose, dlsym, dlsym_byordinal
 from pypy.rlib.rdynload import DLLHANDLE
+from pypy.rlib.longlong2float import longlong2float, float2longlong
 
 class types(object):
     """
@@ -31,17 +34,21 @@
                 setattr(cls, name, value)
         cls.slong = clibffi.cast_type_to_ffitype(rffi.LONG)
         cls.ulong = clibffi.cast_type_to_ffitype(rffi.ULONG)
+        cls.slonglong = clibffi.cast_type_to_ffitype(rffi.LONGLONG)
+        cls.ulonglong = clibffi.cast_type_to_ffitype(rffi.ULONGLONG)
+        cls.wchar_t = clibffi.cast_type_to_ffitype(lltype.UniChar)
         del cls._import
 
     @staticmethod
-    @jit.purefunction
+    @jit.elidable
     def getkind(ffi_type):
         """Returns 'v' for void, 'f' for float, 'i' for signed integer,
         and 'u' for unsigned integer.
         """
         if   ffi_type is types.void:    return 'v'
         elif ffi_type is types.double:  return 'f'
-        elif ffi_type is types.pointer: return 'i'
+        elif ffi_type is types.float:   return 's'
+        elif ffi_type is types.pointer: return 'u'
         #
         elif ffi_type is types.schar:   return 'i'
         elif ffi_type is types.uchar:   return 'u'
@@ -58,13 +65,19 @@
         elif ffi_type is types.uint16:  return 'u'
         elif ffi_type is types.sint32:  return 'i'
         elif ffi_type is types.uint32:  return 'u'
-        ## we only support integers that fit in a lltype.Signed (==rffi.LONG)
-        ## (on 64-bit platforms, types.sint64 is types.slong and the case is
-        ## caught above)
-        ## elif ffi_type is types.sint64:  return 'i'
-        ## elif ffi_type is types.uint64:  return 'u'
+        ## (note that on 64-bit platforms, types.sint64 is types.slong and the
+        ## case is caught above)
+        elif ffi_type is types.sint64:  return 'I'
+        elif ffi_type is types.uint64:  return 'U'
+        #
+        elif types.is_struct(ffi_type): return 'S'
         raise KeyError
 
+    @staticmethod
+    @jit.elidable
+    def is_struct(ffi_type):
+        return intmask(ffi_type.c_type) == intmask(FFI_TYPE_STRUCT)
+
 types._import()
 
 @specialize.arg(0)
@@ -78,8 +91,11 @@
     sz = rffi.sizeof(TYPE)
     return sz <= rffi.sizeof(rffi.LONG)
 
+
 # ======================================================================
 
+IS_32_BIT = (r_uint.BITS == 32)
+
 @specialize.memo()
 def _check_type(TYPE):
     if isinstance(TYPE, lltype.Ptr):
@@ -105,11 +121,37 @@
             val = rffi.cast(rffi.LONG, val)
         elif TYPE is rffi.DOUBLE:
             cls = FloatArg
+        elif TYPE is rffi.LONGLONG or TYPE is rffi.ULONGLONG:
+            raise TypeError, 'r_(u)longlong not supported by arg(), use arg_(u)longlong()'
+        elif TYPE is rffi.FLOAT:
+            raise TypeError, 'r_singlefloat not supported by arg(), use arg_singlefloat()'
         else:
             raise TypeError, 'Unsupported argument type: %s' % TYPE
         self._append(cls(val))
         return self
 
+    def arg_raw(self, val):
+        self._append(RawArg(val))
+
+    def arg_longlong(self, val):
+        """
+        Note: this is a hack. So far, the JIT does not support long longs, so
+        you must pass it as if it were a python Float (rffi.DOUBLE).  You can
+        use the convenience functions longlong2float and float2longlong to do
+        the conversions.  Note that if you use long longs, the call won't
+        be jitted at all.
+        """
+        assert IS_32_BIT      # use a normal integer on 64-bit platforms
+        self._append(LongLongArg(val))
+
+    def arg_singlefloat(self, val):
+        """
+        Note: you must pass a python Float (rffi.DOUBLE), not a r_singlefloat
+        (else the jit complains).  Note that if you use single floats, the
+        call won't be jitted at all.
+        """
+        self._append(SingleFloatArg(val))
+
     def _append(self, arg):
         if self.first is None:
             self.first = self.last = arg
@@ -132,8 +174,9 @@
     def push(self, func, ll_args, i):
         func._push_int(self.intval, ll_args, i)
 
+
 class FloatArg(AbstractArg):
-    """ An argument holding a float
+    """ An argument holding a python float (i.e. a C double)
     """
 
     def __init__(self, floatval):
@@ -142,6 +185,37 @@
     def push(self, func, ll_args, i):
         func._push_float(self.floatval, ll_args, i)
 
+class RawArg(AbstractArg):
+    """ An argument holding a raw pointer to put inside ll_args
+    """
+
+    def __init__(self, ptrval):
+        self.ptrval = ptrval
+
+    def push(self, func, ll_args, i):
+        func._push_raw(self.ptrval, ll_args, i)
+
+class SingleFloatArg(AbstractArg):
+    """ An argument representing a C float (but holding a C double)
+    """
+
+    def __init__(self, floatval):
+        self.floatval = floatval
+
+    def push(self, func, ll_args, i):
+        func._push_single_float(self.floatval, ll_args, i)
+
+
+class LongLongArg(AbstractArg):
+    """ An argument representing a C long long (but holding a C double)
+    """
+
+    def __init__(self, floatval):
+        self.floatval = floatval
+
+    def push(self, func, ll_args, i):
+        func._push_longlong(self.floatval, ll_args, i)
+
 
 # ======================================================================
 
@@ -164,8 +238,8 @@
     # ========================================================================
 
     @jit.unroll_safe
-    @specialize.arg(2)
-    def call(self, argchain, RESULT):
+    @specialize.arg(2, 3)
+    def call(self, argchain, RESULT, is_struct=False):
         # WARNING!  This code is written carefully in a way that the JIT
         # optimizer will see a sequence of calls like the following:
         #
@@ -179,6 +253,7 @@
         # the optimizer will fail to recognize the pattern and won't turn it
         # into a fast CALL.  Note that "arg = arg.next" is optimized away,
         # assuming that archain is completely virtual.
+        self = jit.promote(self)
         if argchain.numargs != len(self.argtypes):
             raise TypeError, 'Wrong number of arguments: %d expected, got %d' %\
                 (argchain.numargs, len(self.argtypes))
@@ -190,10 +265,24 @@
             i += 1
             arg = arg.next
         #
-        if _fits_into_long(RESULT):
+        if is_struct:
+            assert types.is_struct(self.restype)
+            res = self._do_call_raw(self.funcsym, ll_args)
+        elif _fits_into_long(RESULT):
+            assert not types.is_struct(self.restype)
             res = self._do_call_int(self.funcsym, ll_args)
         elif RESULT is rffi.DOUBLE:
             return self._do_call_float(self.funcsym, ll_args)
+        elif RESULT is rffi.FLOAT:
+            # XXX: even if RESULT is FLOAT, we still return a DOUBLE, else the
+            # jit complains. Note that the jit is disabled in this case
+            return self._do_call_single_float(self.funcsym, ll_args)
+        elif RESULT is rffi.LONGLONG or RESULT is rffi.ULONGLONG:
+            # XXX: even if RESULT is LONGLONG, we still return a DOUBLE, else the
+            # jit complains. Note that the jit is disabled in this case
+            # (it's not a typo, we really return a DOUBLE)
+            assert IS_32_BIT
+            return self._do_call_longlong(self.funcsym, ll_args)
         elif RESULT is lltype.Void:
             return self._do_call_void(self.funcsym, ll_args)
         else:
@@ -222,11 +311,26 @@
     def _push_int(self, value, ll_args, i):
         self._push_arg(value, ll_args, i)
 
+    @jit.dont_look_inside
+    def _push_raw(self, value, ll_args, i):
+        ll_args[i] = value
+
     @jit.oopspec('libffi_push_float(self, value, ll_args, i)')
     @enforceargs(   None, float, None,    int) # fix the annotation for tests
     def _push_float(self, value, ll_args, i):
         self._push_arg(value, ll_args, i)
 
+    @jit.dont_look_inside
+    def _push_single_float(self, value, ll_args, i):
+        self._push_arg(r_singlefloat(value), ll_args, i)
+
+    @jit.dont_look_inside
+    def _push_longlong(self, floatval, ll_args, i):
+        """
+        Takes a long long represented as a python Float. This is a hack for
+        the JIT: without it, it could not see the whole libffi module at all."""
+        self._push_arg(float2longlong(floatval), ll_args, i)
+
     @jit.oopspec('libffi_call_int(self, funcsym, ll_args)')
     def _do_call_int(self, funcsym, ll_args):
         return self._do_call(funcsym, ll_args, rffi.LONG)
@@ -235,6 +339,21 @@
     def _do_call_float(self, funcsym, ll_args):
         return self._do_call(funcsym, ll_args, rffi.DOUBLE)
 
+    @jit.dont_look_inside
+    def _do_call_single_float(self, funcsym, ll_args):
+        single_res = self._do_call(funcsym, ll_args, rffi.FLOAT)
+        return float(single_res)
+
+    @jit.dont_look_inside
+    def _do_call_raw(self, funcsym, ll_args):
+        # same as _do_call_int, but marked as jit.dont_look_inside
+        return self._do_call(funcsym, ll_args, rffi.LONG)
+
+    @jit.dont_look_inside
+    def _do_call_longlong(self, funcsym, ll_args):
+        llres = self._do_call(funcsym, ll_args, rffi.LONGLONG)
+        return longlong2float(llres)
+
     @jit.oopspec('libffi_call_void(self, funcsym, ll_args)')
     def _do_call_void(self, funcsym, ll_args):
         return self._do_call(funcsym, ll_args, lltype.Void)
@@ -265,7 +384,14 @@
                             rffi.cast(rffi.VOIDPP, ll_args))
         if RESULT is not lltype.Void:
             TP = lltype.Ptr(rffi.CArray(RESULT))
-            res = rffi.cast(TP, ll_result)[0]
+            buf = rffi.cast(TP, ll_result)
+            if types.is_struct(self.restype):
+                assert RESULT == rffi.LONG
+                # for structs, we directly return the buffer and transfer the
+                # ownership
+                res = rffi.cast(RESULT, buf)
+            else:
+                res = buf[0]
         else:
             res = None
         self._free_buffers(ll_result, ll_args)
@@ -274,11 +400,19 @@
 
     def _free_buffers(self, ll_result, ll_args):
         if ll_result:
-            lltype.free(ll_result, flavor='raw')
+            self._free_buffer_maybe(rffi.cast(rffi.VOIDP, ll_result), self.restype)
         for i in range(len(self.argtypes)):
-            lltype.free(ll_args[i], flavor='raw')
+            argtype = self.argtypes[i]
+            self._free_buffer_maybe(ll_args[i], argtype)
         lltype.free(ll_args, flavor='raw')
 
+    def _free_buffer_maybe(self, buf, ffitype):
+        # if it's a struct, the buffer is not freed: the ownership already
+        # belongs to the caller (for the ll_args buffers) or is transferred
+        # to it (for the ll_result buffer)
+        if not types.is_struct(ffitype):
+            lltype.free(buf, flavor='raw')
+
 
 # ======================================================================
 
@@ -288,11 +422,8 @@
     def __init__(self, libname):
         """Load the library, or raises DLOpenError."""
         self.lib = rffi.cast(DLLHANDLE, 0)
-        ll_libname = rffi.str2charp(libname)
-        try:
+        with rffi.scoped_str2charp(libname) as ll_libname:
             self.lib = dlopen(ll_libname)
-        finally:
-            lltype.free(ll_libname, flavor='raw')
 
     def __del__(self):
         if self.lib:
@@ -302,3 +433,6 @@
     def getpointer(self, name, argtypes, restype, flags=FUNCFLAG_CDECL):
         return Func(name, argtypes, restype, dlsym(self.lib, name),
                     flags=flags, keepalive=self)
+
+    def getaddressindll(self, name):
+        return dlsym(self.lib, name)
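To make the extended call interface concrete, a rough sketch of a caller; the library soname is an assumption and not part of this patch:

    from pypy.rpython.lltypesystem import rffi
    from pypy.rlib.libffi import CDLL, ArgChain, types

    libm = CDLL('libm.so.6')                  # assumed soname
    pow_func = libm.getpointer('pow', [types.double, types.double],
                               types.double)
    chain = ArgChain()
    chain.arg(2.0).arg(10.0)                  # arg() handles ints and doubles
    # long longs and single floats need the dedicated helpers instead:
    #   chain.arg_longlong(longlong2float(x))   # 32-bit only
    #   chain.arg_singlefloat(float(f))
    res = pow_func.call(chain, rffi.DOUBLE)   # -> 1024.0
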
diff --git a/pypy/rlib/longlong2float.py b/pypy/rlib/longlong2float.py
--- a/pypy/rlib/longlong2float.py
+++ b/pypy/rlib/longlong2float.py
@@ -30,25 +30,18 @@
     return llval
 
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
-eci = ExternalCompilationInfo(post_include_bits=["""
+eci = ExternalCompilationInfo(includes=['string.h', 'assert.h'],
+                              post_include_bits=["""
 static double pypy__longlong2float(long long x) {
-    int i;
     double dd;
-    char *p = (char*)&x;
-    char *d = (char*)&dd;
-    for(i = 0; i < 8; i++) {
-        d[i] = p[i];
-    }
+    assert(sizeof(double) == 8 && sizeof(long long) == 8);
+    memcpy(&dd, &x, 8);
     return dd;
 }
 static long long pypy__float2longlong(double x) {
-    int i;
     long long ll;
-    char *p = (char*)&x;
-    char *l = (char*)&ll;
-    for(i = 0; i < 8; i++) {
-        l[i] = p[i];
-    }
+    assert(sizeof(double) == 8 && sizeof(long long) == 8);
+    memcpy(&ll, &x, 8);
     return ll;
 }
 """])
@@ -56,9 +49,9 @@
 longlong2float = rffi.llexternal(
     "pypy__longlong2float", [rffi.LONGLONG], rffi.DOUBLE,
     _callable=longlong2float_emulator, compilation_info=eci,
-    _nowrapper=True, pure_function=True)
+    _nowrapper=True, elidable_function=True)
 
 float2longlong = rffi.llexternal(
     "pypy__float2longlong", [rffi.DOUBLE], rffi.LONGLONG,
     _callable=float2longlong_emulator, compilation_info=eci,
-    _nowrapper=True, pure_function=True)
+    _nowrapper=True, elidable_function=True)
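Outside the translated helpers, the same bit-for-bit reinterpretation can be written with the struct module; this is only an illustration of what pypy__longlong2float and pypy__float2longlong compute, not code from this patch:

    import struct

    def longlong2float_py(ll):
        # reinterpret the 8 bytes of a signed 64-bit integer as a double
        return struct.unpack('d', struct.pack('q', ll))[0]

    def float2longlong_py(d):
        # the inverse: reinterpret the 8 bytes of a double as a 64-bit integer
        return struct.unpack('q', struct.pack('d', d))[0]

    assert float2longlong_py(longlong2float_py(2**40 + 3)) == 2**40 + 3
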
diff --git a/pypy/rlib/objectmodel.py b/pypy/rlib/objectmodel.py
--- a/pypy/rlib/objectmodel.py
+++ b/pypy/rlib/objectmodel.py
@@ -448,10 +448,11 @@
     The functions key_eq() and key_hash() are used by the key comparison
     algorithm."""
 
-    def __init__(self, key_eq, key_hash):
+    def __init__(self, key_eq, key_hash, force_non_null=False):
         self._dict = {}
         self.key_eq = key_eq
         self.key_hash = key_hash
+        self.force_non_null = force_non_null
 
     def __getitem__(self, key):
         return self._dict[_r_dictkey(self, key)]
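A small usage sketch of the new keyword argument; the key and hash functions are invented for the example:

    from pypy.rlib.objectmodel import r_dict

    def str_eq(a, b):
        return a == b

    def str_hash(a):
        return hash(a)

    # promise the rtyper that neither keys nor values can ever be null,
    # so the low-level dict needs no extra null markers
    d = r_dict(str_eq, str_hash, force_non_null=True)
    d['abc'] = 'xyz'
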
diff --git a/pypy/rlib/rbigint.py b/pypy/rlib/rbigint.py
--- a/pypy/rlib/rbigint.py
+++ b/pypy/rlib/rbigint.py
@@ -124,7 +124,7 @@
         return len(self._digits)
 
     @staticmethod
-    @jit.purefunction
+    @jit.elidable
     def fromint(intval):
         # This function is marked as pure, so you must not call it and
         # then modify the result.
@@ -156,7 +156,7 @@
         return v
 
     @staticmethod
-    @jit.purefunction
+    @jit.elidable
     def frombool(b):
         # This function is marked as pure, so you must not call it and
         # then modify the result.
@@ -179,7 +179,7 @@
             raise OverflowError
 
     @staticmethod
-    @jit.purefunction
+    @jit.elidable
     def _fromfloat_finite(dval):
         sign = 1
         if dval < 0.0:
@@ -201,7 +201,7 @@
         return v
 
     @staticmethod
-    @jit.purefunction
+    @jit.elidable
     @specialize.argtype(0)
     def fromrarith_int(i):
         # This function is marked as pure, so you must not call it and
@@ -209,7 +209,7 @@
         return rbigint(*args_from_rarith_int(i))
 
     @staticmethod
-    @jit.purefunction
+    @jit.elidable
     def fromdecimalstr(s):
         # This function is marked as pure, so you must not call it and
         # then modify the result.
diff --git a/pypy/rlib/rgc.py b/pypy/rlib/rgc.py
--- a/pypy/rlib/rgc.py
+++ b/pypy/rlib/rgc.py
@@ -191,6 +191,21 @@
         hop.exception_cannot_occur()
         return hop.genop('gc_can_move', hop.args_v, resulttype=hop.r_result)
 
+def _make_sure_does_not_move(p):
+    """'p' is a non-null GC object.  This (tries to) make sure that the
+    object does not move any more, by forcing collections if needed.
+    Warning: should ideally only be used with the minimark GC, and only
+    on objects that are already a bit old, so that they have a chance to
+    be non-movable already."""
+    if not we_are_translated():
+        return
+    i = 0
+    while can_move(p):
+        if i > 6:
+            raise NotImplementedError("can't make object non-movable!")
+        collect(i)
+        i += 1
+
 def _heap_stats():
     raise NotImplementedError # can't be run directly
 
@@ -257,7 +272,9 @@
     if isinstance(TP.OF, lltype.Ptr) and TP.OF.TO._gckind == 'gc':
         # perform a write barrier that copies necessary flags from
         # source to dest
-        if not llop.gc_writebarrier_before_copy(lltype.Bool, source, dest):
+        if not llop.gc_writebarrier_before_copy(lltype.Bool, source, dest,
+                                                source_start, dest_start,
+                                                length):
             # if the write barrier is not supported, copy by hand
             for i in range(length):
                 dest[i + dest_start] = source[i + source_start]
diff --git a/pypy/rlib/rmd5.py b/pypy/rlib/rmd5.py
--- a/pypy/rlib/rmd5.py
+++ b/pypy/rlib/rmd5.py
@@ -51,7 +51,7 @@
     _rotateLeft = rffi.llexternal(
         "pypy__rotateLeft", [lltype.Unsigned, lltype.Signed], lltype.Unsigned,
         _callable=_rotateLeft_emulator, compilation_info=eci,
-        _nowrapper=True, pure_function=True)
+        _nowrapper=True, elidable_function=True)
     # we expect the function _rotateLeft to be actually inlined
 
 
diff --git a/pypy/rlib/ropenssl.py b/pypy/rlib/ropenssl.py
--- a/pypy/rlib/ropenssl.py
+++ b/pypy/rlib/ropenssl.py
@@ -134,7 +134,8 @@
 
 def external(name, argtypes, restype, **kw):
     kw['compilation_info'] = eci
-    eci.export_symbols += (name,)
+    if not kw.get('macro', False):
+        eci.export_symbols += (name,)
     return rffi.llexternal(
         name, argtypes, restype, **kw)
 
@@ -150,7 +151,7 @@
                 [rffi.INT, rffi.INT, rffi.CCHARP, rffi.INT], lltype.Void))],
              lltype.Void)
 ssl_external('CRYPTO_set_id_callback',
-             [lltype.Ptr(lltype.FuncType([], rffi.INT))],
+             [lltype.Ptr(lltype.FuncType([], rffi.LONG))],
              lltype.Void)
              
 if HAVE_OPENSSL_RAND:
diff --git a/pypy/rlib/rrandom.py b/pypy/rlib/rrandom.py
--- a/pypy/rlib/rrandom.py
+++ b/pypy/rlib/rrandom.py
@@ -24,8 +24,7 @@
     def __init__(self, seed=r_uint(0)):
         self.state = [r_uint(0)] * N
         self.index = 0
-        if seed:
-            self.init_genrand(seed)
+        self.init_genrand(seed)
 
     def init_genrand(self, s):
         mt = self.state
diff --git a/pypy/rlib/rsdl/RMix.py b/pypy/rlib/rsdl/RMix.py
--- a/pypy/rlib/rsdl/RMix.py
+++ b/pypy/rlib/rsdl/RMix.py
@@ -52,7 +52,8 @@
                                ChunkPtr)
 
 def LoadWAV(filename_ccharp):
-    return LoadWAV_RW(RSDL.RWFromFile(filename_ccharp, rffi.str2charp('rb')), 1)
+    with rffi.scoped_str2charp('rb') as mode:
+        return LoadWAV_RW(RSDL.RWFromFile(filename_ccharp, mode), 1)
 
 
 PlayChannelTimed    = external('Mix_PlayChannelTimed',
@@ -64,4 +65,4 @@
 
 """Returns zero if the channel is not playing. 
 Otherwise if you passed in -1, the number of channels playing is returned"""
-ChannelPlaying  = external('Mix_Playing', [ rffi.INT])
\ No newline at end of file
+ChannelPlaying  = external('Mix_Playing', [rffi.INT], rffi.INT)
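The scoped helper used above pairs allocation and freeing of the raw C string automatically; a minimal sketch of the pattern, where the charp2str round-trip stands in for a real C call:

    from pypy.rpython.lltypesystem import rffi

    def roundtrip(s):
        # scoped_str2charp gives a raw, NUL-terminated copy of 's' and
        # frees it when the 'with' block exits, even on an exception
        with rffi.scoped_str2charp(s) as buf:
            return rffi.charp2str(buf)
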
diff --git a/pypy/rlib/streamio.py b/pypy/rlib/streamio.py
--- a/pypy/rlib/streamio.py
+++ b/pypy/rlib/streamio.py
@@ -141,7 +141,8 @@
 def construct_stream_tower(stream, buffering, universal, reading, writing,
                            binary):
     if buffering == 0:   # no buffering
-        pass
+        if reading:      # force some minimal buffering for readline()
+            stream = ReadlineInputStream(stream)
     elif buffering == 1:   # line-buffering
         if writing:
             stream = LineBufferingOutputStream(stream)
@@ -749,6 +750,113 @@
                                               flush_buffers=False)
 
 
+class ReadlineInputStream(Stream):
+
+    """Minimal buffering input stream.
+
+    Only does buffering for readline().  The other kinds of reads, and
+    all writes, are not buffered at all.
+    """
+
+    bufsize = 2**13 # 8 K
+
+    def __init__(self, base, bufsize=-1):
+        self.base = base
+        self.do_read = base.read   # function to fill buffer some more
+        self.do_seek = base.seek   # seek to a byte offset
+        if bufsize == -1:     # Get default from the class
+            bufsize = self.bufsize
+        self.bufsize = bufsize  # buffer size (hint only)
+        self.buf = None         # raw data (may contain "\n")
+        self.bufstart = 0
+
+    def flush_buffers(self):
+        if self.buf is not None:
+            try:
+                self.do_seek(self.bufstart-len(self.buf), 1)
+            except MyNotImplementedError:
+                pass
+            else:
+                self.buf = None
+                self.bufstart = 0
+
+    def readline(self):
+        if self.buf is not None:
+            i = self.buf.find('\n', self.bufstart)
+        else:
+            self.buf = ''
+            i = -1
+        #
+        if i < 0:
+            self.buf = self.buf[self.bufstart:]
+            self.bufstart = 0
+            while True:
+                bufsize = max(self.bufsize, len(self.buf) >> 2)
+                data = self.do_read(bufsize)
+                if not data:
+                    result = self.buf              # end-of-file reached
+                    self.buf = None
+                    return result
+                startsearch = len(self.buf)   # there is no '\n' in buf so far
+                self.buf += data
+                i = self.buf.find('\n', startsearch)
+                if i >= 0:
+                    break
+        #
+        i += 1
+        result = self.buf[self.bufstart:i]
+        self.bufstart = i
+        return result
+
+    def peek(self):
+        if self.buf is None:
+            return ''
+        if self.bufstart > 0:
+            self.buf = self.buf[self.bufstart:]
+            self.bufstart = 0
+        return self.buf
+
+    def tell(self):
+        pos = self.base.tell()
+        if self.buf is not None:
+            pos -= (len(self.buf) - self.bufstart)
+        return pos
+
+    def readall(self):
+        result = self.base.readall()
+        if self.buf is not None:
+            result = self.buf[self.bufstart:] + result
+            self.buf = None
+            self.bufstart = 0
+        return result
+
+    def read(self, n):
+        if self.buf is None:
+            return self.do_read(n)
+        else:
+            m = n - (len(self.buf) - self.bufstart)
+            start = self.bufstart
+            if m > 0:
+                result = self.buf[start:] + self.do_read(m)
+                self.buf = None
+                self.bufstart = 0
+                return result
+            elif n >= 0:
+                self.bufstart = start + n
+                return self.buf[start : self.bufstart]
+            else:
+                return ''
+
+    seek       = PassThrough("seek",      flush_buffers=True)
+    write      = PassThrough("write",     flush_buffers=True)
+    truncate   = PassThrough("truncate",  flush_buffers=True)
+    flush      = PassThrough("flush",     flush_buffers=True)
+    flushable  = PassThrough("flushable", flush_buffers=False)
+    close      = PassThrough("close",     flush_buffers=False)
+    try_to_find_file_descriptor = PassThrough("try_to_find_file_descriptor",
+                                              flush_buffers=False)
+
+
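A hedged sketch of the effect for callers, assuming open_file_as_stream keeps its (path, mode, buffering) signature; the file name is made up. With buffering=0 and reading enabled, construct_stream_tower now inserts a ReadlineInputStream, so readline() stays buffered while plain read() and all writes go straight through.

    from pypy.rlib import streamio

    f = streamio.open_file_as_stream('/tmp/example.txt', 'r', 0)
    first_line = f.readline()   # served from the 8K readline buffer
    rest = f.read(100)          # unbuffered, apart from leftover buffer data
    f.close()
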
 class BufferingOutputStream(Stream):
 
     """Standard buffering output stream.
diff --git a/pypy/rlib/test/test_debug.py b/pypy/rlib/test/test_debug.py
--- a/pypy/rlib/test/test_debug.py
+++ b/pypy/rlib/test/test_debug.py
@@ -1,11 +1,12 @@
 
 import py
-from pypy.rlib.debug import check_annotation, make_sure_not_resized
-from pypy.rlib.debug import debug_print, debug_start, debug_stop
-from pypy.rlib.debug import have_debug_prints, debug_offset, debug_flush
-from pypy.rlib.debug import check_nonneg, IntegerCanBeNegative
+from pypy.rlib.debug import (check_annotation, make_sure_not_resized,
+                             debug_print, debug_start, debug_stop,
+                             have_debug_prints, debug_offset, debug_flush,
+                             check_nonneg, IntegerCanBeNegative,
+                             mark_dict_non_null)
 from pypy.rlib import debug
-from pypy.rpython.test.test_llinterp import interpret
+from pypy.rpython.test.test_llinterp import interpret, gengraph
 
 def test_check_annotation():
     class Error(Exception):
@@ -52,8 +53,17 @@
     py.test.raises(ListChangeUnallowed, interpret, f, [], 
                    list_comprehension_operations=True)
 
+def test_mark_dict_non_null():
+    def f():
+        d = {"ac": "bx"}
+        mark_dict_non_null(d)
+        return d
 
-class DebugTests:
+    t, typer, graph = gengraph(f, [])
+    assert sorted(graph.returnblock.inputargs[0].concretetype.TO.entries.TO.OF._flds.keys()) == ['key', 'value']
+
+
+class DebugTests(object):
 
     def test_debug_print_start_stop(self):
         def f(x):
diff --git a/pypy/rlib/test/test_jit.py b/pypy/rlib/test/test_jit.py
--- a/pypy/rlib/test/test_jit.py
+++ b/pypy/rlib/test/test_jit.py
@@ -1,6 +1,6 @@
 import py
 from pypy.conftest import option
-from pypy.rlib.jit import hint, we_are_jitted, JitDriver, purefunction_promote
+from pypy.rlib.jit import hint, we_are_jitted, JitDriver, elidable_promote
 from pypy.rlib.jit import JitHintError, oopspec
 from pypy.translator.translator import TranslationContext, graphof
 from pypy.rpython.test.tool import BaseRtypingTest, LLRtypeMixin, OORtypeMixin
@@ -31,8 +31,8 @@
         res = self.interpret(f, [4])
         assert res == 5
 
-    def test_purefunction_promote(self):
-        @purefunction_promote()
+    def test_elidable_promote(self):
+        @elidable_promote()
         def g(func):
             return func + 1
         def f(x):
@@ -40,8 +40,8 @@
         res = self.interpret(f, [2])
         assert res == 5
 
-    def test_purefunction_promote_args(self):
-        @purefunction_promote(promote_args='0')
+    def test_elidable_promote_args(self):
+        @elidable_promote(promote_args='0')
         def g(func, x):
             return func + 1
         def f(x):
diff --git a/pypy/rlib/test/test_libffi.py b/pypy/rlib/test/test_libffi.py
--- a/pypy/rlib/test/test_libffi.py
+++ b/pypy/rlib/test/test_libffi.py
@@ -2,8 +2,10 @@
 import sys
 from pypy.rpython.lltypesystem import rffi, lltype
 from pypy.rpython.lltypesystem.ll2ctypes import ALLOCATED
-from pypy.rlib.test.test_clibffi import BaseFfiTest, get_libm_name
+from pypy.rlib.rarithmetic import r_singlefloat, r_longlong, r_ulonglong
+from pypy.rlib.test.test_clibffi import BaseFfiTest, get_libm_name, make_struct_ffitype_e
 from pypy.rlib.libffi import CDLL, Func, get_libc_name, ArgChain, types
+from pypy.rlib.libffi import longlong2float, float2longlong, IS_32_BIT
 
 class TestLibffiMisc(BaseFfiTest):
 
@@ -50,6 +52,18 @@
         del lib
         assert not ALLOCATED
 
+    def test_longlong_as_float(self):
+        from pypy.translator.c.test.test_genc import compile
+        maxint64 = r_longlong(9223372036854775807)
+        def fn(x):
+            d = longlong2float(x)
+            ll = float2longlong(d)
+            return ll
+        assert fn(maxint64) == maxint64
+        #
+        fn2 = compile(fn, [r_longlong])
+        res = fn2(maxint64)
+        assert res == maxint64
 
 class TestLibffiCall(BaseFfiTest):
     """
@@ -97,7 +111,7 @@
     def get_libfoo(self):
         return self.CDLL(self.libfoo_name)
 
-    def call(self, funcspec, args, RESULT, init_result=0):
+    def call(self, funcspec, args, RESULT, init_result=0, is_struct=False):
         """
         Call the specified function after constructing an ArgChain with the
         arguments in ``args``.
@@ -114,8 +128,20 @@
         func = lib.getpointer(name, argtypes, restype)
         chain = ArgChain()
         for arg in args:
-            chain.arg(arg)
-        return func.call(chain, RESULT)
+            if isinstance(arg, r_singlefloat):
+                chain.arg_singlefloat(float(arg))
+            elif IS_32_BIT and isinstance(arg, r_longlong):
+                chain.arg_longlong(longlong2float(arg))
+            elif IS_32_BIT and isinstance(arg, r_ulonglong):
+                arg = rffi.cast(rffi.LONGLONG, arg)
+                chain.arg_longlong(longlong2float(arg))
+            elif isinstance(arg, tuple):
+                methname, arg = arg
+                meth = getattr(chain, methname)
+                meth(arg)
+            else:
+                chain.arg(arg)
+        return func.call(chain, RESULT, is_struct=is_struct)
 
     def check_loops(self, *args, **kwds):
         """
@@ -137,7 +163,7 @@
         res = self.call(func, [38, 4.2], rffi.LONG)
         assert res == 42
         self.check_loops({
-                'call_may_force': 1,
+                'call_release_gil': 1,
                 'guard_no_exception': 1,
                 'guard_not_forced': 1,
                 'int_add': 1,
@@ -150,7 +176,7 @@
         func = (libm, 'pow', [types.double, types.double], types.double)
         res = self.call(func, [2.0, 3.0], rffi.DOUBLE, init_result=0.0)
         assert res == 8.0
-        self.check_loops(call_may_force=1, guard_no_exception=1, guard_not_forced=1)
+        self.check_loops(call_release_gil=1, guard_no_exception=1, guard_not_forced=1)
 
     def test_cast_result(self):
         """
@@ -163,7 +189,7 @@
         func = (libfoo, 'cast_to_uchar_and_ovf', [types.sint], types.uchar)
         res = self.call(func, [0], rffi.UCHAR)
         assert res == 200
-        self.check_loops(call_may_force=1, guard_no_exception=1, guard_not_forced=1)
+        self.check_loops(call_release_gil=1, guard_no_exception=1, guard_not_forced=1)
 
     def test_cast_argument(self):
         """
@@ -267,6 +293,76 @@
         res = self.call(get_dummy, [], rffi.LONG)
         assert res == initval+1
 
+    def test_single_float_args(self):
+        """
+            float sum_xy_float(float x, float y)
+            {
+                return x+y;
+            }
+        """
+        from ctypes import c_float # this is used only to compute the expected result
+        libfoo = self.get_libfoo()
+        func = (libfoo, 'sum_xy_float', [types.float, types.float], types.float)
+        x = r_singlefloat(12.34)
+        y = r_singlefloat(56.78)
+        res = self.call(func, [x, y], rffi.FLOAT, init_result=0.0)
+        expected = c_float(c_float(12.34).value + c_float(56.78).value).value
+        assert res == expected
+
+    def test_slonglong_args(self):
+        """
+            long long sum_xy_longlong(long long x, long long y)
+            {
+                return x+y;
+            }
+        """
+        maxint32 = 2147483647 # we cannot really go above maxint on 64 bits
+                              # (and we would not test anything, since there
+                              # long is the same as long long)
+        libfoo = self.get_libfoo()
+        func = (libfoo, 'sum_xy_longlong', [types.slonglong, types.slonglong],
+                types.slonglong)
+        if IS_32_BIT:
+            x = r_longlong(maxint32+1)
+            y = r_longlong(maxint32+2)
+            zero = longlong2float(r_longlong(0))
+        else:
+            x = maxint32+1
+            y = maxint32+2
+            zero = 0
+        res = self.call(func, [x, y], rffi.LONGLONG, init_result=zero)
+        if IS_32_BIT:
+            # obscure, on 32bit it's really a long long, so it returns a
+            # DOUBLE because of the JIT hack
+            res = float2longlong(res)
+        expected = maxint32*2 + 3
+        assert res == expected
+
+    def test_ulonglong_args(self):
+        """
+            unsigned long long sum_xy_ulonglong(unsigned long long x,
+                                                unsigned long long y)
+            {
+                return x+y;
+            }
+        """
+        maxint64 = 9223372036854775807 # maxint64+1 does not fit into a
+                                       # longlong, but it does into a
+                                       # ulonglong
+        libfoo = self.get_libfoo()
+        func = (libfoo, 'sum_xy_ulonglong', [types.ulonglong, types.ulonglong],
+                types.ulonglong)
+        x = r_ulonglong(maxint64+1)
+        y = r_ulonglong(2)
+        res = self.call(func, [x, y], rffi.ULONGLONG, init_result=0)
+        if IS_32_BIT:
+            # obscure, on 32bit it's really a long long, so it returns a
+            # DOUBLE because of the JIT hack
+            res = float2longlong(res)
+            res = rffi.cast(rffi.ULONGLONG, res)
+        expected = maxint64 + 3
+        assert res == expected
+
     def test_wrong_number_of_arguments(self):
         from pypy.rpython.llinterp import LLException
         libfoo = self.get_libfoo() 
@@ -287,3 +383,57 @@
 
         my_raises("self.call(func, [38], rffi.LONG)") # one less
         my_raises("self.call(func, [38, 12.3, 42], rffi.LONG)") # one more
+
+
+    def test_byval_argument(self):
+        """
+            struct Point {
+                long x;
+                long y;
+            };
+
+            long sum_point(struct Point p) {
+                return p.x + p.y;
+            }
+        """
+        libfoo = CDLL(self.libfoo_name)
+        ffi_point_struct = make_struct_ffitype_e(0, 0, [types.slong, types.slong])
+        ffi_point = ffi_point_struct.ffistruct
+        sum_point = (libfoo, 'sum_point', [ffi_point], types.slong)
+        #
+        ARRAY = rffi.CArray(rffi.LONG)
+        buf = lltype.malloc(ARRAY, 2, flavor='raw')
+        buf[0] = 30
+        buf[1] = 12
+        adr = rffi.cast(rffi.VOIDP, buf)
+        res = self.call(sum_point, [('arg_raw', adr)], rffi.LONG, init_result=0)
+        assert res == 42
+        # check that we still have the ownership on the buffer
+        assert buf[0] == 30
+        assert buf[1] == 12
+        lltype.free(buf, flavor='raw')
+        lltype.free(ffi_point_struct, flavor='raw')
+
+    def test_byval_result(self):
+        """
+            struct Point make_point(long x, long y) {
+                struct Point p;
+                p.x = x;
+                p.y = y;
+                return p;
+            }
+        """
+        libfoo = CDLL(self.libfoo_name)
+        ffi_point_struct = make_struct_ffitype_e(0, 0, [types.slong, types.slong])
+        ffi_point = ffi_point_struct.ffistruct
+
+        libfoo = CDLL(self.libfoo_name)
+        make_point = (libfoo, 'make_point', [types.slong, types.slong], ffi_point)
+        #
+        PTR = lltype.Ptr(rffi.CArray(rffi.LONG))
+        p = self.call(make_point, [12, 34], PTR, init_result=lltype.nullptr(PTR.TO),
+                      is_struct=True)
+        assert p[0] == 12
+        assert p[1] == 34
+        lltype.free(p, flavor='raw')
+        lltype.free(ffi_point_struct, flavor='raw')
diff --git a/pypy/rlib/test/test_rrandom.py b/pypy/rlib/test/test_rrandom.py
--- a/pypy/rlib/test/test_rrandom.py
+++ b/pypy/rlib/test/test_rrandom.py
@@ -3,6 +3,12 @@
 
 # the numbers were created by using CPython's _randommodule.c
 
+def test_init_from_zero():
+    rnd = Random(0)
+    assert rnd.state[:14] == [0, 1, 1812433255, 1900727105, 1208447044,
+            2481403966, 4042607538, 337614300, 3232553940,
+            1018809052, 3202401494, 1775180719, 3192392114, 594215549]
+
 def test_init_from_seed():
     rnd = Random(1000)
     assert rnd.state[:14] == [1000, 4252021385, 1724402292, 571538732,
diff --git a/pypy/rlib/test/test_streamio.py b/pypy/rlib/test/test_streamio.py
--- a/pypy/rlib/test/test_streamio.py
+++ b/pypy/rlib/test/test_streamio.py
@@ -1008,6 +1008,75 @@
             assert base.buf == data
 
 
+class TestReadlineInputStream:
+
+    packets = ["a", "b", "\n", "def", "\nxy\npq\nuv", "wx"]
+    lines = ["ab\n", "def\n", "xy\n", "pq\n", "uvwx"]
+
+    def makeStream(self, seek=False, tell=False, bufsize=-1):
+        base = TSource(self.packets)
+        self.source = base
+        def f(*args):
+            if seek is False:
+                raise NotImplementedError     # a bug!
+            if seek is None:
+                raise streamio.MyNotImplementedError   # can be caught
+            raise ValueError(seek)  # uh?
+        if not tell:
+            base.tell = f
+        if not seek:
+            base.seek = f
+        return streamio.ReadlineInputStream(base, bufsize)
+
+    def test_readline(self):
+        for file in [self.makeStream(), self.makeStream(bufsize=2)]:
+            i = 0
+            while 1:
+                r = file.readline()
+                if r == "":
+                    break
+                assert self.lines[i] == r
+                i += 1
+            assert i == len(self.lines)
+
+    def test_readline_and_read_interleaved(self):
+        for file in [self.makeStream(seek=True),
+                     self.makeStream(seek=True, bufsize=2)]:
+            i = 0
+            while 1:
+                firstchar = file.read(1)
+                if firstchar == "":
+                    break
+                r = file.readline()
+                assert r != ""
+                assert self.lines[i] == firstchar + r
+                i += 1
+            assert i == len(self.lines)
+
+    def test_readline_and_read_interleaved_no_seek(self):
+        for file in [self.makeStream(seek=None),
+                     self.makeStream(seek=None, bufsize=2)]:
+            i = 0
+            while 1:
+                firstchar = file.read(1)
+                if firstchar == "":
+                    break
+                r = file.readline()
+                assert r != ""
+                assert self.lines[i] == firstchar + r
+                i += 1
+            assert i == len(self.lines)
+
+    def test_readline_and_readall(self):
+        file = self.makeStream(seek=True, tell=True, bufsize=2)
+        r = file.readline()
+        assert r == 'ab\n'
+        assert file.tell() == 3
+        r = file.readall()
+        assert r == 'def\nxy\npq\nuvwx'
+        r = file.readall()
+        assert r == ''
+
 
 # Speed test
 
diff --git a/pypy/rpython/llinterp.py b/pypy/rpython/llinterp.py
--- a/pypy/rpython/llinterp.py
+++ b/pypy/rpython/llinterp.py
@@ -737,9 +737,12 @@
     def op_zero_gc_pointers_inside(self, obj):
         raise NotImplementedError("zero_gc_pointers_inside")
 
-    def op_gc_writebarrier_before_copy(self, source, dest):
+    def op_gc_writebarrier_before_copy(self, source, dest,
+                                       source_start, dest_start, length):
         if hasattr(self.heap, 'writebarrier_before_copy'):
-            return self.heap.writebarrier_before_copy(source, dest)
+            return self.heap.writebarrier_before_copy(source, dest,
+                                                      source_start, dest_start,
+                                                      length)
         else:
             return True
 
diff --git a/pypy/rpython/lltypesystem/ll2ctypes.py b/pypy/rpython/lltypesystem/ll2ctypes.py
--- a/pypy/rpython/lltypesystem/ll2ctypes.py
+++ b/pypy/rpython/lltypesystem/ll2ctypes.py
@@ -37,7 +37,9 @@
     if far_regions:
         import random
         pieces = far_regions._ll2ctypes_pieces
-        num = random.randrange(len(pieces))
+        num = random.randrange(len(pieces)+1)
+        if num == len(pieces):
+            return ctype()
         i1, stop = pieces[num]
         i2 = i1 + ((ctypes.sizeof(ctype) or 1) + 7) & ~7
         if i2 > stop:
@@ -418,6 +420,9 @@
     instance._storage = ctypes_storage
     assert ctypes_storage   # null pointer?
 
+class NotCtypesAllocatedStructure(ValueError):
+    pass
+
 class _parentable_mixin(object):
     """Mixin added to _parentable containers when they become ctypes-based.
     (This is done by changing the __class__ of the instance to reference
@@ -436,7 +441,7 @@
     def _addressof_storage(self):
         "Returns the storage address as an int"
         if self._storage is None or self._storage is True:
-            raise ValueError("Not a ctypes allocated structure")
+            raise NotCtypesAllocatedStructure("Not a ctypes allocated structure")
         return intmask(ctypes.cast(self._storage, ctypes.c_void_p).value)
 
     def _free(self):
diff --git a/pypy/rpython/lltypesystem/ll_str.py b/pypy/rpython/lltypesystem/ll_str.py
--- a/pypy/rpython/lltypesystem/ll_str.py
+++ b/pypy/rpython/lltypesystem/ll_str.py
@@ -1,12 +1,13 @@
 from pypy.rpython.lltypesystem.lltype import GcArray, Array, Char, malloc
 from pypy.rpython.annlowlevel import llstr
 from pypy.rlib.rarithmetic import r_uint, r_longlong, r_ulonglong
+from pypy.rlib import jit
 
 CHAR_ARRAY = GcArray(Char)
 
+@jit.elidable
 def ll_int_str(repr, i):
     return ll_int2dec(i)
-ll_int_str._pure_function_ = True
 
 def ll_unsigned(i):
     if isinstance(i, r_longlong) or isinstance(i, r_ulonglong):
@@ -14,6 +15,7 @@
     else:
         return r_uint(i)
 
+@jit.elidable
 def ll_int2dec(i):
     from pypy.rpython.lltypesystem.rstr import mallocstr
     temp = malloc(CHAR_ARRAY, 20)
@@ -44,13 +46,13 @@
         result.chars[j] = temp[len-j-1]
         j += 1
     return result
-ll_int2dec._pure_function_ = True
 
 hex_chars = malloc(Array(Char), 16, immortal=True)
 
 for i in range(16):
     hex_chars[i] = "%x"%i
 
+@jit.elidable
 def ll_int2hex(i, addPrefix):
     from pypy.rpython.lltypesystem.rstr import mallocstr
     temp = malloc(CHAR_ARRAY, 20)
@@ -86,8 +88,8 @@
         result.chars[j] = temp[len-j-1]
         j += 1
     return result
-ll_int2hex._pure_function_ = True
 
+@jit.elidable
 def ll_int2oct(i, addPrefix):
     from pypy.rpython.lltypesystem.rstr import mallocstr
     if i == 0:
@@ -123,9 +125,8 @@
         result.chars[j] = temp[len-j-1]
         j += 1
     return result
-ll_int2oct._pure_function_ = True
 
+@jit.elidable
 def ll_float_str(repr, f):
     from pypy.rlib.rfloat import formatd
     return llstr(formatd(f, 'f', 6))
-ll_float_str._pure_function_ = True
diff --git a/pypy/rpython/lltypesystem/lltype.py b/pypy/rpython/lltypesystem/lltype.py
--- a/pypy/rpython/lltypesystem/lltype.py
+++ b/pypy/rpython/lltypesystem/lltype.py
@@ -831,7 +831,7 @@
     raise TypeError, "unsupported cast"
 
 def _cast_whatever(TGT, value):
-    from pypy.rpython.lltypesystem import llmemory
+    from pypy.rpython.lltypesystem import llmemory, rffi
     ORIG = typeOf(value)
     if ORIG == TGT:
         return value
@@ -847,6 +847,8 @@
                 return cast_pointer(TGT, value)
         elif ORIG == llmemory.Address:
             return llmemory.cast_adr_to_ptr(value, TGT)
+        elif TGT == rffi.VOIDP and ORIG == Unsigned:
+            return rffi.cast(TGT, value)
         elif ORIG == Signed:
             return cast_int_to_ptr(TGT, value)
     elif TGT == llmemory.Address and isinstance(ORIG, Ptr):
diff --git a/pypy/rpython/lltypesystem/module/ll_math.py b/pypy/rpython/lltypesystem/module/ll_math.py
--- a/pypy/rpython/lltypesystem/module/ll_math.py
+++ b/pypy/rpython/lltypesystem/module/ll_math.py
@@ -58,7 +58,7 @@
 math_log10 = llexternal('log10', [rffi.DOUBLE], rffi.DOUBLE)
 math_copysign = llexternal(underscore + 'copysign',
                            [rffi.DOUBLE, rffi.DOUBLE], rffi.DOUBLE,
-                           pure_function=True)
+                           elidable_function=True)
 math_atan2 = llexternal('atan2', [rffi.DOUBLE, rffi.DOUBLE], rffi.DOUBLE)
 math_frexp = llexternal('frexp', [rffi.DOUBLE, rffi.INTP], rffi.DOUBLE)
 math_modf  = llexternal('modf',  [rffi.DOUBLE, rffi.DOUBLEP], rffi.DOUBLE)
@@ -67,11 +67,11 @@
 math_fmod  = llexternal('fmod',  [rffi.DOUBLE, rffi.DOUBLE], rffi.DOUBLE)
 math_hypot = llexternal(underscore + 'hypot',
                         [rffi.DOUBLE, rffi.DOUBLE], rffi.DOUBLE)
-math_floor = llexternal('floor', [rffi.DOUBLE], rffi.DOUBLE, pure_function=True)
+math_floor = llexternal('floor', [rffi.DOUBLE], rffi.DOUBLE, elidable_function=True)
 
 math_sqrt = llexternal('sqrt', [rffi.DOUBLE], rffi.DOUBLE)
 
-@jit.purefunction
+@jit.elidable
 def sqrt_nonneg(x):
     return math_sqrt(x)
 sqrt_nonneg.oopspec = "math.sqrt_nonneg(x)"
diff --git a/pypy/rpython/lltypesystem/opimpl.py b/pypy/rpython/lltypesystem/opimpl.py
--- a/pypy/rpython/lltypesystem/opimpl.py
+++ b/pypy/rpython/lltypesystem/opimpl.py
@@ -473,12 +473,16 @@
     checkadr(addr2)
     return addr1 - addr2
 
-def op_gc_writebarrier_before_copy(source, dest):
+def op_gc_writebarrier_before_copy(source, dest,
+                                   source_start, dest_start, length):
     A = lltype.typeOf(source)
     assert A == lltype.typeOf(dest)
     assert isinstance(A.TO, lltype.GcArray)
     assert isinstance(A.TO.OF, lltype.Ptr)
     assert A.TO.OF.TO._gckind == 'gc'
+    assert type(source_start) is int
+    assert type(dest_start) is int
+    assert type(length) is int
     return True
 
 def op_getfield(p, name):
diff --git a/pypy/rpython/lltypesystem/rdict.py b/pypy/rpython/lltypesystem/rdict.py
--- a/pypy/rpython/lltypesystem/rdict.py
+++ b/pypy/rpython/lltypesystem/rdict.py
@@ -9,6 +9,7 @@
 from pypy.rpython import robject
 from pypy.rlib import objectmodel, jit
 from pypy.rpython import rmodel
+from pypy.rpython.error import TyperError
 
 HIGHEST_BIT = intmask(1 << (LONG_BIT - 1))
 MASK = intmask(HIGHEST_BIT - 1)
@@ -42,7 +43,7 @@
 class DictRepr(AbstractDictRepr):
 
     def __init__(self, rtyper, key_repr, value_repr, dictkey, dictvalue,
-                 custom_eq_hash=None):
+                 custom_eq_hash=None, force_non_null=False):
         self.rtyper = rtyper
         self.DICT = lltype.GcForwardReference()
         self.lowleveltype = lltype.Ptr(self.DICT)
@@ -61,6 +62,7 @@
         self.dictvalue = dictvalue
         self.dict_cache = {}
         self._custom_eq_hash_repr = custom_eq_hash
+        self.force_non_null = force_non_null
         # setup() needs to be called to finish this initialization
 
     def _externalvsinternal(self, rtyper, item_repr):
@@ -97,6 +99,13 @@
             s_value = self.dictvalue.s_value
             nullkeymarker = not self.key_repr.can_ll_be_null(s_key)
             nullvaluemarker = not self.value_repr.can_ll_be_null(s_value)
+            if self.force_non_null:
+                if not nullkeymarker:
+                    rmodel.warning("%s can be null, but forcing non-null in dict key" % s_key)
+                    nullkeymarker = True
+                if not nullvaluemarker:
+                    rmodel.warning("%s can be null, but forcing non-null in dict value" % s_value)
+                    nullvaluemarker = True
             dummykeyobj = self.key_repr.get_ll_dummyval_obj(self.rtyper,
                                                             s_key)
             dummyvalueobj = self.value_repr.get_ll_dummyval_obj(self.rtyper,
@@ -206,7 +215,7 @@
         if dictobj is None:
             return lltype.nullptr(self.DICT)
         if not isinstance(dictobj, (dict, objectmodel.r_dict)):
-            raise TyperError("expected a dict: %r" % (dictobj,))
+            raise TypeError("expected a dict: %r" % (dictobj,))
         try:
             key = Constant(dictobj)
             return self.dict_cache[key]
@@ -640,12 +649,15 @@
     pass
 
 
-def rtype_r_dict(hop):
+def rtype_r_dict(hop, i_force_non_null=None):
     r_dict = hop.r_result
     if not r_dict.custom_eq_hash:
         raise TyperError("r_dict() call does not return an r_dict instance")
-    v_eqfn, v_hashfn = hop.inputargs(r_dict.r_rdict_eqfn,
-                                     r_dict.r_rdict_hashfn)
+    v_eqfn = hop.inputarg(r_dict.r_rdict_eqfn, arg=0)
+    v_hashfn = hop.inputarg(r_dict.r_rdict_hashfn, arg=1)
+    if i_force_non_null is not None:
+        assert i_force_non_null == 2
+        hop.inputarg(lltype.Void, arg=2)
     cDICT = hop.inputconst(lltype.Void, r_dict.DICT)
     hop.exception_cannot_occur()
     v_result = hop.gendirectcall(ll_newdict, cDICT)
@@ -833,10 +845,16 @@
 POPITEMINDEX = lltype.Struct('PopItemIndex', ('nextindex', lltype.Signed))
 global_popitem_index = lltype.malloc(POPITEMINDEX, zero=True, immortal=True)
 
-def ll_popitem(ELEM, dic):
+def _ll_getnextitem(dic):
     entries = dic.entries
+    ENTRY = lltype.typeOf(entries).TO.OF
     dmask = len(entries) - 1
-    base = global_popitem_index.nextindex
+    if hasattr(ENTRY, 'f_hash'):
+        if entries.valid(0):
+            return 0
+        base = entries[0].f_hash
+    else:
+        base = global_popitem_index.nextindex
     counter = 0
     while counter <= dmask:
         i = (base + counter) & dmask
@@ -845,8 +863,16 @@
             break
     else:
         raise KeyError
-    global_popitem_index.nextindex += counter
-    entry = entries[i]
+    if hasattr(ENTRY, 'f_hash'):
+        entries[0].f_hash = base + counter
+    else:
+        global_popitem_index.nextindex = base + counter
+    return i
+
+ at jit.dont_look_inside
+def ll_popitem(ELEM, dic):
+    i = _ll_getnextitem(dic)
+    entry = dic.entries[i]
     r = lltype.malloc(ELEM.TO)
     r.item0 = recast(ELEM.TO.item0, entry.key)
     r.item1 = recast(ELEM.TO.item1, entry.value)
diff --git a/pypy/rpython/lltypesystem/rffi.py b/pypy/rpython/lltypesystem/rffi.py
--- a/pypy/rpython/lltypesystem/rffi.py
+++ b/pypy/rpython/lltypesystem/rffi.py
@@ -55,7 +55,7 @@
                compilation_info=ExternalCompilationInfo(),
                sandboxsafe=False, threadsafe='auto',
                _nowrapper=False, calling_conv='c',
-               oo_primitive=None, pure_function=False,
+               oo_primitive=None, elidable_function=False,
                macro=None):
     """Build an external function that will invoke the C function 'name'
     with the given 'args' types and 'result' type.
@@ -87,8 +87,8 @@
                 name, macro, ext_type, compilation_info)
         else:
             _callable = ll2ctypes.LL2CtypesCallable(ext_type, calling_conv)
-    if pure_function:
-        _callable._pure_function_ = True
+    if elidable_function:
+        _callable._elidable_function_ = True
     kwds = {}
     if oo_primitive:
         kwds['oo_primitive'] = oo_primitive
@@ -139,10 +139,10 @@
         source = py.code.Source("""
             def call_external_function(%(argnames)s):
                 before = aroundstate.before
-                after = aroundstate.after
                 if before: before()
                 # NB. it is essential that no exception checking occurs here!
                 res = funcptr(%(argnames)s)
+                after = aroundstate.after
                 if after: after()
                 return res
         """ % locals())
@@ -244,7 +244,7 @@
     def __init__(self):
         self.callbacks = {}
 
-def _make_wrapper_for(TP, callable, callbackholder, aroundstate=None):
+def _make_wrapper_for(TP, callable, callbackholder=None, aroundstate=None):
     """ Function creating wrappers for callbacks. Note that this is
     cheating as we assume constant callbacks and we just memoize wrappers
     """
@@ -253,21 +253,18 @@
     if hasattr(callable, '_errorcode_'):
         errorcode = callable._errorcode_
     else:
-        errorcode = TP.TO.RESULT._example()
+        errorcode = TP.TO.RESULT._defl()
     callable_name = getattr(callable, '__name__', '?')
-    callbackholder.callbacks[callable] = True
+    if callbackholder is not None:
+        callbackholder.callbacks[callable] = True
     args = ', '.join(['a%d' % i for i in range(len(TP.TO.ARGS))])
     source = py.code.Source(r"""
         def wrapper(%s):    # no *args - no GIL for mallocing the tuple
             llop.gc_stack_bottom(lltype.Void)   # marker for trackgcroot.py
             if aroundstate is not None:
-                before = aroundstate.before
                 after = aroundstate.after
-            else:
-                before = None
-                after = None
-            if after:
-                after()
+                if after:
+                    after()
             # from now on we hold the GIL
             stackcounter.stacks_counter += 1
             try:
@@ -281,8 +278,10 @@
                     traceback.print_exc()
                 result = errorcode
             stackcounter.stacks_counter -= 1
-            if before:
-                before()
+            if aroundstate is not None:
+                before = aroundstate.before
+                if before:
+                    before()
             # here we don't hold the GIL any more. As in the wrapper() produced
             # by llexternal, it is essential that no exception checking occurs
             # after the call to before().
diff --git a/pypy/rpython/lltypesystem/rlist.py b/pypy/rpython/lltypesystem/rlist.py
--- a/pypy/rpython/lltypesystem/rlist.py
+++ b/pypy/rpython/lltypesystem/rlist.py
@@ -250,12 +250,11 @@
     length = l.length
     l.length = length + 1
     l.ll_setitem_fast(length, newitem)
-ll_append_noresize.oopspec = 'list.append(l, newitem)'
 
 
 def ll_both_none(lst1, lst2):
     return not lst1 and not lst2
-        
+
 
 # ____________________________________________________________
 #
diff --git a/pypy/rpython/lltypesystem/rstr.py b/pypy/rpython/lltypesystem/rstr.py
--- a/pypy/rpython/lltypesystem/rstr.py
+++ b/pypy/rpython/lltypesystem/rstr.py
@@ -4,7 +4,7 @@
 from pypy.rlib.objectmodel import malloc_zero_filled, we_are_translated
 from pypy.rlib.objectmodel import _hash_string, enforceargs
 from pypy.rlib.debug import ll_assert
-from pypy.rlib.jit import purefunction, we_are_jitted
+from pypy.rlib.jit import elidable, we_are_jitted, dont_look_inside
 from pypy.rlib.rarithmetic import ovfcheck
 from pypy.rpython.robject import PyObjRepr, pyobj_repr
 from pypy.rpython.rmodel import inputconst, IntegerRepr
@@ -57,6 +57,8 @@
                 llmemory.itemoffsetof(TP.chars, 0) +
                 llmemory.sizeof(CHAR_TP) * item)
 
+    # It'd be nice to be able to look inside this function.
+    @dont_look_inside
     @enforceargs(None, None, int, int, int)
     def copy_string_contents(src, dst, srcstart, dststart, length):
         assert srcstart >= 0
@@ -142,7 +144,7 @@
         self.ll = LLHelpers
         self.malloc = mallocunicode
 
-    @purefunction
+    @elidable
     def ll_str(self, s):
         # XXX crazy that this is here, but I don't want to break
         #     rmodel logic
@@ -157,7 +159,7 @@
             result.chars[i] = cast_primitive(Char, c)
         return result
 
-    @purefunction
+    @elidable
     def ll_encode_latin1(self, s):
         length = len(s.chars)
         result = mallocstr(length)
@@ -256,7 +258,7 @@
 
 
 class LLHelpers(AbstractLLHelpers):
-    @purefunction
+    @elidable
     def ll_str_mul(s, times):
         if times < 0:
             times = 0
@@ -278,7 +280,7 @@
             i += j
         return newstr
 
-    @purefunction
+    @elidable
     def ll_char_mul(ch, times):
         if typeOf(ch) is Char:
             malloc = mallocstr
@@ -323,6 +325,7 @@
         return s
     ll_str2unicode.oopspec = 'str.str2unicode(str)'
 
+    @elidable
     def ll_strhash(s):
         # unlike CPython, there is no reason to avoid to return -1
         # but our malloc initializes the memory to zero, so we use zero as the
@@ -334,12 +337,11 @@
                 x = 29872897
             s.hash = x
         return x
-    ll_strhash._pure_function_ = True # it's pure but it does not look like it
 
     def ll_strfasthash(s):
         return s.hash     # assumes that the hash is already computed
 
-    @purefunction
+    @elidable
     def ll_strconcat(s1, s2):
         len1 = len(s1.chars)
         len2 = len(s2.chars)
@@ -349,7 +351,7 @@
         return newstr
     ll_strconcat.oopspec = 'stroruni.concat(s1, s2)'
 
-    @purefunction
+    @elidable
     def ll_strip(s, ch, left, right):
         s_len = len(s.chars)
         if s_len == 0:
@@ -367,7 +369,7 @@
         s.copy_contents(s, result, lpos, 0, r_len)
         return result
 
-    @purefunction
+    @elidable
     def ll_upper(s):
         s_chars = s.chars
         s_len = len(s_chars)
@@ -384,7 +386,7 @@
             i += 1
         return result
 
-    @purefunction
+    @elidable
     def ll_lower(s):
         s_chars = s.chars
         s_len = len(s_chars)
@@ -425,7 +427,7 @@
             i += 1
         return result
 
-    @purefunction
+    @elidable
     def ll_strcmp(s1, s2):
         if not s1 and not s2:
             return True
@@ -448,7 +450,7 @@
             i += 1
         return len1 - len2
 
-    @purefunction
+    @elidable
     def ll_streq(s1, s2):
         if s1 == s2:       # also if both are NULLs
             return True
@@ -468,7 +470,7 @@
         return True
     ll_streq.oopspec = 'stroruni.equal(s1, s2)'
 
-    @purefunction
+    @elidable
     def ll_startswith(s1, s2):
         len1 = len(s1.chars)
         len2 = len(s2.chars)
@@ -484,7 +486,12 @@
 
         return True
 
-    @purefunction
+    def ll_startswith_char(s, ch):
+        if not len(s.chars):
+            return False
+        return s.chars[0] == ch
+
+    @elidable
     def ll_endswith(s1, s2):
         len1 = len(s1.chars)
         len2 = len(s2.chars)
@@ -501,7 +508,12 @@
 
         return True
 
-    @purefunction
+    def ll_endswith_char(s, ch):
+        if not len(s.chars):
+            return False
+        return s.chars[len(s.chars) - 1] == ch
+
+    @elidable
     def ll_find_char(s, ch, start, end):
         i = start
         if end > len(s.chars):
@@ -513,7 +525,7 @@
         return -1
     ll_find_char._annenforceargs_ = [None, None, int, int]
 
-    @purefunction
+    @elidable
     def ll_rfind_char(s, ch, start, end):
         if end > len(s.chars):
             end = len(s.chars)
@@ -524,7 +536,7 @@
                 return i
         return -1
 
-    @purefunction
+    @elidable
     def ll_count_char(s, ch, start, end):
         count = 0
         i = start
@@ -592,7 +604,7 @@
             res = 0
         return res
 
-    @purefunction
+    @elidable
     def ll_search(s1, s2, start, end, mode):
         count = 0
         n = end - start
@@ -715,7 +727,7 @@
             i += 1
         return result
 
-    @purefunction
+    @elidable
     def _ll_stringslice(s1, start, stop):
         lgt = stop - start
         assert start >= 0
@@ -813,7 +825,7 @@
         item.copy_contents(s, item, j, 0, i - j)
         return res
 
-    @purefunction
+    @elidable
     def ll_replace_chr_chr(s, c1, c2):
         length = len(s.chars)
         newstr = s.malloc(length)
@@ -828,7 +840,7 @@
             j += 1
         return newstr
 
-    @purefunction
+    @elidable
     def ll_contains(s, c):
         chars = s.chars
         strlen = len(chars)
@@ -839,7 +851,7 @@
             i += 1
         return False
 
-    @purefunction
+    @elidable
     def ll_int(s, base):
         if not 2 <= base <= 36:
             raise ValueError
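
Throughout rstr.py the old @purefunction decorator becomes @elidable, and ll_strhash now uses the decorator instead of setting _pure_function_ by hand. The following stand-alone sketch shows what "elidable" promises the JIT, using a no-op stand-in decorator; the real decorator lives in pypy.rlib.jit and nothing here is its actual implementation.

def elidable(func):
    # stand-in for pypy.rlib.jit.elidable: it only marks the function; the
    # JIT is then allowed to drop repeated calls with identical arguments
    func._elidable_function_ = True
    return func

_hash_cache = {}

@elidable
def ll_strhash_model(s):
    # elidable even though it memoizes: the result for a given string never
    # changes, so duplicate calls can safely be elided (cf. ll_strhash above)
    x = _hash_cache.get(s, 0)
    if x == 0:
        x = hash(s)
        if x == 0:
            x = 29872897          # never cache 0, mirroring the diff
        _hash_cache[s] = x
    return x

assert ll_strhash_model("abc") == ll_strhash_model("abc")
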
diff --git a/pypy/rpython/memory/gc/generation.py b/pypy/rpython/memory/gc/generation.py
--- a/pypy/rpython/memory/gc/generation.py
+++ b/pypy/rpython/memory/gc/generation.py
@@ -517,7 +517,8 @@
             objhdr.tid &= ~GCFLAG_NO_HEAP_PTRS
             self.last_generation_root_objects.append(addr_struct)
 
-    def writebarrier_before_copy(self, source_addr, dest_addr):
+    def writebarrier_before_copy(self, source_addr, dest_addr,
+                                 source_start, dest_start, length):
         """ This has the same effect as calling writebarrier over
         each element in dest copied from source, except it might reset
         one of the following flags a bit too eagerly, which means we'll have
diff --git a/pypy/rpython/memory/gc/minimark.py b/pypy/rpython/memory/gc/minimark.py
--- a/pypy/rpython/memory/gc/minimark.py
+++ b/pypy/rpython/memory/gc/minimark.py
@@ -75,10 +75,16 @@
 
 first_gcflag = 1 << (LONG_BIT//2)
 
-# The following flag is never set on young objects.  It is initially set
-# on all prebuilt and old objects, and gets cleared by the write_barrier()
-# when we write in them a pointer to a young object.
-GCFLAG_NO_YOUNG_PTRS = first_gcflag << 0
+# The following flag is set on objects if we need to do something to
+# track the young pointers that it might contain.  The flag is not set
+# on young objects (unless they are large arrays, see below), and we
+# simply assume that any young object can point to any other young object.
+# For old and prebuilt objects, the flag is usually set, and is cleared
+# when we write a young pointer to it.  For large arrays with
+# GCFLAG_HAS_CARDS, we rely on card marking to track where the
+# young pointers are; the flag GCFLAG_TRACK_YOUNG_PTRS is set in this
+# case too, to speed up the write barrier.
+GCFLAG_TRACK_YOUNG_PTRS = first_gcflag << 0
 
 # The following flag is set on some prebuilt objects.  The flag is set
 # unless the object is already listed in 'prebuilt_root_objects'.
@@ -246,17 +252,19 @@
         self.ac = ArenaCollectionClass(arena_size, page_size,
                                        small_request_threshold)
         #
-        # Used by minor collection: a list of non-young objects that
+        # Used by minor collection: a list of (mostly non-young) objects that
         # (may) contain a pointer to a young object.  Populated by
-        # the write barrier.
-        self.old_objects_pointing_to_young = self.AddressStack()
+        # the write barrier: when we clear GCFLAG_TRACK_YOUNG_PTRS on an
+        # object, we add that object to this list.
+        self.objects_pointing_to_young = self.AddressStack()
         #
-        # Similar to 'old_objects_pointing_to_young', but lists objects
+        # Similar to 'objects_pointing_to_young', but lists objects
         # that have the GCFLAG_CARDS_SET bit.  For large arrays.  Note
         # that it is possible for an object to be listed both in here
-        # and in 'old_objects_pointing_to_young', in which case we
+        # and in 'objects_pointing_to_young', in which case we
         # should just clear the cards and trace it fully, as usual.
-        self.old_objects_with_cards_set = self.AddressStack()
+        # Note also that young array objects may be added to this list.
+        self.objects_with_cards_set = self.AddressStack()
         #
         # A list of all prebuilt GC objects that contain pointers to the heap
         self.prebuilt_root_objects = self.AddressStack()
@@ -625,7 +633,7 @@
             # if 'can_make_young'.  The interesting case of 'can_make_young'
             # is for large objects, bigger than the 'large_objects' threshold,
             # which are raw-malloced but still young.
-            extra_flags = GCFLAG_NO_YOUNG_PTRS
+            extra_flags = GCFLAG_TRACK_YOUNG_PTRS
             #
         else:
             # No, so proceed to allocate it externally with raw_malloc().
@@ -643,7 +651,7 @@
                 # Reserve N extra words containing card bits before the object.
                 extra_words = self.card_marking_words_for_length(length)
                 cardheadersize = WORD * extra_words
-                extra_flags = GCFLAG_HAS_CARDS
+                extra_flags = GCFLAG_HAS_CARDS | GCFLAG_TRACK_YOUNG_PTRS
                 # note that if 'can_make_young', then card marking will only
                 # be used later, after (and if) the object becomes old
             #
@@ -686,7 +694,7 @@
                 self.young_rawmalloced_objects.add(result + size_gc_header)
             else:
                 self.old_rawmalloced_objects.append(result + size_gc_header)
-                extra_flags |= GCFLAG_NO_YOUNG_PTRS
+                extra_flags |= GCFLAG_TRACK_YOUNG_PTRS
         #
         # Common code to fill the header and length of the object.
         self.init_gc_object(result, typeid, extra_flags)
@@ -777,7 +785,7 @@
     def init_gc_object_immortal(self, addr, typeid16, flags=0):
         # For prebuilt GC objects, the flags must contain
         # GCFLAG_NO_xxx_PTRS, at least initially.
-        flags |= GCFLAG_NO_HEAP_PTRS | GCFLAG_NO_YOUNG_PTRS
+        flags |= GCFLAG_NO_HEAP_PTRS | GCFLAG_TRACK_YOUNG_PTRS
         self.init_gc_object(addr, typeid16, flags)
 
     def is_in_nursery(self, addr):
@@ -870,8 +878,8 @@
         ll_assert(not self.is_in_nursery(obj),
                   "object in nursery after collection")
         # similarily, all objects should have this flag:
-        ll_assert(self.header(obj).tid & GCFLAG_NO_YOUNG_PTRS,
-                  "missing GCFLAG_NO_YOUNG_PTRS")
+        ll_assert(self.header(obj).tid & GCFLAG_TRACK_YOUNG_PTRS,
+                  "missing GCFLAG_TRACK_YOUNG_PTRS")
         # the GCFLAG_VISITED should not be set between collections
         ll_assert(self.header(obj).tid & GCFLAG_VISITED == 0,
                   "unexpected GCFLAG_VISITED")
@@ -910,7 +918,7 @@
     # for the JIT: a minimal description of the write_barrier() method
     # (the JIT assumes it is of the shape
     #  "if addr_struct.int0 & JIT_WB_IF_FLAG: remember_young_pointer()")
-    JIT_WB_IF_FLAG = GCFLAG_NO_YOUNG_PTRS
+    JIT_WB_IF_FLAG = GCFLAG_TRACK_YOUNG_PTRS
 
     @classmethod
     def JIT_max_size_of_young_obj(cls):
@@ -921,13 +929,13 @@
         return cls.minimal_size_in_nursery
 
     def write_barrier(self, newvalue, addr_struct):
-        if self.header(addr_struct).tid & GCFLAG_NO_YOUNG_PTRS:
+        if self.header(addr_struct).tid & GCFLAG_TRACK_YOUNG_PTRS:
             self.remember_young_pointer(addr_struct, newvalue)
 
     def write_barrier_from_array(self, newvalue, addr_array, index):
-        if self.header(addr_array).tid & GCFLAG_NO_YOUNG_PTRS:
+        if self.header(addr_array).tid & GCFLAG_TRACK_YOUNG_PTRS:
             if self.card_page_indices > 0:     # <- constant-folded
-                self.remember_young_pointer_from_array(addr_array, index)
+                self.remember_young_pointer_from_array2(addr_array, index)
             else:
                 self.remember_young_pointer(addr_array, newvalue)
 
@@ -943,20 +951,23 @@
         def remember_young_pointer(addr_struct, newvalue):
             # 'addr_struct' is the address of the object in which we write.
             # 'newvalue' is the address that we are going to write in there.
+            # We know that 'addr_struct' has GCFLAG_TRACK_YOUNG_PTRS so far.
+            #
             if DEBUG:   # note: PYPY_GC_DEBUG=1 does not enable this
-                ll_assert(self.debug_is_old_object(addr_struct),
-                          "young object with GCFLAG_NO_YOUNG_PTRS")
+                ll_assert(self.debug_is_old_object(addr_struct) or
+                          self.header(addr_struct).tid & GCFLAG_HAS_CARDS != 0,
+                      "young object with GCFLAG_TRACK_YOUNG_PTRS and no cards")
             #
-            # If it seems that what we are writing is a pointer to the nursery
+            # If it seems that what we are writing is a pointer to a young obj
             # (as checked with appears_to_be_young()), then we need
-            # to remove the flag GCFLAG_NO_YOUNG_PTRS and add the old object
-            # to the list 'old_objects_pointing_to_young'.  We know that
+            # to remove the flag GCFLAG_TRACK_YOUNG_PTRS and add the object
+            # to the list 'objects_pointing_to_young'.  We know that
             # 'addr_struct' cannot be in the nursery, because nursery objects
-            # never have the flag GCFLAG_NO_YOUNG_PTRS to start with.
+            # never have the flag GCFLAG_TRACK_YOUNG_PTRS to start with.
             objhdr = self.header(addr_struct)
             if self.appears_to_be_young(newvalue):
-                self.old_objects_pointing_to_young.append(addr_struct)
-                objhdr.tid &= ~GCFLAG_NO_YOUNG_PTRS
+                self.objects_pointing_to_young.append(addr_struct)
+                objhdr.tid &= ~GCFLAG_TRACK_YOUNG_PTRS
             #
             # Second part: if 'addr_struct' is actually a prebuilt GC
             # object and it's the first time we see a write to it, we
@@ -976,20 +987,22 @@
 
     def _init_writebarrier_with_card_marker(self):
         DEBUG = self.DEBUG
-        def remember_young_pointer_from_array(addr_array, index):
+        def remember_young_pointer_from_array2(addr_array, index):
             # 'addr_array' is the address of the object in which we write,
             # which must have an array part;  'index' is the index of the
             # item that is (or contains) the pointer that we write.
-            if DEBUG:   # note: PYPY_GC_DEBUG=1 does not enable this
-                ll_assert(self.debug_is_old_object(addr_array),
-                          "young array with GCFLAG_NO_YOUNG_PTRS")
+            # We know that 'addr_array' has GCFLAG_TRACK_YOUNG_PTRS so far.
+            #
             objhdr = self.header(addr_array)
             if objhdr.tid & GCFLAG_HAS_CARDS == 0:
                 #
+                if DEBUG:   # note: PYPY_GC_DEBUG=1 does not enable this
+                    ll_assert(self.debug_is_old_object(addr_array),
+                        "young array with no card but GCFLAG_TRACK_YOUNG_PTRS")
+                #
                 # no cards, use default logic.  Mostly copied from above.
-                self.old_objects_pointing_to_young.append(addr_array)
-                objhdr = self.header(addr_array)
-                objhdr.tid &= ~GCFLAG_NO_YOUNG_PTRS
+                self.objects_pointing_to_young.append(addr_array)
+                objhdr.tid &= ~GCFLAG_TRACK_YOUNG_PTRS
                 if objhdr.tid & GCFLAG_NO_HEAP_PTRS:
                     objhdr.tid &= ~GCFLAG_NO_HEAP_PTRS
                     self.prebuilt_root_objects.append(addr_array)
@@ -1002,26 +1015,85 @@
             bitmask = 1 << (bitindex & 7)
             #
             # If the bit is already set, leave now.
-            size_gc_header = self.gcheaderbuilder.size_gc_header
-            addr_byte = addr_array - size_gc_header
-            addr_byte = llarena.getfakearenaaddress(addr_byte) + (~byteindex)
+            addr_byte = self.get_card(addr_array, byteindex)
             byte = ord(addr_byte.char[0])
             if byte & bitmask:
                 return
             #
             # We set the flag (even if the newly written address does not
             # actually point to the nursery, which seems to be ok -- actually
-            # it seems more important that remember_young_pointer_from_array()
+            # it seems more important that remember_young_pointer_from_array2()
             # does not take 3 arguments).
             addr_byte.char[0] = chr(byte | bitmask)
             #
             if objhdr.tid & GCFLAG_CARDS_SET == 0:
-                self.old_objects_with_cards_set.append(addr_array)
+                self.objects_with_cards_set.append(addr_array)
                 objhdr.tid |= GCFLAG_CARDS_SET
 
-        remember_young_pointer_from_array._dont_inline_ = True
-        self.remember_young_pointer_from_array = (
-            remember_young_pointer_from_array)
+        remember_young_pointer_from_array2._dont_inline_ = True
+        assert self.card_page_indices > 0
+        self.remember_young_pointer_from_array2 = (
+            remember_young_pointer_from_array2)
+
+        # xxx trying it out for the JIT: a 3-arguments version of the above
+        def remember_young_pointer_from_array3(addr_array, index, newvalue):
+            objhdr = self.header(addr_array)
+            #
+            # a single check for the common case of neither GCFLAG_HAS_CARDS
+            # nor GCFLAG_NO_HEAP_PTRS
+            if objhdr.tid & (GCFLAG_HAS_CARDS | GCFLAG_NO_HEAP_PTRS) == 0:
+                # common case: fast path, jump to the end of the function
+                pass
+            elif objhdr.tid & GCFLAG_HAS_CARDS == 0:
+                # no cards, but GCFLAG_NO_HEAP_PTRS is set.
+                objhdr.tid &= ~GCFLAG_NO_HEAP_PTRS
+                self.prebuilt_root_objects.append(addr_array)
+                # jump to the end of the function
+            else:
+                # case with cards.
+                #
+                # If the newly written address does not actually point to a
+                # young object, leave now.
+                if not self.appears_to_be_young(newvalue):
+                    return
+                #
+                # 'addr_array' is a raw_malloc'ed array with card markers
+                # in front.  Compute the index of the bit to set:
+                bitindex = index >> self.card_page_shift
+                byteindex = bitindex >> 3
+                bitmask = 1 << (bitindex & 7)
+                #
+                # If the bit is already set, leave now.
+                addr_byte = self.get_card(addr_array, byteindex)
+                byte = ord(addr_byte.char[0])
+                if byte & bitmask:
+                    return
+                addr_byte.char[0] = chr(byte | bitmask)
+                #
+                if objhdr.tid & GCFLAG_CARDS_SET == 0:
+                    self.objects_with_cards_set.append(addr_array)
+                    objhdr.tid |= GCFLAG_CARDS_SET
+                return
+            #
+            # Logic for the no-cards case, put here to minimize the number
+            # of checks done at the start of the function
+            if DEBUG:   # note: PYPY_GC_DEBUG=1 does not enable this
+                ll_assert(self.debug_is_old_object(addr_array),
+                        "young array with no card but GCFLAG_TRACK_YOUNG_PTRS")
+            #
+            if self.appears_to_be_young(newvalue):
+                self.objects_pointing_to_young.append(addr_array)
+                objhdr.tid &= ~GCFLAG_TRACK_YOUNG_PTRS
+
+        remember_young_pointer_from_array3._dont_inline_ = True
+        assert self.card_page_indices > 0
+        self.remember_young_pointer_from_array3 = (
+            remember_young_pointer_from_array3)
+
+    def get_card(self, obj, byteindex):
+        size_gc_header = self.gcheaderbuilder.size_gc_header
+        addr_byte = obj - size_gc_header
+        return llarena.getfakearenaaddress(addr_byte) + (~byteindex)
 
 
     def assume_young_pointers(self, addr_struct):
@@ -1029,15 +1101,16 @@
         may now contain young pointers.''
         """
         objhdr = self.header(addr_struct)
-        if objhdr.tid & GCFLAG_NO_YOUNG_PTRS:
-            self.old_objects_pointing_to_young.append(addr_struct)
-            objhdr.tid &= ~GCFLAG_NO_YOUNG_PTRS
+        if objhdr.tid & GCFLAG_TRACK_YOUNG_PTRS:
+            self.objects_pointing_to_young.append(addr_struct)
+            objhdr.tid &= ~GCFLAG_TRACK_YOUNG_PTRS
             #
             if objhdr.tid & GCFLAG_NO_HEAP_PTRS:
                 objhdr.tid &= ~GCFLAG_NO_HEAP_PTRS
                 self.prebuilt_root_objects.append(addr_struct)
 
-    def writebarrier_before_copy(self, source_addr, dest_addr):
+    def writebarrier_before_copy(self, source_addr, dest_addr,
+                                 source_start, dest_start, length):
         """ This has the same effect as calling writebarrier over
         each element in dest copied from source, except it might reset
         one of the following flags a bit too eagerly, which means we'll have
@@ -1045,15 +1118,36 @@
         """
         source_hdr = self.header(source_addr)
         dest_hdr = self.header(dest_addr)
-        if dest_hdr.tid & GCFLAG_NO_YOUNG_PTRS == 0:
+        if dest_hdr.tid & GCFLAG_TRACK_YOUNG_PTRS == 0:
             return True
         # ^^^ a fast path of write-barrier
         #
-        if (source_hdr.tid & GCFLAG_NO_YOUNG_PTRS == 0 or
-            source_hdr.tid & GCFLAG_CARDS_SET != 0):
+        if source_hdr.tid & GCFLAG_HAS_CARDS != 0:
+            #
+            if source_hdr.tid & GCFLAG_TRACK_YOUNG_PTRS == 0:
+                # The source object may have random young pointers.
+                # Return False to mean "do it manually in ll_arraycopy".
+                return False
+            #
+            if source_hdr.tid & GCFLAG_CARDS_SET == 0:
+                # The source object has no young pointers at all.  Done.
+                return True
+            #
+            if dest_hdr.tid & GCFLAG_HAS_CARDS == 0:
+                # The dest object doesn't have cards.  Do it manually.
+                return False
+            #
+            if source_start != 0 or dest_start != 0:
+                # Misaligned.  Do it manually.
+                return False
+            #
+            self.manually_copy_card_bits(source_addr, dest_addr, length)
+            return True
+        #
+        if source_hdr.tid & GCFLAG_TRACK_YOUNG_PTRS == 0:
             # there might be in source a pointer to a young object
-            self.old_objects_pointing_to_young.append(dest_addr)
-            dest_hdr.tid &= ~GCFLAG_NO_YOUNG_PTRS
+            self.objects_pointing_to_young.append(dest_addr)
+            dest_hdr.tid &= ~GCFLAG_TRACK_YOUNG_PTRS
         #
         if dest_hdr.tid & GCFLAG_NO_HEAP_PTRS:
             if source_hdr.tid & GCFLAG_NO_HEAP_PTRS == 0:
@@ -1061,6 +1155,22 @@
                 self.prebuilt_root_objects.append(dest_addr)
         return True
 
+    def manually_copy_card_bits(self, source_addr, dest_addr, length):
+        # manually copy the individual card marks from source to dest
+        bytes = self.card_marking_bytes_for_length(length)
+        #
+        i = 0
+        while i < bytes:
+            addr_srcbyte = self.get_card(source_addr, i)
+            addr_dstbyte = self.get_card(dest_addr, i)
+            byte = ord(addr_srcbyte.char[0])
+            addr_dstbyte.char[0] = chr(ord(addr_dstbyte.char[0]) | byte)
+            i += 1
+        #
+        dest_hdr = self.header(dest_addr)
+        if dest_hdr.tid & GCFLAG_CARDS_SET == 0:
+            self.objects_with_cards_set.append(dest_addr)
+            dest_hdr.tid |= GCFLAG_CARDS_SET
 
     # ----------
     # Nursery collection
@@ -1077,20 +1187,28 @@
         # Note that during this step, we ignore references to further
         # young objects; only objects directly referenced by roots
         # are copied out or flagged.  They are also added to the list
-        # 'old_objects_pointing_to_young'.
+        # 'objects_pointing_to_young'.
         self.collect_roots_in_nursery()
         #
-        # If we are using card marking, do a partial trace of the arrays
-        # that are flagged with GCFLAG_CARDS_SET.
-        if self.card_page_indices > 0:
-            self.collect_cardrefs_to_nursery()
-        #
-        # Now trace objects from 'old_objects_pointing_to_young'.
-        # All nursery objects they reference are copied out of the
-        # nursery, and again added to 'old_objects_pointing_to_young'.
-        # All young raw-malloced object found is flagged GCFLAG_VISITED.
-        # We proceed until 'old_objects_pointing_to_young' is empty.
-        self.collect_oldrefs_to_nursery()
+        while True:
+            # If we are using card marking, do a partial trace of the arrays
+            # that are flagged with GCFLAG_CARDS_SET.
+            if self.card_page_indices > 0:
+                self.collect_cardrefs_to_nursery()
+            #
+            # Now trace objects from 'objects_pointing_to_young'.
+            # All nursery objects they reference are copied out of the
+            # nursery, and again added to 'objects_pointing_to_young'.
+            # Any young raw-malloced object found is flagged GCFLAG_VISITED.
+            # We proceed until 'objects_pointing_to_young' is empty.
+            self.collect_oldrefs_to_nursery()
+            #
+            # We have to loop back if collect_oldrefs_to_nursery caused
+            # new objects to show up in objects_with_cards_set
+            if self.card_page_indices > 0:
+                if self.objects_with_cards_set.non_empty():
+                    continue
+            break
         #
         # Now all live nursery objects should be out.  Update the young
         # weakrefs' targets.
@@ -1123,7 +1241,7 @@
         # we don't need to trace prebuilt GcStructs during a minor collect:
         # if a prebuilt GcStruct contains a pointer to a young object,
         # then the write_barrier must have ensured that the prebuilt
-        # GcStruct is in the list self.old_objects_pointing_to_young.
+        # GcStruct is in the list self.objects_pointing_to_young.
         self.root_walker.walk_roots(
             MiniMarkGC._trace_drag_out1,  # stack roots
             MiniMarkGC._trace_drag_out1,  # static in prebuilt non-gc
@@ -1131,7 +1249,7 @@
 
     def collect_cardrefs_to_nursery(self):
         size_gc_header = self.gcheaderbuilder.size_gc_header
-        oldlist = self.old_objects_with_cards_set
+        oldlist = self.objects_with_cards_set
         while oldlist.non_empty():
             obj = oldlist.pop()
             #
@@ -1147,11 +1265,11 @@
             bytes = self.card_marking_bytes_for_length(length)
             p = llarena.getfakearenaaddress(obj - size_gc_header)
             #
-            # If the object doesn't have GCFLAG_NO_YOUNG_PTRS, then it
-            # means that it is in 'old_objects_pointing_to_young' and
+            # If the object doesn't have GCFLAG_TRACK_YOUNG_PTRS, then it
+            # means that it is in 'objects_pointing_to_young' and
             # will be fully traced by collect_oldrefs_to_nursery() just
             # afterwards.
-            if self.header(obj).tid & GCFLAG_NO_YOUNG_PTRS == 0:
+            if self.header(obj).tid & GCFLAG_TRACK_YOUNG_PTRS == 0:
                 #
                 # In that case, we just have to reset all card bits.
                 while bytes > 0:
@@ -1187,19 +1305,30 @@
 
 
     def collect_oldrefs_to_nursery(self):
-        # Follow the old_objects_pointing_to_young list and move the
+        # Follow the objects_pointing_to_young list and move the
         # young objects they point to out of the nursery.
-        oldlist = self.old_objects_pointing_to_young
+        oldlist = self.objects_pointing_to_young
         while oldlist.non_empty():
             obj = oldlist.pop()
             #
-            # Add the flag GCFLAG_NO_YOUNG_PTRS.  All live objects should have
-            # this flag set after a nursery collection.
-            self.header(obj).tid |= GCFLAG_NO_YOUNG_PTRS
+            # Check (somehow) that the flags are correct: we must not have
+            # GCFLAG_TRACK_YOUNG_PTRS so far.  But in a rare case, it's
+            # possible that the same obj is appended twice to the list
+            # (see _trace_drag_out, GCFLAG_VISITED case).  Filter it out
+            # here.
+            if self.header(obj).tid & GCFLAG_TRACK_YOUNG_PTRS != 0:
+                ll_assert(self.header(obj).tid & GCFLAG_VISITED != 0,
+                          "objects_pointing_to_young contains obj with "
+                          "GCFLAG_TRACK_YOUNG_PTRS and not GCFLAG_VISITED")
+                continue
+            #
+            # Add the flag GCFLAG_TRACK_YOUNG_PTRS.  All live objects should
+            # have this flag set after a nursery collection.
+            self.header(obj).tid |= GCFLAG_TRACK_YOUNG_PTRS
             #
             # Trace the 'obj' to replace pointers to nursery with pointers
             # outside the nursery, possibly forcing nursery objects out
-            # and adding them to 'old_objects_pointing_to_young' as well.
+            # and adding them to 'objects_pointing_to_young' as well.
             self.trace_and_drag_out_of_nursery(obj)
 
     def trace_and_drag_out_of_nursery(self, obj):
@@ -1238,7 +1367,19 @@
                 # 'obj' points to a young, raw-malloced object
                 if (self.header(obj).tid & GCFLAG_VISITED) == 0:
                     self.header(obj).tid |= GCFLAG_VISITED
-                    self.old_objects_pointing_to_young.append(obj)
+                    #
+                    # we just made 'obj' old, so we may need to add it
+                    # in the correct list:
+                    if self.header(obj).tid & GCFLAG_TRACK_YOUNG_PTRS == 0:
+                        # common case: GCFLAG_TRACK_YOUNG_PTRS is not set, so
+                        # the object may contain young pointers anywhere
+                        self.objects_pointing_to_young.append(obj)
+                    else:
+                        # large array case: the object contains card marks
+                        # that tell us where young pointers are, and it
+                        # is already in objects_with_cards_set.
+                        ll_assert(self.header(obj).tid & GCFLAG_HAS_CARDS != 0,
+                                  "neither YOUNG_PTRS nor HAS_CARDS??")
             return
         #
         # If 'obj' was already forwarded, change it to its forwarding address.
@@ -1285,11 +1426,11 @@
         # Change the original pointer to this object.
         root.address[0] = newobj
         #
-        # Add the newobj to the list 'old_objects_pointing_to_young',
+        # Add the newobj to the list 'objects_pointing_to_young',
         # because it can contain further pointers to other young objects.
         # We will fix such references to point to the copy of the young
-        # objects when we walk 'old_objects_pointing_to_young'.
-        self.old_objects_pointing_to_young.append(newobj)
+        # objects when we walk 'objects_pointing_to_young'.
+        self.objects_pointing_to_young.append(newobj)
 
 
     def _malloc_out_of_nursery(self, totalsize):
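
The card-marking helpers above (remember_young_pointer_from_array2/3 and get_card) share the same bit arithmetic: index >> card_page_shift selects a card, and one bit per card lives in the bytes stored just in front of the array. A tiny runnable model of that arithmetic follows; CARD_PAGE_SHIFT and the bytearray are illustrative, the real GC keeps the card bytes before the object header.

CARD_PAGE_SHIFT = 7                      # hypothetical: one card per 128 items

def set_card_bit(cards, index):
    # mirrors the computation in remember_young_pointer_from_array2()
    bitindex = index >> CARD_PAGE_SHIFT
    byteindex = bitindex >> 3
    bitmask = 1 << (bitindex & 7)
    byte = cards[byteindex]
    if byte & bitmask:
        return False                     # bit already set: nothing to do
    cards[byteindex] = byte | bitmask
    return True

cards = bytearray(2)                     # enough for 16 cards
assert set_card_bit(cards, 0)                       # first write into card 0
assert not set_card_bit(cards, 1)                   # same card: already marked
assert set_card_bit(cards, 3 << CARD_PAGE_SHIFT)    # a different card
assert cards[0] == 0x01 | 0x08           # bits 0 and 3, cf. the 0x01 | 0x04 test below
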
diff --git a/pypy/rpython/memory/gc/test/test_direct.py b/pypy/rpython/memory/gc/test/test_direct.py
--- a/pypy/rpython/memory/gc/test/test_direct.py
+++ b/pypy/rpython/memory/gc/test/test_direct.py
@@ -522,5 +522,78 @@
             self.stackroots.pop()
     test_card_marker.GC_PARAMS = {"card_page_indices": 4}
 
+    def test_writebarrier_before_copy(self):
+        from pypy.rpython.memory.gc import minimark
+        largeobj_size =  self.gc.nonlarge_max + 1
+        p_src = self.malloc(VAR, largeobj_size)
+        p_dst = self.malloc(VAR, largeobj_size)
+        # make them old
+        self.stackroots.append(p_src)
+        self.stackroots.append(p_dst)
+        self.gc.collect()
+        p_dst = self.stackroots.pop()
+        p_src = self.stackroots.pop()
+        #
+        addr_src = llmemory.cast_ptr_to_adr(p_src)
+        addr_dst = llmemory.cast_ptr_to_adr(p_dst)
+        hdr_src = self.gc.header(addr_src)
+        hdr_dst = self.gc.header(addr_dst)
+        #
+        assert hdr_src.tid & minimark.GCFLAG_TRACK_YOUNG_PTRS
+        assert hdr_dst.tid & minimark.GCFLAG_TRACK_YOUNG_PTRS
+        #
+        res = self.gc.writebarrier_before_copy(addr_src, addr_dst, 0, 0, 10)
+        assert res
+        assert hdr_dst.tid & minimark.GCFLAG_TRACK_YOUNG_PTRS
+        #
+        hdr_src.tid &= ~minimark.GCFLAG_TRACK_YOUNG_PTRS  # pretend we have young ptrs
+        res = self.gc.writebarrier_before_copy(addr_src, addr_dst, 0, 0, 10)
+        assert res # we optimized it
+        assert hdr_dst.tid & minimark.GCFLAG_TRACK_YOUNG_PTRS == 0 # and we copied the flag
+        #
+        hdr_src.tid |= minimark.GCFLAG_TRACK_YOUNG_PTRS
+        hdr_dst.tid |= minimark.GCFLAG_TRACK_YOUNG_PTRS
+        hdr_src.tid |= minimark.GCFLAG_HAS_CARDS
+        hdr_src.tid |= minimark.GCFLAG_CARDS_SET
+        # hdr_dst.tid does not have minimark.GCFLAG_HAS_CARDS
+        res = self.gc.writebarrier_before_copy(addr_src, addr_dst, 0, 0, 10)
+        assert not res # there might be young ptrs, let ll_arraycopy find them
+
+    def test_writebarrier_before_copy_preserving_cards(self):
+        from pypy.rpython.lltypesystem import llarena
+        from pypy.rpython.memory.gc import minimark
+        tid = self.get_type_id(VAR)
+        largeobj_size =  self.gc.nonlarge_max + 1
+        addr_src = self.gc.external_malloc(tid, largeobj_size)
+        addr_dst = self.gc.external_malloc(tid, largeobj_size)
+        hdr_src = self.gc.header(addr_src)
+        hdr_dst = self.gc.header(addr_dst)
+        #
+        assert hdr_src.tid & minimark.GCFLAG_HAS_CARDS
+        assert hdr_dst.tid & minimark.GCFLAG_HAS_CARDS
+        #
+        young_p = self.malloc(S)
+        self.gc.write_barrier_from_array(young_p, addr_src, 0)
+        index_in_third_page = int(2.5 * self.gc.card_page_indices)
+        assert index_in_third_page < largeobj_size
+        self.gc.write_barrier_from_array(young_p, addr_src,
+                                         index_in_third_page)
+        #
+        assert hdr_src.tid & minimark.GCFLAG_CARDS_SET
+        addr_byte = self.gc.get_card(addr_src, 0)
+        assert ord(addr_byte.char[0]) == 0x01 | 0x04  # bits 0 and 2
+        #
+        res = self.gc.writebarrier_before_copy(addr_src, addr_dst,
+                                             0, 0, 2*self.gc.card_page_indices)
+        assert res
+        #
+        assert hdr_dst.tid & minimark.GCFLAG_CARDS_SET
+        addr_byte = self.gc.get_card(addr_dst, 0)
+        assert ord(addr_byte.char[0]) == 0x01 | 0x04  # bits 0 and 2
+
+    test_writebarrier_before_copy_preserving_cards.GC_PARAMS = {
+        "card_page_indices": 4}
+
+
 class TestMiniMarkGCFull(DirectGCTest):
     from pypy.rpython.memory.gc.minimark import MiniMarkGC as GCClass
diff --git a/pypy/rpython/memory/gctransform/framework.py b/pypy/rpython/memory/gctransform/framework.py
--- a/pypy/rpython/memory/gctransform/framework.py
+++ b/pypy/rpython/memory/gctransform/framework.py
@@ -322,7 +322,8 @@
         if hasattr(GCClass, 'writebarrier_before_copy'):
             self.wb_before_copy_ptr = \
                     getfn(GCClass.writebarrier_before_copy.im_func,
-                    [s_gc] + [annmodel.SomeAddress()] * 2, annmodel.SomeBool())
+                    [s_gc] + [annmodel.SomeAddress()] * 2 +
+                    [annmodel.SomeInteger()] * 3, annmodel.SomeBool())
         elif GCClass.needs_write_barrier:
             raise NotImplementedError("GC needs write barrier, but does not provide writebarrier_before_copy functionality")
 
@@ -463,7 +464,7 @@
                                             annmodel.SomeInteger()],
                                            annmodel.s_None,
                                            inline=True)
-                func = getattr(gcdata.gc, 'remember_young_pointer_from_array',
+                func = getattr(gcdata.gc, 'remember_young_pointer_from_array3',
                                None)
                 if func is not None:
                     # func should not be a bound method, but a real function
@@ -471,7 +472,8 @@
                     self.write_barrier_from_array_failing_case_ptr = \
                                              getfn(func,
                                                    [annmodel.SomeAddress(),
-                                                    annmodel.SomeInteger()],
+                                                    annmodel.SomeInteger(),
+                                                    annmodel.SomeAddress()],
                                                    annmodel.s_None)
         self.statistics_ptr = getfn(GCClass.statistics.im_func,
                                     [s_gc, annmodel.SomeInteger()],
@@ -860,9 +862,9 @@
 
     def gct_get_write_barrier_from_array_failing_case(self, hop):
         op = hop.spaceop
-        hop.genop("same_as",
-                  [self.write_barrier_from_array_failing_case_ptr],
-                  resultvar=op.result)
+        v = getattr(self, 'write_barrier_from_array_failing_case_ptr',
+                    lltype.nullptr(op.result.concretetype.TO))
+        hop.genop("same_as", [v], resultvar=op.result)
 
     def gct_zero_gc_pointers_inside(self, hop):
         if not self.malloc_zero_filled:
@@ -883,7 +885,7 @@
         dest_addr = hop.genop('cast_ptr_to_adr', [op.args[1]],
                                 resulttype=llmemory.Address)
         hop.genop('direct_call', [self.wb_before_copy_ptr, self.c_const_gc,
-                                  source_addr, dest_addr],
+                                  source_addr, dest_addr] + op.args[2:],
                   resultvar=op.result)
 
     def gct_weakref_create(self, hop):
diff --git a/pypy/rpython/memory/gctransform/test/test_framework.py b/pypy/rpython/memory/gctransform/test/test_framework.py
--- a/pypy/rpython/memory/gctransform/test/test_framework.py
+++ b/pypy/rpython/memory/gctransform/test/test_framework.py
@@ -163,7 +163,8 @@
     GC_PARAMS = {}
     class GCClass(MarkSweepGC):
         needs_write_barrier = True
-        def writebarrier_before_copy(self, source, dest):
+        def writebarrier_before_copy(self, source, dest,
+                                     source_start, dest_start, length):
             return True
 
 def write_barrier_check(spaceop, needs_write_barrier=True):
diff --git a/pypy/rpython/memory/gcwrapper.py b/pypy/rpython/memory/gcwrapper.py
--- a/pypy/rpython/memory/gcwrapper.py
+++ b/pypy/rpython/memory/gcwrapper.py
@@ -136,11 +136,14 @@
         ptr = lltype.cast_opaque_ptr(llmemory.GCREF, ptr)
         return self.gc.id(ptr)
 
-    def writebarrier_before_copy(self, source, dest):
+    def writebarrier_before_copy(self, source, dest,
+                                 source_start, dest_start, length):
         if self.gc.needs_write_barrier:
             source_addr = llmemory.cast_ptr_to_adr(source)
             dest_addr   = llmemory.cast_ptr_to_adr(dest)
-            return self.gc.writebarrier_before_copy(source_addr, dest_addr)
+            return self.gc.writebarrier_before_copy(source_addr, dest_addr,
+                                                    source_start, dest_start,
+                                                    length)
         else:
             return True
 
diff --git a/pypy/rpython/memory/support.py b/pypy/rpython/memory/support.py
--- a/pypy/rpython/memory/support.py
+++ b/pypy/rpython/memory/support.py
@@ -140,6 +140,14 @@
             self.foreach(_add_in_dict, result)
             return result
 
+        def tolist(self):
+            """NOT_RPYTHON.  Returns the content as a list."""
+            lst = []
+            def _add(obj, lst):
+                lst.append(obj)
+            self.foreach(_add, lst)
+            return lst
+
         def remove(self, addr):
             """Remove 'addr' from the stack.  The addr *must* be in the list,
             and preferrably near the top.
diff --git a/pypy/rpython/module/test/test_posix.py b/pypy/rpython/module/test/test_posix.py
--- a/pypy/rpython/module/test/test_posix.py
+++ b/pypy/rpython/module/test/test_posix.py
@@ -43,6 +43,17 @@
         for i in range(len(stat)):
             assert long(getattr(func, 'item%d' % i)) == stat[i]
 
+    def test_stat_exception(self):
+        def fo():
+            try:
+                posix.stat('I/do/not/exist')
+            except OSError:
+                return True
+            else:
+                return False
+        res = self.interpret(fo,[])
+        assert res
+
     def test_times(self):
         import py; py.test.skip("llinterp does not like tuple returns")
         from pypy.rpython.test.test_llinterp import interpret
@@ -205,5 +216,8 @@
     def test_stat(self):
         py.test.skip("ootypesystem does not support os.stat")
 
+    def test_stat_exception(self):
+        py.test.skip("ootypesystem does not support os.stat")
+
     def test_chown(self):
         py.test.skip("ootypesystem does not support os.chown")
diff --git a/pypy/rpython/ootypesystem/ootype.py b/pypy/rpython/ootypesystem/ootype.py
--- a/pypy/rpython/ootypesystem/ootype.py
+++ b/pypy/rpython/ootypesystem/ootype.py
@@ -433,7 +433,9 @@
             "ll_streq": Meth([self.SELFTYPE_T], Bool),
             "ll_strcmp": Meth([self.SELFTYPE_T], Signed),
             "ll_startswith": Meth([self.SELFTYPE_T], Bool),
+            "ll_startswith_char": Meth([self.CHAR], Bool),
             "ll_endswith": Meth([self.SELFTYPE_T], Bool),
+            "ll_endswith_char": Meth([self.CHAR], Bool),
             "ll_find": Meth([self.SELFTYPE_T, Signed, Signed], Signed),
             "ll_rfind": Meth([self.SELFTYPE_T, Signed, Signed], Signed),
             "ll_count": Meth([self.SELFTYPE_T, Signed, Signed], Signed),
@@ -1429,10 +1431,18 @@
         # NOT_RPYTHON
         return self._str.startswith(s._str)
 
+    def ll_startswith_char(self, s):
+        # NOT_RPYTHON
+        return self._str.startswith(s)
+
     def ll_endswith(self, s):
         # NOT_RPYTHON
         return self._str.endswith(s._str)
 
+    def ll_endswith_char(self, s):
+        # NOT_RPYTHON
+        return self._str.endswith(s)
+
     def ll_find(self, s, start, end):
         # NOT_RPYTHON
         if start > len(self._str):  # workaround to cope with corner case
diff --git a/pypy/rpython/ootypesystem/rclass.py b/pypy/rpython/ootypesystem/rclass.py
--- a/pypy/rpython/ootypesystem/rclass.py
+++ b/pypy/rpython/ootypesystem/rclass.py
@@ -264,7 +264,8 @@
 
         for name, attrdef in selfattrs.iteritems():
             if not attrdef.readonly and self.is_quasi_immutable(name):
-                ootype.addFields(self.lowleveltype, {'mutable_'+name: OBJECT})
+                name = mangle('mutable_' + name, self.rtyper.getconfig())
+                ootype.addFields(self.lowleveltype, {name: OBJECT})
 
         classattributes = {}
         baseInstance = self.lowleveltype._superclass
diff --git a/pypy/rpython/ootypesystem/rdict.py b/pypy/rpython/ootypesystem/rdict.py
--- a/pypy/rpython/ootypesystem/rdict.py
+++ b/pypy/rpython/ootypesystem/rdict.py
@@ -18,7 +18,7 @@
 
 class DictRepr(AbstractDictRepr):
     def __init__(self, rtyper, key_repr, value_repr, dictkey, dictvalue,
-                 custom_eq_hash=None):
+                 custom_eq_hash=None, force_non_null=False):
         self.rtyper = rtyper
         self.custom_eq_hash = custom_eq_hash is not None
 
diff --git a/pypy/rpython/ootypesystem/test/test_oopbc.py b/pypy/rpython/ootypesystem/test/test_oopbc.py
--- a/pypy/rpython/ootypesystem/test/test_oopbc.py
+++ b/pypy/rpython/ootypesystem/test/test_oopbc.py
@@ -81,3 +81,18 @@
     res = interpret(f, [1], type_system='ootype')
     assert res == 2
 
+def test_quasi_immutable():
+    class A(object):
+        _immutable_fields_ = ['x?']
+        def __init__(self):
+            self.x = 3
+        def foo(self):
+            return self.x
+
+    a = A()
+
+    def f():
+        return a.foo()
+    
+    res = interpret(f, [], type_system='ootype')
+    assert res == 3
diff --git a/pypy/rpython/rdict.py b/pypy/rpython/rdict.py
--- a/pypy/rpython/rdict.py
+++ b/pypy/rpython/rdict.py
@@ -15,6 +15,7 @@
         dictvalue = self.dictdef.dictvalue
         s_key     = dictkey  .s_value
         s_value   = dictvalue.s_value
+        force_non_null = self.dictdef.force_non_null
         if (s_key.__class__ is annmodel.SomeObject and s_key.knowntype == object and
             s_value.__class__ is annmodel.SomeObject and s_value.knowntype == object):
             return robject.pyobj_repr
@@ -29,7 +30,8 @@
                                                      lambda: rtyper.getrepr(s_value),
                                                      dictkey,
                                                      dictvalue,
-                                                     custom_eq_hash)
+                                                     custom_eq_hash,
+                                                     force_non_null)
 
     def rtyper_makekey(self):
         self.dictdef.dictkey  .dont_change_any_more = True
diff --git a/pypy/rpython/rlist.py b/pypy/rpython/rlist.py
--- a/pypy/rpython/rlist.py
+++ b/pypy/rpython/rlist.py
@@ -667,7 +667,6 @@
     res = l.ll_getitem_fast(index)
     ll_delitem_nonneg(dum_nocheck, l, index)
     return res
-ll_pop.oopspec = 'list.pop(l, index)'
 
 def ll_reverse(l):
     length = l.ll_length()
diff --git a/pypy/rpython/rstr.py b/pypy/rpython/rstr.py
--- a/pypy/rpython/rstr.py
+++ b/pypy/rpython/rstr.py
@@ -81,16 +81,30 @@
             return super(AbstractStringRepr, self).rtype_is_true(hop)
 
     def rtype_method_startswith(self, hop):
-        str1_repr, str2_repr = self._str_reprs(hop)
-        v_str, v_value = hop.inputargs(str1_repr, str2_repr)
+        str1_repr = hop.args_r[0].repr
+        str2_repr = hop.args_r[1]
+        v_str = hop.inputarg(str1_repr, arg=0)
+        if str2_repr == str2_repr.char_repr:
+            v_value = hop.inputarg(str2_repr.char_repr, arg=1)
+            fn = self.ll.ll_startswith_char
+        else:
+            v_value = hop.inputarg(str2_repr, arg=1)
+            fn = self.ll.ll_startswith
         hop.exception_cannot_occur()
-        return hop.gendirectcall(self.ll.ll_startswith, v_str, v_value)
+        return hop.gendirectcall(fn, v_str, v_value)
 
     def rtype_method_endswith(self, hop):
-        str1_repr, str2_repr = self._str_reprs(hop)
-        v_str, v_value = hop.inputargs(str1_repr, str2_repr)
+        str1_repr = hop.args_r[0].repr
+        str2_repr = hop.args_r[1]
+        v_str = hop.inputarg(str1_repr, arg=0)
+        if str2_repr == str2_repr.char_repr:
+            v_value = hop.inputarg(str2_repr.char_repr, arg=1)
+            fn = self.ll.ll_endswith_char
+        else:
+            v_value = hop.inputarg(str2_repr, arg=1)
+            fn = self.ll.ll_endswith
         hop.exception_cannot_occur()
-        return hop.gendirectcall(self.ll.ll_endswith, v_str, v_value)
+        return hop.gendirectcall(fn, v_str, v_value)
 
     def rtype_method_find(self, hop, reverse=False):
         # XXX binaryop
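
rtype_method_startswith/endswith now check whether the argument is rtyped as a single character and, if so, dispatch to the new ll_startswith_char/ll_endswith_char helpers instead of the general string versions. A plain-Python sketch of the same specialisation, using isinstance on a one-character string as a stand-in for the char_repr check:

def ll_startswith_char(s, ch):
    # direct translation of the new helper: no substring allocation needed
    if not len(s):
        return False
    return s[0] == ch

def startswith(s, arg):
    if isinstance(arg, str) and len(arg) == 1:   # stand-in for "repr is char_repr"
        return ll_startswith_char(s, arg)
    return s[:len(arg)] == arg                   # general string/string path

assert startswith("one", "o")
assert not startswith("", "o")
assert startswith("one", "on")
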
diff --git a/pypy/rpython/test/test_rdict.py b/pypy/rpython/test/test_rdict.py
--- a/pypy/rpython/test/test_rdict.py
+++ b/pypy/rpython/test/test_rdict.py
@@ -598,6 +598,29 @@
         res = self.interpret(func, [])
         assert res in [5263, 6352]
 
+    def test_dict_popitem_hash(self):
+        def deq(n, m):
+            return n == m
+        def dhash(n):
+            return ~n
+        def func():
+            d = r_dict(deq, dhash)
+            d[5] = 2
+            d[6] = 3
+            k1, v1 = d.popitem()
+            assert len(d) == 1
+            k2, v2 = d.popitem()
+            try:
+                d.popitem()
+            except KeyError:
+                pass
+            else:
+                assert 0, "should have raised KeyError"
+            assert len(d) == 0
+            return k1*1000 + v1*100 + k2*10 + v2
+
+        res = self.interpret(func, [])
+        assert res in [5263, 6352]
 
 class TestLLtype(BaseTestRdict, LLRtypeMixin):
     def test_dict_but_not_with_char_keys(self):
@@ -860,6 +883,25 @@
         res = f()
         assert res == 1
 
+    def test_nonnull_hint(self):
+        def eq(a, b):
+            return a == b
+        def rhash(a):
+            return 3
+        
+        def func(i):
+            d = r_dict(eq, rhash, force_non_null=True)
+            if not i:
+                d[None] = i
+            else:
+                d[str(i)] = i
+            return "12" in d, d
+
+        llres = self.interpret(func, [12])
+        assert llres.item0 == 1
+        DICT = lltype.typeOf(llres.item1)
+        assert sorted(DICT.TO.entries.TO.OF._flds) == ['f_hash', 'key', 'value']
+
     # ____________________________________________________________
 
 
diff --git a/pypy/rpython/test/test_rstr.py b/pypy/rpython/test/test_rstr.py
--- a/pypy/rpython/test/test_rstr.py
+++ b/pypy/rpython/test/test_rstr.py
@@ -227,6 +227,15 @@
                 res = self.interpret(fn, [i,j])
                 assert res is fn(i, j)
 
+    def test_startswith_char(self):
+        const = self.const
+        def fn(i):
+            s = [const(''), const('one'), const('two'), const('o'), const('on'), const('ne'), const('e'), const('twos'), const('foobar'), const('fortytwo')]
+            return s[i].startswith('o')
+        for i in range(10):
+            res = self.interpret(fn, [i])
+            assert res == fn(i)
+
     def test_endswith(self):
         const = self.const
         def fn(i, j):
@@ -238,6 +247,15 @@
                 res = self.interpret(fn, [i,j])
                 assert res is fn(i, j)
 
+    def test_endswith_char(self):
+        const = self.const
+        def fn(i):
+            s = [const(''), const('one'), const('two'), const('o'), const('on'), const('ne'), const('e'), const('twos'), const('foobar'), const('fortytwo')]
+            return s[i].endswith('e')
+        for i in range(10):
+            res = self.interpret(fn, [i])
+            assert res == fn(i)
+
     def test_find(self):
         const = self.const
         def fn(i, j):
diff --git a/pypy/tool/gcc_cache.py b/pypy/tool/gcc_cache.py
--- a/pypy/tool/gcc_cache.py
+++ b/pypy/tool/gcc_cache.py
@@ -39,7 +39,16 @@
         data = ''
     if not (data.startswith('True') or data.startswith('FAIL\n')):
         try:
-            platform.compile(c_files, eci)
+            _previous = platform.log_errors
+            try:
+                platform.log_errors = False
+                platform.compile(c_files, eci)
+            finally:
+                del platform.log_errors
+                # ^^^remove from the instance --- needed so that it can
+                # compare equal to another instance without it
+                if platform.log_errors != _previous:
+                    platform.log_errors = _previous
             data = 'True'
             path.write(data)
         except CompilationError, e:
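
The gcc_cache change silences compiler error logging while probing the cache, then deletes the instance attribute again so the platform object keeps comparing equal to a fresh one, restoring the saved value only if the class-level default differs. The same save/restore idiom in isolation; the Platform class and quiet_compile are made-up names, only the log_errors attribute and the del trick come from the diff.

class Platform(object):
    log_errors = True                    # class-level default

def quiet_compile(platform, do_compile):
    _previous = platform.log_errors
    try:
        platform.log_errors = False      # shadow the class default on the instance
        do_compile()
    finally:
        del platform.log_errors          # drop the instance attribute again,
                                         # so the object equals an untouched one
        if platform.log_errors != _previous:
            platform.log_errors = _previous

p = Platform()
quiet_compile(p, lambda: None)
assert 'log_errors' not in p.__dict__    # back to the class default
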
diff --git a/pypy/tool/jitlogparser/parser.py b/pypy/tool/jitlogparser/parser.py
--- a/pypy/tool/jitlogparser/parser.py
+++ b/pypy/tool/jitlogparser/parser.py
@@ -1,9 +1,13 @@
 import re, sys
-from pypy.jit.metainterp.resoperation import rop, opname
+
+from pypy.jit.metainterp.resoperation import opname
 from pypy.jit.tool.oparser import OpParser
+from pypy.tool.logparser import parse_log_file, extract_category
 
 class Op(object):
     bridge = None
+    offset = None
+    asm = None
 
     def __init__(self, name, args, res, descr):
         self.name = name
@@ -51,17 +55,61 @@
 
     # factory method
     Op = Op
+    use_mock_model = True
+
+    def postprocess(self, loop, backend_dump=None, backend_tp=None,
+                    dump_start=0):
+        if backend_dump is not None:
+            raw_asm = self._asm_disassemble(backend_dump.decode('hex'),
+                                            backend_tp, dump_start)
+            asm = []
+            start = 0
+            for elem in raw_asm:
+                if len(elem.split("\t")) != 3:
+                    continue
+                adr, _, v = elem.split("\t")
+                if not start:
+                    start = int(adr.strip(":"), 16)
+                ofs = int(adr.strip(":"), 16) - start
+                if ofs >= 0:
+                    asm.append((ofs, v.strip("\n")))
+            asm_index = 0
+            for i, op in enumerate(loop.operations):
+                end = 0
+                j = i + 1
+                while end == 0:
+                    if j == len(loop.operations):
+                        end = loop.last_offset
+                        break
+                    if loop.operations[j].offset is None:
+                        j += 1
+                    else:
+                        end = loop.operations[j].offset
+                if op.offset is not None:
+                    while asm[asm_index][0] < op.offset:
+                        asm_index += 1
+                    end_index = asm_index
+                    while asm[end_index][0] < end:
+                        end_index += 1
+                    op.asm = '\n'.join([asm[i][1] for i in range(asm_index, end_index)])
+        return loop
+                    
+    def _asm_disassemble(self, d, origin_addr, tp):
+        from pypy.jit.backend.x86.tool.viewcode import machine_code_dump
+        return list(machine_code_dump(d, tp, origin_addr))
 
     @classmethod
-    def parse_from_input(cls, input):
-        return cls(input, None, {}, 'lltype', None,
-                   nonstrict=True).parse()
+    def parse_from_input(cls, input, **kwds):
+        parser = cls(input, None, {}, 'lltype', None,
+                     nonstrict=True)
+        loop = parser.parse()
+        return parser.postprocess(loop, **kwds)
 
     def parse_args(self, opname, argspec):
         if not argspec.strip():
             return [], None
         if opname == 'debug_merge_point':
-            return argspec.rsplit(", ", 1), None
+            return argspec.split(", ", 1), None
         else:
             args = argspec.split(', ')
             descr = None
@@ -95,12 +143,12 @@
 
     def __init__(self, operations, storage):
         if operations[0].name == 'debug_merge_point':
-            self.inline_level = int(operations[0].args[1])
-            m = re.search('<code object ([<>\w]+), file \'(.+?)\', line (\d+)> #(\d+) (\w+)',
-                         operations[0].getarg(0))
+            self.inline_level = int(operations[0].args[0])
+            m = re.search('<code object ([<>\w]+)\. file \'(.+?)\'\. line (\d+)> #(\d+) (\w+)',
+                         operations[0].getarg(1))
             if m is None:
                 # a non-code loop, like StrLiteralSearch or something
-                self.bytecode_name = operations[0].args[0].split(" ")[0][1:]
+                self.bytecode_name = operations[0].args[1].split(" ")[0][1:]
             else:
                 self.name, self.filename, lineno, bytecode_no, self.bytecode_name = m.groups()
                 self.startlineno = int(lineno)
@@ -119,6 +167,9 @@
     def getcode(self):
         return self.code
 
+    def has_valid_code(self):
+        return self.code is not None
+
     def getopcode(self):
         return self.code.map[self.bytecode_no]
 
@@ -218,6 +269,12 @@
         return self._lineset
     lineset = property(getlineset)
 
+    def has_valid_code(self):
+        for chunk in self.chunks:
+            if not chunk.has_valid_code():
+                return False
+        return True
+
     def _compute_linerange(self):
         self._lineset = set()
         minline = sys.maxint
@@ -273,3 +330,33 @@
             res.append(op)
             i += 1
     return res
+
+
+def import_log(logname, ParserCls=SimpleParser):
+    log = parse_log_file(logname)
+    addrs = {}
+    for entry in extract_category(log, 'jit-backend-addr'):
+        m = re.search('bootstrap ([\da-f]+)', entry)
+        name = entry[:entry.find('(') - 1]
+        addrs[int(m.group(1), 16)] = name
+    dumps = {}
+    for entry in extract_category(log, 'jit-backend-dump'):
+        backend, _, dump, _ = entry.split("\n")
+        _, addr, _, data = re.split(" +", dump)
+        backend_name = backend.split(" ")[1]
+        addr = int(addr[1:], 16)
+        if addr in addrs:
+            dumps[addrs[addr]] = (backend_name, addr, data)
+    loops = []
+    for entry in extract_category(log, 'jit-log-opt'):
+        parser = ParserCls(entry, None, {}, 'lltype', None,
+                           nonstrict=True)
+        loop = parser.parse()
+        comm = loop.comment
+        name = comm[2:comm.find(':')-1]
+        if name in dumps:
+            bname, start_ofs, dump = dumps[name]
+            parser.postprocess(loop, backend_tp=bname, backend_dump=dump,
+                               dump_start=start_ofs)
+        loops.append(loop)
+    return log, loops
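
For reference, the new import_log ties three log categories together: 'jit-backend-addr' entries name each loop and give its bootstrap address, 'jit-backend-dump' entries carry the hex-encoded machine code for an address, and 'jit-log-opt' entries hold the operations that postprocess() later annotates. A minimal standalone sketch of the pairing step; the sample strings are modelled on the logtest.log file added below, and the variable names are illustrative rather than taken from the patch:

import re

# 'jit-backend-addr' and 'jit-backend-dump' entries shaped like logtest.log
addr_entry = ("Loop 0 (<code object f, file 'x.py', line 2> #9 LOAD_FAST) "
              "has address 7f3b0b2e645d to 7f3b0b2e64af "
              "(bootstrap 7f3b0b2e63d5)")
dump_entry = ("BACKEND x86_64\n"
              "SYS_EXECUTABLE python\n"
              "CODE_DUMP @7f3b0b2e63d5 +0  554889E5\n")

# 1. map the bootstrap address to the loop name, as import_log does
m = re.search('bootstrap ([\da-f]+)', addr_entry)
name = addr_entry[:addr_entry.find('(') - 1]        # -> 'Loop 0'
addrs = {int(m.group(1), 16): name}

# 2. split the dump into backend name, dump address and hex machine code
backend_line, _, dump_line, _ = dump_entry.split("\n")
_, addr, _, data = re.split(" +", dump_line)
addr = int(addr[1:], 16)                            # strip the leading '@'

dumps = {}
if addr in addrs:
    dumps[addrs[addr]] = (backend_line.split(" ")[1], addr, data)
# dumps now maps 'Loop 0' to ('x86_64', 0x7f3b0b2e63d5, '554889E5');
# postprocess() later decodes the hex and attaches the disassembly to the ops.
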
diff --git a/pypy/tool/jitlogparser/test/logtest.log b/pypy/tool/jitlogparser/test/logtest.log
new file mode 100644
--- /dev/null
+++ b/pypy/tool/jitlogparser/test/logtest.log
@@ -0,0 +1,38 @@
+[11f210b47027] {jit-backend
+[11f210b900f7] {jit-backend-dump
+BACKEND x86_64
+SYS_EXECUTABLE python
+CODE_DUMP @7f3b0b2e63d5 +0  554889E5534154415541564157488DA500000000488B042590C5540148C7042590C554010000000048898570FFFFFF488B042598C5540148C7042598C554010000000048898568FFFFFF488B0425A0C5540148C70425A0C554010000000048898560FFFFFF488B0425A8C5540148C70425A8C554010000000048898558FFFFFF4C8B3C2550525B0149BB3050920D3B7F00004D8B334983C60149BB3050920D3B7F00004D89334981FF102700000F8D000000004983C7014C8B342580F76A024983EE014C89342580F76A024983FE000F8C00000000E9AEFFFFFF488B042588F76A024829E0483B042580EC3C01760D49BB05632E0B3B7F000041FFD3554889E5534154415541564157488DA550FFFFFF4889BD70FFFFFF4889B568FFFFFF48899560FFFFFF48898D58FFFFFF4D89C7E954FFFFFF49BB00602E0B3B7F000041FFD34440484C3D030300000049BB00602E0B3B7F000041FFD34440484C3D070304000000
+[11f210b949b3] jit-backend-dump}
+[11f210b949b4] {jit-backend-addr
+Loop 0 (<code object f, file 'x.py', line 2> #9 LOAD_FAST) has address 7f3b0b2e645d to 7f3b0b2e64af (bootstrap 7f3b0b2e63d5)
+[11f210bab188] jit-backend-addr}
+[11f210bab189] jit-backend}
+[11f210bacbb7] {jit-log-opt-loop
+# Loop 0 : loop with 19 ops
+[p0, p1, p2, p3, i4]
+debug_merge_point(0, '<code object f. file 'x.py'. line 2> #9 LOAD_FAST')
+debug_merge_point(0, '<code object f. file 'x.py'. line 2> #12 LOAD_CONST')
+debug_merge_point(0, '<code object f. file 'x.py'. line 2> #15 COMPARE_OP')
++166: i6 = int_lt(i4, 10000)
+guard_true(i6, descr=<Guard3>) [p1, p0, p2, p3, i4]
+debug_merge_point(0, '<code object f. file 'x.py'. line 2> #18 POP_JUMP_IF_FALSE')
+debug_merge_point(0, '<code object f. file 'x.py'. line 2> #21 LOAD_FAST')
+debug_merge_point(0, '<code object f. file 'x.py'. line 2> #24 LOAD_CONST')
+debug_merge_point(0, '<code object f. file 'x.py'. line 2> #27 INPLACE_ADD')
++179: i8 = int_add(i4, 1)
+debug_merge_point(0, '<code object f. file 'x.py'. line 2> #28 STORE_FAST')
+debug_merge_point(0, '<code object f. file 'x.py'. line 2> #31 JUMP_ABSOLUTE')
++183: i10 = getfield_raw(40564608, descr=<SignedFieldDescr pypysig_long_struct.c_value 0>)
++191: i12 = int_sub(i10, 1)
++195: setfield_raw(40564608, i12, descr=<SignedFieldDescr pypysig_long_struct.c_value 0>)
++203: i14 = int_lt(i12, 0)
+guard_false(i14, descr=<Guard4>) [p1, p0, p2, p3, i8, None]
+debug_merge_point(0, '<code object f. file 'x.py'. line 2> #9 LOAD_FAST')
++213: jump(p0, p1, p2, p3, i8, descr=<Loop0>)
++218: --end of the loop--
+[11f210c17981] jit-log-opt-loop}
+[11f210fb1d21] {jit-backend-counts
+0:8965
+1:2
+[11f210fb381b] jit-backend-counts}
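
The +NNN: prefixes in the jit-log-opt-loop section above are byte offsets into the code dump. SimpleParser.postprocess() matches every operation that carries an offset against the disassembled dump, giving it the instructions between its own offset and the next annotated operation (or last_offset for the final one). A self-contained sketch of that matching, with a stand-in Op class instead of the parser's real operation objects; the patch's loop walks by index, but it computes the same slices:

class Op(object):
    # stand-in for a parsed operation; only 'offset' and 'asm' matter here
    def __init__(self, offset):
        self.offset = offset
        self.asm = None

def attach_asm(operations, asm, last_offset):
    # 'asm' is a sorted list of (offset, instruction-text) pairs, like the
    # one postprocess() builds from the disassembled backend dump
    for i, op in enumerate(operations):
        if op.offset is None:
            continue
        end = last_offset
        for later in operations[i + 1:]:
            if later.offset is not None:    # next operation with an offset
                end = later.offset
                break
        op.asm = '\n'.join(text for ofs, text in asm
                           if op.offset <= ofs < end)

ops = [Op(166), Op(None), Op(179)]
asm = [(166, 'cmp rax, 0x2710'), (172, 'jge +0x4f'), (179, 'add rax, 1')]
attach_asm(ops, asm, last_offset=183)
assert ops[0].asm == 'cmp rax, 0x2710\njge +0x4f'
assert ops[2].asm == 'add rax, 1'
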
diff --git a/pypy/tool/jitlogparser/test/test_parser.py b/pypy/tool/jitlogparser/test/test_parser.py
--- a/pypy/tool/jitlogparser/test/test_parser.py
+++ b/pypy/tool/jitlogparser/test/test_parser.py
@@ -1,12 +1,11 @@
-from pypy.jit.metainterp.resoperation import ResOperation, rop
-from pypy.jit.metainterp.history import ConstInt, Const
-from pypy.tool.jitlogparser.parser import SimpleParser, TraceForOpcode, Function,\
-     adjust_bridges
+from pypy.tool.jitlogparser.parser import (SimpleParser, TraceForOpcode,
+                                           Function, adjust_bridges,
+                                           import_log)
 from pypy.tool.jitlogparser.storage import LoopStorage
-import py
+import py, sys
 
-def parse(input):
-    return SimpleParser.parse_from_input(input)
+def parse(input, **kwds):
+    return SimpleParser.parse_from_input(input, **kwds)
 
 
 def test_parse():
@@ -29,7 +28,7 @@
 def test_parse_non_code():
     ops = parse('''
     []
-    debug_merge_point("SomeRandomStuff", 0)
+    debug_merge_point(0, "SomeRandomStuff")
     ''')
     res = Function.from_operations(ops.operations, LoopStorage())
     assert len(res.chunks) == 1
@@ -38,10 +37,10 @@
 def test_split():
     ops = parse('''
     [i0]
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 200> #10 ADD", 0)
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 200> #11 SUB", 0)
+    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 200> #10 ADD")
+    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 200> #11 SUB")
     i1 = int_add(i0, 1)
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 200> #11 SUB", 0)
+    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 200> #11 SUB")
     i2 = int_add(i1, 1)
     ''')
     res = Function.from_operations(ops.operations, LoopStorage())
@@ -54,12 +53,12 @@
 def test_inlined_call():
     ops = parse("""
     []
-    debug_merge_point('<code object inlined_call, file 'source.py', line 12> #28 CALL_FUNCTION', 0)
+    debug_merge_point(0, '<code object inlined_call. file 'source.py'. line 12> #28 CALL_FUNCTION')
     i18 = getfield_gc(p0, descr=<BoolFieldDescr pypy.interpreter.pyframe.PyFrame.inst_is_being_profiled 89>)
-    debug_merge_point('<code object inner, file 'source.py', line 9> #0 LOAD_FAST', 1)
-    debug_merge_point('<code object inner, file 'source.py', line 9> #3 LOAD_CONST', 1)
-    debug_merge_point('<code object inner, file 'source.py', line 9> #7 RETURN_VALUE', 1)
-    debug_merge_point('<code object inlined_call, file 'source.py', line 12> #31 STORE_FAST', 0)
+    debug_merge_point(1, '<code object inner. file 'source.py'. line 9> #0 LOAD_FAST')
+    debug_merge_point(1, '<code object inner. file 'source.py'. line 9> #3 LOAD_CONST')
+    debug_merge_point(1, '<code object inner. file 'source.py'. line 9> #7 RETURN_VALUE')
+    debug_merge_point(0, '<code object inlined_call. file 'source.py'. line 12> #31 STORE_FAST')
     """)
     res = Function.from_operations(ops.operations, LoopStorage())
     assert len(res.chunks) == 3 # two chunks + inlined call
@@ -72,10 +71,10 @@
 def test_name():
     ops = parse('''
     [i0]
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 200> #10 ADD", 0)
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 201> #11 SUB", 0)
+    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 200> #10 ADD")
+    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 201> #11 SUB")
     i1 = int_add(i0, 1)
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 202> #11 SUB", 0)
+    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 202> #11 SUB")
     i2 = int_add(i1, 1)
     ''')
     res = Function.from_operations(ops.operations, LoopStorage())
@@ -89,10 +88,10 @@
     ops = parse('''
     [i0]
     i3 = int_add(i0, 1)
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 200> #10 ADD", 0)
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 201> #11 SUB", 0)
+    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 200> #10 ADD")
+    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 201> #11 SUB")
     i1 = int_add(i0, 1)
-    debug_merge_point("<code object stuff, file '/I/dont/exist.py', line 202> #11 SUB", 0)
+    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 202> #11 SUB")
     i2 = int_add(i1, 1)
     ''')
     res = Function.from_operations(ops.operations, LoopStorage())
@@ -102,33 +101,37 @@
     fname = str(py.path.local(__file__).join('..', 'x.py'))
     ops = parse('''
     [i0, i1]
-    debug_merge_point("<code object f, file '%(fname)s', line 2> #0 LOAD_FAST", 0)
-    debug_merge_point("<code object f, file '%(fname)s', line 2> #3 LOAD_FAST", 0)
-    debug_merge_point("<code object f, file '%(fname)s', line 2> #6 BINARY_ADD", 0)
-    debug_merge_point("<code object f, file '%(fname)s', line 2> #7 RETURN_VALUE", 0)
+    debug_merge_point(0, "<code object f. file '%(fname)s'. line 2> #0 LOAD_FAST")
+    debug_merge_point(0, "<code object f. file '%(fname)s'. line 2> #3 LOAD_FAST")
+    debug_merge_point(0, "<code object f. file '%(fname)s'. line 2> #6 BINARY_ADD")
+    debug_merge_point(0, "<code object f. file '%(fname)s'. line 2> #7 RETURN_VALUE")
     ''' % locals())
     res = Function.from_operations(ops.operations, LoopStorage())
     assert res.chunks[1].lineno == 3
 
 def test_linerange():
+    if sys.version_info > (2, 6):
+        py.test.skip("unportable test")
     fname = str(py.path.local(__file__).join('..', 'x.py'))
     ops = parse('''
     [i0, i1]
-    debug_merge_point("<code object g, file '%(fname)s', line 5> #9 LOAD_FAST", 0)
-    debug_merge_point("<code object g, file '%(fname)s', line 5> #12 LOAD_CONST", 0)
-    debug_merge_point("<code object g, file '%(fname)s', line 5> #22 LOAD_CONST", 0)
-    debug_merge_point("<code object g, file '%(fname)s', line 5> #28 LOAD_CONST", 0)
-    debug_merge_point("<code object g, file '%(fname)s', line 5> #6 SETUP_LOOP", 0)
+    debug_merge_point(0, "<code object g. file '%(fname)s'. line 5> #9 LOAD_FAST")
+    debug_merge_point(0, "<code object g. file '%(fname)s'. line 5> #12 LOAD_CONST")
+    debug_merge_point(0, "<code object g. file '%(fname)s'. line 5> #22 LOAD_CONST")
+    debug_merge_point(0, "<code object g. file '%(fname)s'. line 5> #28 LOAD_CONST")
+    debug_merge_point(0, "<code object g. file '%(fname)s'. line 5> #6 SETUP_LOOP")
     ''' % locals())
     res = Function.from_operations(ops.operations, LoopStorage())
     assert res.linerange == (7, 9)
     assert res.lineset == set([7, 8, 9])
 
 def test_linerange_notstarts():
+    if sys.version_info > (2, 6):
+        py.test.skip("unportable test")
     fname = str(py.path.local(__file__).join('..', 'x.py'))
     ops = parse("""
     [p6, p1]
-    debug_merge_point('<code object h, file '%(fname)s', line 11> #17 FOR_ITER', 0)
+    debug_merge_point(0, '<code object h. file '%(fname)s'. line 11> #17 FOR_ITER')
     guard_class(p6, 144264192, descr=<Guard2>)
     p12 = getfield_gc(p6, descr=<GcPtrFieldDescr pypy.objspace.std.iterobject.W_AbstractSeqIterObject.inst_w_seq 12>)
     """ % locals())
@@ -168,14 +171,46 @@
     []
     int_add(0, 1)
     ''')
-    loops = LoopStorage().reconnect_loops([main, bridge])
+    LoopStorage().reconnect_loops([main, bridge])
     assert adjust_bridges(main, {})[1].name == 'guard_true'
     assert adjust_bridges(main, {'loop-13': True})[1].name == 'int_add'
 
 def test_parsing_strliteral():
     loop = parse("""
-    debug_merge_point('StrLiteralSearch at 11/51 [17, 8, 3, 1, 1, 1, 1, 51, 0, 19, 51, 1]', 0)
+    debug_merge_point(0, 'StrLiteralSearch at 11/51 [17, 8, 3, 1, 1, 1, 1, 51, 0, 19, 51, 1]')
     """)
     ops = Function.from_operations(loop.operations, LoopStorage())
     chunk = ops.chunks[0]
     assert chunk.bytecode_name == 'StrLiteralSearch'
+
+def test_parsing_assembler():
+    backend_dump = "554889E5534154415541564157488DA500000000488B042590C5540148C7042590C554010000000048898570FFFFFF488B042598C5540148C7042598C554010000000048898568FFFFFF488B0425A0C5540148C70425A0C554010000000048898560FFFFFF488B0425A8C5540148C70425A8C554010000000048898558FFFFFF4C8B3C2550525B0149BB30E06C96FC7F00004D8B334983C60149BB30E06C96FC7F00004D89334981FF102700000F8D000000004983C7014C8B342580F76A024983EE014C89342580F76A024983FE000F8C00000000E9AEFFFFFF488B042588F76A024829E0483B042580EC3C01760D49BB05F30894FC7F000041FFD3554889E5534154415541564157488DA550FFFFFF4889BD70FFFFFF4889B568FFFFFF48899560FFFFFF48898D58FFFFFF4D89C7E954FFFFFF49BB00F00894FC7F000041FFD34440484C3D030300000049BB00F00894FC7F000041FFD34440484C3D070304000000"
+    dump_start = 0x7f3b0b2e63d5
+    loop = parse("""
+    # Loop 0 : loop with 19 ops
+    [p0, p1, p2, p3, i4]
+    debug_merge_point(0, '<code object f. file 'x.py'. line 2> #15 COMPARE_OP')
+    +166: i6 = int_lt(i4, 10000)
+    guard_true(i6, descr=<Guard3>) [p1, p0, p2, p3, i4]
+    debug_merge_point(0, '<code object f. file 'x.py'. line 2> #27 INPLACE_ADD')
+    +179: i8 = int_add(i4, 1)
+    debug_merge_point(0, '<code object f. file 'x.py'. line 2> #31 JUMP_ABSOLUTE')
+    +183: i10 = getfield_raw(40564608, descr=<SignedFieldDescr pypysig_long_struct.c_value 0>)
+    +191: i12 = int_sub(i10, 1)
+    +195: setfield_raw(40564608, i12, descr=<SignedFieldDescr pypysig_long_struct.c_value 0>)
+    +203: i14 = int_lt(i12, 0)
+    guard_false(i14, descr=<Guard4>) [p1, p0, p2, p3, i8, None]
+    debug_merge_point(0, '<code object f. file 'x.py'. line 2> #9 LOAD_FAST')
+    +213: jump(p0, p1, p2, p3, i8, descr=<Loop0>)
+    +218: --end of the loop--""", backend_dump=backend_dump,
+                 dump_start=dump_start,
+                 backend_tp='x86_64')
+    cmp = loop.operations[1]
+    assert 'jge' in cmp.asm
+    assert '0x2710' in cmp.asm
+    assert 'jmp' in loop.operations[-1].asm
+
+def test_import_log():
+    _, loops = import_log(str(py.path.local(__file__).join('..',
+                                                           'logtest.log')))
+    assert 'jge' in loops[0].operations[3].asm
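
The test updates above all follow from the new debug_merge_point convention: the inline level is now the first argument, the code-object text the second, and the commas inside the '<code object ...>' text became dots, which is why TraceForOpcode.__init__ now escapes '\.' in its regex. A quick check of the same pattern against a hypothetical merge point string:

import re

# same pattern as in TraceForOpcode.__init__ above, only quoted differently
r = re.compile(r"<code object ([<>\w]+)\. file '(.+?)'\. line (\d+)> #(\d+) (\w+)")
text = "<code object f. file 'x.py'. line 2> #9 LOAD_FAST"
name, filename, lineno, bytecode_no, bytecode_name = r.search(text).groups()
assert (name, filename, lineno) == ('f', 'x.py', '2')
assert (bytecode_no, bytecode_name) == ('9', 'LOAD_FAST')
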
diff --git a/pypy/tool/pytest/appsupport.py b/pypy/tool/pytest/appsupport.py
--- a/pypy/tool/pytest/appsupport.py
+++ b/pypy/tool/pytest/appsupport.py
@@ -1,8 +1,13 @@
 import autopath
 import py
-from pypy.interpreter import gateway
+from pypy.interpreter import gateway, pycode
 from pypy.interpreter.error import OperationError
 
+try:
+    from _pytest.assertion.newinterpret import interpret
+except ImportError:
+    from _pytest.assertion.oldinterpret import interpret
+
 # ____________________________________________________________
 
 class AppCode(object):
@@ -51,13 +56,11 @@
         space = self.space
         for key, w_value in vars.items():
             space.setitem(self.w_locals, space.wrap(key), w_value)
-        return space.eval(code, self.w_globals, self.w_locals)
-
-    def exec_(self, code, **vars):
-        space = self.space
-        for key, w_value in vars.items():
-            space.setitem(self.w_locals, space.wrap(key), w_value)
-        space.exec_(code, self.w_globals, self.w_locals)
+        if isinstance(code, str):
+            return space.eval(code, self.w_globals, self.w_locals)
+        pyc = pycode.PyCode._from_code(space, code)
+        return pyc.exec_host_bytecode(self.w_globals, self.w_locals)
+    exec_ = eval
 
     def repr(self, w_value):
         return self.space.unwrap(self.space.repr(w_value))
@@ -80,7 +83,7 @@
     def __init__(self, space, operr):
         self.space = space
         self.operr = operr
-        self.typename = operr.w_type.getname(space, "?")
+        self.typename = operr.w_type.getname(space)
         self.traceback = AppTraceback(space, self.operr.get_traceback())
         debug_excs = getattr(operr, 'debug_excs', [])
         if debug_excs:
@@ -163,8 +166,8 @@
             except py.error.ENOENT: 
                 source = None
             from pypy import conftest
-            if source and not py.test.config.option.nomagic:
-                msg = py.code._reinterpret_old(source, runner, should_fail=True)
+            if source and py.test.config._assertstate.mode != "off":
+                msg = interpret(source, runner, should_fail=True)
                 space.setattr(w_self, space.wrap('args'),
                             space.newtuple([space.wrap(msg)]))
                 w_msg = space.wrap(msg)
diff --git a/pypy/tool/pytest/test/test_pytestsupport.py b/pypy/tool/pytest/test/test_pytestsupport.py
--- a/pypy/tool/pytest/test/test_pytestsupport.py
+++ b/pypy/tool/pytest/test/test_pytestsupport.py
@@ -4,7 +4,7 @@
 from pypy.interpreter.pycode import PyCode
 from pypy.interpreter.pyframe import PyFrame
 from pypy.tool.pytest.appsupport import (AppFrame, build_pytest_assertion,
-    AppExceptionInfo)
+    AppExceptionInfo, interpret)
 import py
 from pypy.tool.udir import udir
 import os
@@ -22,8 +22,8 @@
     co = PyCode._from_code(space, somefunc.func_code)
     pyframe = PyFrame(space, co, space.newdict(), None)
     runner = AppFrame(space, pyframe)
-    py.code._reinterpret_old("f = lambda x: x+1", runner, should_fail=False)
-    msg = py.code._reinterpret_old("assert isinstance(f(2), float)", runner)
+    interpret("f = lambda x: x+1", runner, should_fail=False)
+    msg = interpret("assert isinstance(f(2), float)", runner)
     assert msg.startswith("assert isinstance(3, float)\n"
                           " +  where 3 = ")
 
@@ -58,6 +58,12 @@
     except AssertionError, e:
         assert e.msg == "Failed"
 
+def app_test_comparison():
+    try:
+        assert 3 > 4
+    except AssertionError, e:
+        assert "3 > 4" in e.msg
+
 
 def test_appexecinfo(space):
     try:
diff --git a/pypy/tool/release/package.py b/pypy/tool/release/package.py
--- a/pypy/tool/release/package.py
+++ b/pypy/tool/release/package.py
@@ -3,9 +3,9 @@
 It uses 'pypy/translator/goal/pypy-c' and parts of the rest of the working
 copy.  Usage:
 
-    package.py root-pypy-dir [name-of-archive] [name-of-pypy-c]
+    package.py root-pypy-dir [name-of-archive] [name-of-pypy-c] [destination-for-tarball] [pypy-c-path]
 
-Usually you would do:   package.py ../../.. pypy-VER-PLATFORM.
+Usually you would do:   package.py ../../.. pypy-VER-PLATFORM
 The output is found in the directory /tmp/usession-YOURNAME/build/.
 """
 
@@ -122,7 +122,10 @@
             zf.close()
         else:
             archive = str(builddir.join(name + '.tar.bz2'))
-            e = os.system('tar --owner=root --group=root --numeric-owner -cvjf ' + archive + " " + name)
+            if sys.platform == 'darwin':
+                e = os.system('tar --numeric-owner -cvjf ' + archive + " " + name)
+            else:
+                e = os.system('tar --owner=root --group=root --numeric-owner -cvjf ' + archive + " " + name)
             if e:
                 raise OSError('"tar" returned exit status %r' % e)
     finally:
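
The platform check added to package.py is there presumably because the tar shipped with OS X (BSD tar) does not understand GNU tar's --owner/--group options, so only --numeric-owner is passed on darwin. The shape of the change, pulled out into a small helper for illustration (the function name and its arguments are made up, not part of the patch):

import os
import sys

def make_tarball(archive, name):
    # GNU tar gets root ownership so the tarball unpacks identically for
    # everyone; BSD tar on OS X has no --owner/--group, so skip them there.
    if sys.platform == 'darwin':
        cmd = 'tar --numeric-owner -cvjf '
    else:
        cmd = 'tar --owner=root --group=root --numeric-owner -cvjf '
    e = os.system(cmd + archive + " " + name)
    if e:
        raise OSError('"tar" returned exit status %r' % e)
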
diff --git a/pypy/tool/test/test_gcc_cache.py b/pypy/tool/test/test_gcc_cache.py
--- a/pypy/tool/test/test_gcc_cache.py
+++ b/pypy/tool/test/test_gcc_cache.py
@@ -1,11 +1,13 @@
-
+import sys
 from pypy.tool.gcc_cache import *
 from pypy.tool.udir import udir
-import md5
+import md5, cStringIO
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
 
+localudir = udir.join('test_gcc_cache').ensure(dir=1)
+
 def test_gcc_exec():
-    f = udir.join("x.c")
+    f = localudir.join("x.c")
     f.write("""
     #include <stdio.h>
     #include <test_gcc_exec.h>
@@ -15,8 +17,8 @@
        return 0;
     }
     """)
-    dir1 = udir.join('test_gcc_exec_dir1').ensure(dir=1)
-    dir2 = udir.join('test_gcc_exec_dir2').ensure(dir=1)
+    dir1 = localudir.join('test_gcc_exec_dir1').ensure(dir=1)
+    dir2 = localudir.join('test_gcc_exec_dir2').ensure(dir=1)
     dir1.join('test_gcc_exec.h').write('#define ANSWER 3\n')
     dir2.join('test_gcc_exec.h').write('#define ANSWER 42\n')
     eci = ExternalCompilationInfo(include_dirs=[str(dir1)])
@@ -36,7 +38,7 @@
     print '>>>'
 
 def test_gcc_ask():
-    f = udir.join("y.c")
+    f = localudir.join("y.c")
     f.write("""
     #include <stdio.h>
     #include <test_gcc_ask.h>
@@ -46,8 +48,8 @@
        return 0;
     }
     """)
-    dir1 = udir.join('test_gcc_ask_dir1').ensure(dir=1)
-    dir2 = udir.join('test_gcc_ask_dir2').ensure(dir=1)
+    dir1 = localudir.join('test_gcc_ask_dir1').ensure(dir=1)
+    dir2 = localudir.join('test_gcc_ask_dir2').ensure(dir=1)
     dir1.join('test_gcc_ask.h').write('/* hello world */\n')
     dir2.join('test_gcc_ask.h').write('#error boom\n')
     eci = ExternalCompilationInfo(include_dirs=[str(dir1)])
@@ -63,3 +65,15 @@
     print '<<<'
     print err
     print '>>>'
+
+def test_gcc_ask_doesnt_log_errors():
+    f = localudir.join('z.c')
+    f.write("""this file is not valid C code\n""")
+    eci = ExternalCompilationInfo()
+    oldstderr = sys.stderr
+    try:
+        sys.stderr = capture = cStringIO.StringIO()
+        py.test.raises(CompilationError, try_compile_cache, [f], eci)
+    finally:
+        sys.stderr = oldstderr
+    assert 'ERROR' not in capture.getvalue().upper()
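
test_gcc_ask_doesnt_log_errors relies on a small capture idiom: substitute a StringIO for sys.stderr around the call under test, restore the real stream in a finally block, and only then inspect what was written. The idiom in isolation, with a print standing in for the compilation attempt:

import sys
import cStringIO

oldstderr = sys.stderr
try:
    # anything written to stderr inside this block lands in 'capture'
    sys.stderr = capture = cStringIO.StringIO()
    print >> sys.stderr, "compiling z.c ... failed"   # stand-in for the work
finally:
    # always restore the real stderr, even if the block raises
    sys.stderr = oldstderr
assert 'ERROR' not in capture.getvalue().upper()
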
diff --git a/pypy/translator/c/gc.py b/pypy/translator/c/gc.py
--- a/pypy/translator/c/gc.py
+++ b/pypy/translator/c/gc.py
@@ -297,6 +297,13 @@
 
     gc_startup_code = RefcountingGcPolicy.gc_startup_code.im_func
 
+    def compilation_info(self):
+        eci = BasicGcPolicy.compilation_info(self)
+        eci = eci.merge(ExternalCompilationInfo(
+            post_include_bits=['#define USING_NO_GC_AT_ALL'],
+            ))
+        return eci
+
 
 class FrameworkGcPolicy(BasicGcPolicy):
     transformerclass = framework.FrameworkGCTransformer
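
NoneGcPolicy now announces itself to the generated C sources by extending the ExternalCompilationInfo it inherits from BasicGcPolicy. ECIs are treated as immutable, so the extension is expressed as a merge with a second ECI rather than as in-place mutation. Roughly, outside of any class (the empty base ECI below is purely for illustration and stands in for BasicGcPolicy's result):

from pypy.translator.tool.cbuild import ExternalCompilationInfo

base = ExternalCompilationInfo()
eci = base.merge(ExternalCompilationInfo(
    post_include_bits=['#define USING_NO_GC_AT_ALL'],
))
# 'eci' now injects the marker macro after the standard includes, so the
# generated C sources can #ifdef on USING_NO_GC_AT_ALL.
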
diff --git a/pypy/translator/c/gcc/instruction.py b/pypy/translator/c/gcc/instruction.py
--- a/pypy/translator/c/gcc/instruction.py
+++ b/pypy/translator/c/gcc/instruction.py
@@ -187,8 +187,8 @@
 
     def requestgcroots(self, tracker):
         # no need to track the value of these registers in the caller
-        # function if we are the main(), or if we are flagged as a
-        # "bottom" function (a callback from C code)
+        # function if we are flagged as a "bottom" function (a callback
+        # from C code, or pypy_main_function())
         if tracker.is_stack_bottom:
             return {}
         else:
diff --git a/pypy/translator/c/gcc/test/elf/track10.s b/pypy/translator/c/gcc/test/elf/track10.s
--- a/pypy/translator/c/gcc/test/elf/track10.s
+++ b/pypy/translator/c/gcc/test/elf/track10.s
@@ -1,5 +1,5 @@
-	.type	main, @function
-main:
+	.type	main1, @function
+main1:
 	pushl	%ebx
 	call	pypy_f
 	;; expected {4(%esp) | (%esp), %esi, %edi, %ebp | %ebx}
@@ -11,4 +11,4 @@
 	/* GCROOT %ebx */
 	popl	%ebx
 	ret
-	.size	main, .-main
+	.size	main1, .-main1
diff --git a/pypy/translator/c/gcc/test/elf/track12.s b/pypy/translator/c/gcc/test/elf/track12.s
new file mode 100644
--- /dev/null
+++ b/pypy/translator/c/gcc/test/elf/track12.s
@@ -0,0 +1,9 @@
+	.type	pypy_f, @function
+pypy_f:
+	pushl   4(%esp)
+	call    pypy_other
+	;; expected {4(%esp) | %ebx, %esi, %edi, %ebp | (%esp)}
+	popl    %eax
+	/* GCROOT %eax */
+	ret
+	.size	pypy_f, .-pypy_f
diff --git a/pypy/translator/c/gcc/test/elf/track13.s b/pypy/translator/c/gcc/test/elf/track13.s
new file mode 100644
--- /dev/null
+++ b/pypy/translator/c/gcc/test/elf/track13.s
@@ -0,0 +1,9 @@
+	.type	pypy_f, @function
+pypy_f:
+	call    pypy_other
+	;; expected {(%esp) | %ebx, %esi, %edi, %ebp | 8(%esp)}
+	pushl   8(%esp)
+	popl    %eax
+	/* GCROOT %eax */
+	ret
+	.size	pypy_f, .-pypy_f
diff --git a/pypy/translator/c/gcc/test/elf/track4.s b/pypy/translator/c/gcc/test/elf/track4.s
deleted file mode 100644
--- a/pypy/translator/c/gcc/test/elf/track4.s
+++ /dev/null
@@ -1,52 +0,0 @@
-	.type	main, @function
-main:
-	;; this is an artificial example showing what kind of code gcc
-	;; can produce for main()
-	pushl	%ebp
-	movl	%eax, $globalptr1
-	movl	%esp, %ebp
-	pushl	%edi
-	subl	$8, %esp
-	andl	$-16, %esp
-	movl	%ebx, -8(%ebp)
-	movl	8(%ebp), %edi
-	call	foobar
-	;; expected {4(%ebp) | -8(%ebp), %esi, -4(%ebp), (%ebp) | %edi}
-.L1:
-	cmpl	$0, %eax
-	je	.L3
-.L2:
-	;; inlined function here with -fomit-frame-pointer
-	movl	%eax, -12(%ebp)
-	movl	%edi, %edx
-	subl	$16, %esp
-	movl	%eax, (%esp)
-	movl	$42, %edi
-	movl	%edx, 4(%esp)
-	movl	%esi, %ebx
-	movl	$nonsense, %esi
-	call	foobar
-	;; expected {4(%ebp) | -8(%ebp), %ebx, -4(%ebp), (%ebp) | 4(%esp), -12(%ebp)}
-	addl	%edi, %eax
-	movl	4(%esp), %eax
-	movl	%ebx, %esi
-	addl	$16, %esp
-	movl	%eax, %edi
-	movl	-12(%ebp), %eax
-#APP
-	/* GCROOT %eax */
-#NO_APP
-	;; end of inlined function
-.L3:
-	call	foobar
-	;; expected {4(%ebp) | -8(%ebp), %esi, -4(%ebp), (%ebp) | %edi}
-#APP
-	/* GCROOT %edi */
-#NO_APP
-	movl	-8(%ebp), %ebx
-	movl	-4(%ebp), %edi
-	movl	%ebp, %esp
-	popl	%ebp
-	ret
-
-	.size	main, .-main
diff --git a/pypy/translator/c/gcc/test/elf/track6.s b/pypy/translator/c/gcc/test/elf/track6.s
deleted file mode 100644
--- a/pypy/translator/c/gcc/test/elf/track6.s
+++ /dev/null
@@ -1,26 +0,0 @@
-	.type	main, @function
-main:
-	;; a minimal example showing what kind of code gcc
-	;; can produce for main(): some local variable accesses
-	;; are relative to %ebp, while others are relative to
-	;; %esp, and the difference %ebp-%esp is not constant
-	;; because of the 'andl' to align the stack
-	pushl	%ebp
-	movl	%esp, %ebp
-	subl	$8, %esp
-	andl	$-16, %esp
-	movl	$globalptr1, -4(%ebp)
-	movl	$globalptr2, (%esp)
-	pushl	$0
-	call	foobar
-	;; expected {4(%ebp) | %ebx, %esi, %edi, (%ebp) | 4(%esp), -4(%ebp)}
-	popl	%eax
-#APP
-	/* GCROOT -4(%ebp) */
-	/* GCROOT (%esp) */
-#NO_APP
-	movl	%ebp, %esp
-	popl	%ebp
-	ret
-
-	.size	main, .-main
diff --git a/pypy/translator/c/gcc/test/elf/track7.s b/pypy/translator/c/gcc/test/elf/track7.s
--- a/pypy/translator/c/gcc/test/elf/track7.s
+++ b/pypy/translator/c/gcc/test/elf/track7.s
@@ -1,5 +1,5 @@
-	.type	main, @function
-main:
+	.type	main1, @function
+main1:
 	;; cmovCOND tests.
 	pushl	%ebx
 	movl	12(%esp), %ebx
@@ -16,4 +16,4 @@
 	popl	%ebx
 	ret
 
-	.size	main, .-main
+	.size	main1, .-main1
diff --git a/pypy/translator/c/gcc/test/msvc/track6.s b/pypy/translator/c/gcc/test/msvc/track6.s
deleted file mode 100644
--- a/pypy/translator/c/gcc/test/msvc/track6.s
+++ /dev/null
@@ -1,15 +0,0 @@
-_TEXT	SEGMENT
-_pypy_g_foo PROC					; COMDAT
-
-	push	ebp
-	mov	ebp, esp
-	and	esp, -64
-	sub	esp, 12
-	push	esi
-	call	_pypy_g_something_else
-	;; expected {4(%ebp) | %ebx, (%esp), %edi, (%ebp) | }
-	pop	esi
-	mov	esp, ebp
-	pop	ebp
-	ret	0
-_pypy_g_foo ENDP
diff --git a/pypy/translator/c/gcc/test/msvc/track_and_esp.s b/pypy/translator/c/gcc/test/msvc/track_and_esp.s
new file mode 100644
--- /dev/null
+++ b/pypy/translator/c/gcc/test/msvc/track_and_esp.s
@@ -0,0 +1,474 @@
+PUBLIC	??_C@_0BN@BIPHFGBC@pypy_g_ll_math_ll_math_frexp?$AA@ ; `string'
+PUBLIC	_pypy_g_ll_math_ll_math_frexp
+;	COMDAT ??_C@_0BN@BIPHFGBC@pypy_g_ll_math_ll_math_frexp?$AA@
+CONST	SEGMENT
+??_C@_0BN@BIPHFGBC@pypy_g_ll_math_ll_math_frexp?$AA@ DB 'pypy_g_ll_math_l'
+	DB	'l_math_frexp', 00H				; `string'
+; Function compile flags: /Ogtpy
+CONST	ENDS
+;	COMDAT _pypy_g_ll_math_ll_math_frexp
+_TEXT	SEGMENT
+_l_mantissa_0$ = -8					; size = 8
+_l_v21638$ = -8						; size = 8
+_l_x_14$ = 8						; size = 8
+_pypy_g_ll_math_ll_math_frexp PROC			; COMDAT
+
+; 58245: struct pypy_tuple2_0 *pypy_g_ll_math_ll_math_frexp(double l_x_14) {
+
+	push	ebp
+	mov	ebp, esp
+	and	esp, -64				; ffffffc0H
+
+; 58246: 	long *l_exp_p_0; double l_mantissa_0; bool_t l_v21641;
+; 58247: 	bool_t l_v21643; bool_t l_v21644; bool_t l_v21646; bool_t l_v21647;
+; 58248: 	bool_t l_v21652; bool_t l_v21653; bool_t l_v21660; bool_t l_v21666;
+; 58249: 	bool_t l_v21670; bool_t l_v21674; bool_t l_v21676; double l_v21638;
+; 58250: 	long l_v21637; long l_v21649; long l_v21651; long l_v21677;
+; 58251: 	long l_v21678; struct pypy_exceptions_Exception0 *l_v21687;
+; 58252: 	struct pypy_header0 *l_v21654; struct pypy_object0 *l_v21682;
+; 58253: 	struct pypy_object0 *l_v21691; struct pypy_object_vtable0 *l_v21665;
+; 58254: 	struct pypy_object_vtable0 *l_v21669;
+; 58255: 	struct pypy_object_vtable0 *l_v21675;
+; 58256: 	struct pypy_object_vtable0 *l_v21683; struct pypy_tuple2_0 *l_v21640;
+; 58257: 	struct pypy_tuple2_0 *l_v21695; void* l_v21639; void* l_v21648;
+; 58258: 	void* l_v21650; void* l_v21656; void* l_v21658; void* l_v21659;
+; 58259: 	void* l_v21668; void* l_v21672; void* l_v21679; void* l_v21688;
+; 58260: 	void* l_v21696;
+; 58261: 	goto block0;
+; 58262: 
+; 58263:     block0:
+; 58264: 	l_v21641 = pypy_g_ll_math_ll_math_isnan(l_x_14);
+
+	fld	QWORD PTR _l_x_14$[ebp]
+	sub	esp, 52					; 00000034H
+	push	ebx
+	push	esi
+	push	edi
+	sub	esp, 8
+	fstp	QWORD PTR [esp]
+$block0$88239:
+	call	_pypy_g_ll_math_ll_math_isnan
+
+; 58265: 	pypy_asm_gc_nocollect(pypy_g_ll_math_ll_math_isnan);
+; 58266: 	l_v21643 = l_v21641;
+; 58267: 	if (l_v21643) {
+; 58268: 		l_v21637 = 0L;
+; 58269: 		l_v21638 = l_x_14;
+
+	fld	QWORD PTR _l_x_14$[ebp]
+	add	esp, 8
+	test	al, al
+
+; 58270: 		goto block3;
+
+	jne	SHORT $LN10@pypy_g_ll_@159
+
+; 58271: 	}
+; 58272: 	goto block1;
+; 58273: 
+; 58274:     block1:
+; 58275: 	l_v21644 = pypy_g_ll_math_ll_math_isinf(l_x_14);
+
+	sub	esp, 8
+	fstp	QWORD PTR [esp]
+$block1$88243:
+	call	_pypy_g_ll_math_ll_math_isinf
+	add	esp, 8
+
+; 58276: 	pypy_asm_gc_nocollect(pypy_g_ll_math_ll_math_isinf);
+; 58277: 	l_v21646 = l_v21644;
+; 58278: 	if (l_v21646) {
+
+	test	al, al
+	je	SHORT $block2$88245
+
+; 58279: 		l_v21637 = 0L;
+; 58280: 		l_v21638 = l_x_14;
+
+	fld	QWORD PTR _l_x_14$[ebp]
+$LN10@pypy_g_ll_@159:
+
+; 58288: 		goto block14;
+; 58289: 	}
+; 58290: 	l_v21637 = 0L;
+
+	xor	edi, edi
+$LN30@pypy_g_ll_@159:
+
+; 58291: 	l_v21638 = l_x_14;
+; 58292: 	goto block3;
+; 58293: 
+; 58294:     block3:
+; 58295: 	l_v21648 = (&pypy_g_pypy_rpython_memory_gc_semispace_SemiSpaceGC)->ssgc_inst_free;
+
+	mov	esi, DWORD PTR _pypy_g_pypy_rpython_memory_gc_semispace_SemiSpaceGC+4
+	fstp	QWORD PTR _l_v21638$[esp+64]
+
+; 58296: 	OP_RAW_MALLOC_USAGE((0 + ROUND_UP_FOR_ALLOCATION(sizeof(struct pypy_tuple2_0), sizeof(struct pypy_forwarding_stub0))), l_v21649);
+; 58297: 	l_v21650 = (&pypy_g_pypy_rpython_memory_gc_semispace_SemiSpaceGC)->ssgc_inst_top_of_space;
+; 58298: 	OP_ADR_DELTA(l_v21650, l_v21648, l_v21651);
+
+	mov	eax, DWORD PTR _pypy_g_pypy_rpython_memory_gc_semispace_SemiSpaceGC+12
+	sub	eax, esi
+
+; 58299: 	OP_INT_GT(l_v21649, l_v21651, l_v21652);
+
+	cmp	eax, 24					; 00000018H
+$block3$88242:
+
+; 58300: 	if (l_v21652) {
+
+	jge	$block4$88260
+
+; 58334: 	l_v21695 = l_v21640;
+; 58335: 	goto block8;
+; 58336: 
+; 58337:     block8:
+; 58338: 	RPY_DEBUG_RETURN();
+; 58339: 	return l_v21695;
+; 58340: 
+; 58341:     block9:
+; 58342: 	PYPY_DEBUG_RECORD_TRACEBACK("ll_math_ll_math_frexp");
+; 58343: 	l_v21695 = ((struct pypy_tuple2_0 *) NULL);
+; 58344: 	goto block8;
+; 58345: 
+; 58346:     block10:
+; 58347: 	abort();  /* debug_llinterpcall should be unreachable */
+; 58348: 	l_v21665 = (&pypy_g_ExcData)->ed_exc_type;
+; 58349: 	l_v21666 = (l_v21665 == NULL);
+; 58350: 	if (!l_v21666) {
+; 58351: 		goto block11;
+; 58352: 	}
+; 58353: 	goto block5;
+; 58354: 
+; 58355:     block11:
+; 58356: 	PYPY_DEBUG_RECORD_TRACEBACK("ll_math_ll_math_frexp");
+; 58357: 	l_v21696 = NULL;
+; 58358: 	goto block6;
+; 58359: 
+; 58360:     block12:
+; 58361: 	l_v21668 = pypy_g_SemiSpaceGC_obtain_free_space((&pypy_g_pypy_rpython_memory_gc_semispace_SemiSpaceGC), (0 + ROUND_UP_FOR_ALLOCATION(sizeof(struct pypy_tuple2_0), sizeof(struct pypy_forwarding_stub0))));
+
+	push	24					; 00000018H
+	push	OFFSET _pypy_g_pypy_rpython_memory_gc_semispace_SemiSpaceGC
+$block12$88259:
+	call	_pypy_g_SemiSpaceGC_obtain_free_space
+    ;; expected {4(%ebp) | 16(%esp), 12(%esp), 8(%esp), (%ebp) | }
+
+; 58362: 	l_v21669 = (&pypy_g_ExcData)->ed_exc_type;
+; 58363: 	l_v21670 = (l_v21669 == NULL);
+
+	xor	ecx, ecx
+	add	esp, 8
+	cmp	DWORD PTR _pypy_g_ExcData, ecx
+
+; 58364: 	if (!l_v21670) {
+
+	je	$LN5@pypy_g_ll_@159
+
+; 58368: 	goto block4;
+; 58369: 
+; 58370:     block13:
+; 58371: 	PYPY_DEBUG_RECORD_TRACEBACK("ll_math_ll_math_frexp");
+
+	mov	eax, DWORD PTR _pypydtcount
+	mov	DWORD PTR _pypy_debug_tracebacks[eax*8], OFFSET ?loc@?N@??pypy_g_ll_math_ll_math_frexp@@9@9
+	mov	DWORD PTR _pypy_debug_tracebacks[eax*8+4], ecx
+	inc	eax
+	and	eax, 8191				; 00001fffH
+	mov	DWORD PTR _pypy_debug_tracebacks[eax*8], OFFSET ?loc@?8??pypy_g_ll_math_ll_math_frexp@@9@9
+	mov	DWORD PTR _pypy_debug_tracebacks[eax*8+4], ecx
+	inc	eax
+	and	eax, 8191				; 00001fffH
+	mov	DWORD PTR _pypydtcount, eax
+$block13$88313:
+$block9$88285:
+	xor	eax, eax
+
+; 58423: 	goto block8;
+; 58424: }
+
+	pop	edi
+	pop	esi
+	pop	ebx
+	mov	esp, ebp
+	pop	ebp
+	ret	0
+$block2$88245:
+
+; 58281: 		goto block3;
+; 58282: 	}
+; 58283: 	goto block2;
+; 58284: 
+; 58285:     block2:
+; 58286: 	OP_FLOAT_IS_TRUE(l_x_14, l_v21647);
+
+	fldz
+	fld	QWORD PTR _l_x_14$[ebp]
+	fucom	ST(1)
+	fnstsw	ax
+	fstp	ST(1)
+	test	ah, 68					; 00000044H
+
+; 58287: 	if (l_v21647) {
+
+	jnp	$LN10@pypy_g_ll_@159
+
+; 58372: 	l_v21696 = NULL;
+; 58373: 	goto block6;
+; 58374: 
+; 58375:     block14:
+; 58376: 	l_v21672 = pypy_g__ll_malloc_varsize_no_length__Signed_Signed_Sign(1L, (0 + 0), sizeof(long));
+
+	push	4
+	fstp	ST(0)
+	push	0
+	push	1
+$block14$88247:
+	call	_pypy_g__ll_malloc_varsize_no_length__Signed_Signed_Sign
+    ;; expected {4(%ebp) | 20(%esp), 16(%esp), 12(%esp), (%ebp) | }
+	mov	esi, eax
+
+; 58377: 	OP_TRACK_ALLOC_START(l_v21672, /* nothing */);
+
+	push	OFFSET ??_C@_0BN@BIPHFGBC@pypy_g_ll_math_ll_math_frexp?$AA@
+	push	esi
+	call	_pypy_debug_alloc_start
+    ;; expected {4(%ebp) | 28(%esp), 24(%esp), 20(%esp), (%ebp) | }
+	add	esp, 20					; 00000014H
+
+; 58378: 	l_exp_p_0 = (long *)l_v21672;
+; 58379: 	l_v21674 = (l_exp_p_0 != NULL);
+
+	test	esi, esi
+
+; 58380: 	if (!l_v21674) {
+
+	jne	SHORT $block15$88324
+
+; 58418: 	goto block8;
+; 58419: 
+; 58420:     block18:
+; 58421: 	PYPY_DEBUG_RECORD_TRACEBACK("ll_math_ll_math_frexp");
+
+	mov	eax, DWORD PTR _pypydtcount
+	mov	DWORD PTR _pypy_debug_tracebacks[eax*8], OFFSET ?loc@?BB@??pypy_g_ll_math_ll_math_frexp@@9@9
+	mov	DWORD PTR _pypy_debug_tracebacks[eax*8+4], esi
+	inc	eax
+	and	eax, 8191				; 00001fffH
+	mov	DWORD PTR _pypydtcount, eax
+$block18$88323:
+
+; 58422: 	l_v21695 = ((struct pypy_tuple2_0 *) NULL);
+
+	xor	eax, eax
+
+; 58423: 	goto block8;
+; 58424: }
+
+	pop	edi
+	pop	esi
+	pop	ebx
+	mov	esp, ebp
+	pop	ebp
+	ret	0
+$block15$88324:
+
+; 58381: 		goto block18;
+; 58382: 	}
+; 58383: 	goto block15;
+; 58384: 
+; 58385:     block15:
+; 58386: 	l_mantissa_0 = pypy_g_frexp__Float_arrayPtr_star_2(l_x_14, l_exp_p_0);
+
+	fld	QWORD PTR _l_x_14$[ebp]
+	push	esi
+	sub	esp, 8
+	fstp	QWORD PTR [esp]
+	call	_pypy_g_frexp__Float_arrayPtr_star_2
+    ;; expected {4(%ebp) | 20(%esp), 16(%esp), 12(%esp), (%ebp) | }
+
+; 58387: 	l_v21675 = (&pypy_g_ExcData)->ed_exc_type;
+; 58388: 	l_v21676 = (l_v21675 == NULL);
+
+	mov	edi, DWORD PTR _pypy_g_ExcData
+	fstp	QWORD PTR _l_mantissa_0$[esp+76]
+	add	esp, 12					; 0000000cH
+	test	edi, edi
+
+; 58389: 	if (!l_v21676) {
+
+	je	SHORT $block16$88328
+
+; 58403: 
+; 58404:     block17:
+; 58405: 	l_v21682 = (&pypy_g_ExcData)->ed_exc_value;
+; 58406: 	l_v21683 = (&pypy_g_ExcData)->ed_exc_type;
+; 58407: 	PYPY_DEBUG_CATCH_EXCEPTION("ll_math_ll_math_frexp", l_v21683, l_v21683 == (&pypy_g_py__code_assertion_AssertionError_vtable.ae_super.ae_super.se_super.e_super) || l_v21683 == (&pypy_g_exceptions_NotImplementedError_vtable.nie_super.re_super.se_super.e_super));
+
+	mov	eax, DWORD PTR _pypydtcount
+	mov	ebx, DWORD PTR _pypy_g_ExcData+4
+	mov	DWORD PTR _pypy_debug_tracebacks[eax*8], OFFSET ?loc@?BA@??pypy_g_ll_math_ll_math_frexp@@9@9
+	mov	DWORD PTR _pypy_debug_tracebacks[eax*8+4], edi
+	inc	eax
+	and	eax, 8191				; 00001fffH
+$block17$88327:
+	mov	DWORD PTR _pypydtcount, eax
+	cmp	edi, OFFSET _pypy_g_py__code_assertion_AssertionError_vtable
+	je	SHORT $LN1@pypy_g_ll_@159
+	cmp	edi, OFFSET _pypy_g_exceptions_NotImplementedError_vtable
+	jne	SHORT $LN2@pypy_g_ll_@159
+$LN1@pypy_g_ll_@159:
+	call	_pypy_debug_catch_fatal_exception
+$LN2@pypy_g_ll_@159:
+
+; 58408: 	(&pypy_g_ExcData)->ed_exc_value = ((struct pypy_object0 *) NULL);
+
+	xor	eax, eax
+
+; 58409: 	(&pypy_g_ExcData)->ed_exc_type = ((struct pypy_object_vtable0 *) NULL);
+; 58410: 	l_v21687 = (struct pypy_exceptions_Exception0 *)l_v21682;
+; 58411: 	l_v21688 = (void*)l_exp_p_0;
+; 58412: 	OP_TRACK_ALLOC_STOP(l_v21688, /* nothing */);
+
+	push	esi
+	mov	DWORD PTR _pypy_g_ExcData+4, eax
+	mov	DWORD PTR _pypy_g_ExcData, eax
+	call	_pypy_debug_alloc_stop
+    ;; expected {4(%ebp) | 12(%esp), 8(%esp), 4(%esp), (%ebp) | }
+
+; 58413: 	OP_RAW_FREE(l_v21688, /* nothing */);
+
+	push	esi
+	call	_PyObject_Free
+    ;; expected {4(%ebp) | 16(%esp), 12(%esp), 8(%esp), (%ebp) | }
+
+; 58414: 	l_v21691 = (struct pypy_object0 *)l_v21687;
+; 58415: 	pypy_g_RPyReRaiseException(l_v21683, l_v21691);
+
+	push	ebx
+	push	edi
+	call	_pypy_g_RPyReRaiseException
+	add	esp, 16					; 00000010H
+
+; 58416: 	pypy_asm_gc_nocollect(pypy_g_RPyReRaiseException);
+; 58417: 	l_v21695 = ((struct pypy_tuple2_0 *) NULL);
+
+	xor	eax, eax
+
+; 58423: 	goto block8;
+; 58424: }
+
+	pop	edi
+	pop	esi
+	pop	ebx
+	mov	esp, ebp
+	pop	ebp
+	ret	0
+$block16$88328:
+
+; 58390: 		goto block17;
+; 58391: 	}
+; 58392: 	goto block16;
+; 58393: 
+; 58394:     block16:
+; 58395: 	l_v21677 = RPyBareItem(l_exp_p_0, 0L);
+; 58396: 	l_v21678 = (long)(l_v21677);
+
+	mov	edi, DWORD PTR [esi]
+
+; 58397: 	l_v21679 = (void*)l_exp_p_0;
+; 58398: 	OP_TRACK_ALLOC_STOP(l_v21679, /* nothing */);
+
+	push	esi
+	call	_pypy_debug_alloc_stop
+    ;; expected {4(%ebp) | 12(%esp), 8(%esp), 4(%esp), (%ebp) | }
+
+; 58399: 	OP_RAW_FREE(l_v21679, /* nothing */);
+
+	push	esi
+	call	_PyObject_Free
+    ;; expected {4(%ebp) | 16(%esp), 12(%esp), 8(%esp), (%ebp) | }
+
+; 58400: 	l_v21637 = l_v21678;
+; 58401: 	l_v21638 = l_mantissa_0;
+
+	fld	QWORD PTR _l_mantissa_0$[esp+72]
+	add	esp, 8
+
+; 58402: 	goto block3;
+
+	jmp	$LN30@pypy_g_ll_@159
+$LN5@pypy_g_ll_@159:
+
+; 58365: 		goto block13;
+; 58366: 	}
+; 58367: 	l_v21639 = l_v21668;
+
+	mov	esi, eax
+$block4$88260:
+$block5$88263:
+
+; 58301: 		goto block12;
+; 58302: 	}
+; 58303: 	l_v21639 = l_v21648;
+; 58304: 	goto block4;
+; 58305: 
+; 58306:     block4:
+; 58307: 	OP_INT_IS_TRUE(RUNNING_ON_LLINTERP, l_v21653);
+; 58308: 	if (l_v21653) {
+; 58309: 		goto block10;
+; 58310: 	}
+; 58311: 	goto block5;
+; 58312: 
+; 58313:     block5:
+; 58314: 	l_v21654 = (struct pypy_header0 *)l_v21639;
+; 58315: 	RPyField(l_v21654, h_tid) = (GROUP_MEMBER_OFFSET(struct group_pypy_g_typeinfo_s, member20)+0L);
+
+	test	esi, esi
+	jne	SHORT $LN18@pypy_g_ll_@159
+	call	_RPyAbort
+$LN18@pypy_g_ll_@159:
+
+; 58316: 	OP_ADR_ADD(l_v21639, (0 + ROUND_UP_FOR_ALLOCATION(sizeof(struct pypy_tuple2_0), sizeof(struct pypy_forwarding_stub0))), l_v21656);
+; 58317: 	(&pypy_g_pypy_rpython_memory_gc_semispace_SemiSpaceGC)->ssgc_inst_free = l_v21656;
+; 58318: 	OP_ADR_ADD(l_v21639, 0, l_v21658);
+; 58319: 	l_v21659 = (void*)l_v21658;
+; 58320: 	l_v21696 = l_v21659;
+; 58321: 	goto block6;
+; 58322: 
+; 58323:     block6:
+; 58324: 	l_v21640 = (struct pypy_tuple2_0 *)l_v21696;
+; 58325: 	l_v21660 = (l_v21640 != NULL);
+; 58326: 	if (!l_v21660) {
+; 58327: 		goto block9;
+; 58328: 	}
+; 58329: 	goto block7;
+; 58330: 
+; 58331:     block7:
+; 58332: 	RPyField(l_v21640, t_item0) = l_v21638;
+
+	fld	QWORD PTR _l_v21638$[esp+64]
+	mov	DWORD PTR [esi], 81			; 00000051H
+	lea	ecx, DWORD PTR [esi+24]
+	mov	DWORD PTR _pypy_g_pypy_rpython_memory_gc_semispace_SemiSpaceGC+4, ecx
+	fstp	QWORD PTR [esi+8]
+
+; 58333: 	RPyField(l_v21640, t_item1) = l_v21637;
+
+	mov	DWORD PTR [esi+16], edi
+
+; 58423: 	goto block8;
+; 58424: }
+
+	pop	edi
+	mov	eax, esi
+	pop	esi
+$block6$88281:
+$block8$88289:
+	pop	ebx
+	mov	esp, ebp
+	pop	ebp
+	ret	0
+_pypy_g_ll_math_ll_math_frexp ENDP
+_TEXT	ENDS
diff --git a/pypy/translator/c/gcc/trackgcroot.py b/pypy/translator/c/gcc/trackgcroot.py
--- a/pypy/translator/c/gcc/trackgcroot.py
+++ b/pypy/translator/c/gcc/trackgcroot.py
@@ -39,10 +39,15 @@
         self.uses_frame_pointer = False
         self.r_localvar = self.r_localvarnofp
         self.filetag = filetag
-        # a "stack bottom" function is either main() or a callback from C code
+        # a "stack bottom" function is either pypy_main_function() or a
+        # callback from C code.  In both cases they are identified by
+        # the presence of pypy_asm_stack_bottom().
         self.is_stack_bottom = False
 
     def computegcmaptable(self, verbose=0):
+        if self.funcname in ['main', '_main']:
+            return []     # don't analyze main(), its prologue may contain
+                          # strange instructions
         self.findlabels()
         self.parse_instructions()
         try:
@@ -226,7 +231,7 @@
         # in the frame at this point.  This doesn't count the return address
         # which is the word immediately following the frame in memory.
         # The 'framesize' is set to an odd value if it is only an estimate
-        # (see visit_andl()).
+        # (see InsnCannotFollowEsp).
 
         def walker(insn, size_delta):
             check = deltas.setdefault(insn, size_delta)
@@ -266,7 +271,8 @@
 
             match = self.r_localvar_esp.match(localvar)
             if match:
-                if localvar == self.TOP_OF_STACK: # for pushl and popl, by
+                if localvar == self.TOP_OF_STACK_MINUS_WORD:
+                                                  # for pushl and popl, by
                     hint = None                   # default ebp addressing is
                 else:                             # a bit nicer
                     hint = 'esp'
@@ -521,9 +527,8 @@
         target = match.group("target")
         if target == self.ESP:
             # only for  andl $-16, %esp  used to align the stack in main().
-            # The exact amount of adjutment is not known yet, so we use
-            # an odd-valued estimate to make sure the real value is not used
-            # elsewhere by the FunctionGcRootTracker.
+            # main() should not be seen at all.  But on e.g. MSVC we see
+            # the instruction somewhere else too...
             return InsnCannotFollowEsp()
         else:
             return self.binary_insn(line)
@@ -588,10 +593,12 @@
     def _visit_push(self, line):
         match = self.r_unaryinsn.match(line)
         source = match.group(1)
-        return [InsnStackAdjust(-self.WORD)] + self.insns_for_copy(source, self.TOP_OF_STACK)
+        return self.insns_for_copy(source, self.TOP_OF_STACK_MINUS_WORD) + \
+               [InsnStackAdjust(-self.WORD)]
 
     def _visit_pop(self, target):
-        return self.insns_for_copy(self.TOP_OF_STACK, target) + [InsnStackAdjust(+self.WORD)]
+        return [InsnStackAdjust(+self.WORD)] + \
+               self.insns_for_copy(self.TOP_OF_STACK_MINUS_WORD, target)
 
     def _visit_prologue(self):
         # for the prologue of functions that use %ebp as frame pointer
@@ -983,15 +990,15 @@
     OPERAND = r'(?:[-\w$%+.:@"]+(?:[(][\w%,]+[)])?|[(][\w%,]+[)])'
     LABEL   = r'([a-zA-Z_$.][a-zA-Z0-9_$@.]*)'
     OFFSET_LABELS   = 2**30
-    TOP_OF_STACK = '0(%esp)'
+    TOP_OF_STACK_MINUS_WORD = '-4(%esp)'
 
     r_functionstart = re.compile(r"\t.type\s+"+LABEL+",\s*[@]function\s*$")
     r_functionend   = re.compile(r"\t.size\s+"+LABEL+",\s*[.]-"+LABEL+"\s*$")
-    LOCALVAR        = r"%eax|%edx|%ecx|%ebx|%esi|%edi|%ebp|\d*[(]%esp[)]"
+    LOCALVAR        = r"%eax|%edx|%ecx|%ebx|%esi|%edi|%ebp|-?\d*[(]%esp[)]"
     LOCALVARFP      = LOCALVAR + r"|-?\d*[(]%ebp[)]"
     r_localvarnofp  = re.compile(LOCALVAR)
     r_localvarfp    = re.compile(LOCALVARFP)
-    r_localvar_esp  = re.compile(r"(\d*)[(]%esp[)]")
+    r_localvar_esp  = re.compile(r"(-?\d*)[(]%esp[)]")
     r_localvar_ebp  = re.compile(r"(-?\d*)[(]%ebp[)]")
 
     r_rel_label      = re.compile(r"(\d+):\s*$")
@@ -1044,7 +1051,7 @@
     OPERAND = r'(?:[-\w$%+.:@"]+(?:[(][\w%,]+[)])?|[(][\w%,]+[)])'
     LABEL   = r'([a-zA-Z_$.][a-zA-Z0-9_$@.]*)'
     OFFSET_LABELS   = 2**30
-    TOP_OF_STACK = '0(%rsp)'
+    TOP_OF_STACK_MINUS_WORD = '-8(%rsp)'
 
     r_functionstart = re.compile(r"\t.type\s+"+LABEL+",\s*[@]function\s*$")
     r_functionend   = re.compile(r"\t.size\s+"+LABEL+",\s*[.]-"+LABEL+"\s*$")
@@ -1140,7 +1147,7 @@
     CALLEE_SAVE_REGISTERS = ['ebx', 'esi', 'edi', 'ebp']
     REG2LOC = dict((_reg, LOC_REG | ((_i+1)<<2))
                    for _i, _reg in enumerate(CALLEE_SAVE_REGISTERS))
-    TOP_OF_STACK = 'DWORD PTR [esp]'
+    TOP_OF_STACK_MINUS_WORD = 'DWORD PTR [esp-4]'
 
     OPERAND = r'(?:(:?WORD|DWORD|BYTE) PTR |OFFSET )?[_\w?:@$]*(?:[-+0-9]+)?(:?\[[-+*\w0-9]+\])?'
     LABEL   = r'([a-zA-Z_$@.][a-zA-Z0-9_$@.]*)'
@@ -1170,7 +1177,7 @@
     r_gcroot_marker = re.compile(r"$1") # never matches
     r_gcroot_marker_var = re.compile(r"DWORD PTR .+_constant_always_one_.+pypy_asm_gcroot")
     r_gcnocollect_marker = re.compile(r"\spypy_asm_gc_nocollect\(("+OPERAND+")\);")
-    r_bottom_marker = re.compile(r"; .+\tpypy_asm_stack_bottom\(\);")
+    r_bottom_marker = re.compile(r"; .+\spypy_asm_stack_bottom\(\);")
 
     FUNCTIONS_NOT_RETURNING = {
         '__exit': None,
@@ -1323,12 +1330,11 @@
         self.verbose = verbose
         self.shuffle = shuffle
         self.gcmaptable = []
-        self.seen_main = False
 
-    def process(self, iterlines, newfile, entrypoint='main', filename='?'):
+    def process(self, iterlines, newfile, filename='?'):
         for in_function, lines in self.find_functions(iterlines):
             if in_function:
-                tracker = self.process_function(lines, entrypoint, filename)
+                tracker = self.process_function(lines, filename)
                 lines = tracker.lines
             self.write_newfile(newfile, lines, filename.split('.')[0])
         if self.verbose == 1:
@@ -1337,11 +1343,9 @@
     def write_newfile(self, newfile, lines, grist):
         newfile.writelines(lines)
 
-    def process_function(self, lines, entrypoint, filename):
+    def process_function(self, lines, filename):
         tracker = self.FunctionGcRootTracker(
             lines, filetag=getidentifier(filename))
-        is_main = tracker.funcname == entrypoint
-        tracker.is_stack_bottom = is_main
         if self.verbose == 1:
             sys.stderr.write('.')
         elif self.verbose > 1:
@@ -1356,7 +1360,6 @@
             self.gcmaptable[:0] = table
         else:
             self.gcmaptable.extend(table)
-        self.seen_main |= is_main
         return tracker
 
 class ElfAssemblerParser(AssemblerParser):
@@ -1432,11 +1435,6 @@
         if functionlines:
             yield in_function, functionlines
 
-    def process_function(self, lines, entrypoint, filename):
-        entrypoint = '_' + entrypoint
-        return super(DarwinAssemblerParser, self).process_function(
-            lines, entrypoint, filename)
-
 class DarwinAssemblerParser64(DarwinAssemblerParser):
     format = "darwin64"
     FunctionGcRootTracker = DarwinFunctionGcRootTracker64
@@ -1494,11 +1492,6 @@
             "missed the end of the previous function")
         yield False, functionlines
 
-    def process_function(self, lines, entrypoint, filename):
-        entrypoint = '_' + entrypoint
-        return super(MsvcAssemblerParser, self).process_function(
-            lines, entrypoint, filename)
-
     def write_newfile(self, newfile, lines, grist):
         newlines = []
         for line in lines:
@@ -1560,24 +1553,21 @@
         self.shuffle = shuffle     # to debug the sorting logic in asmgcroot.py
         self.format = format
         self.gcmaptable = []
-        self.seen_main = False
 
     def dump_raw_table(self, output):
-        print >> output, "seen_main = %d" % (self.seen_main,)
+        print >> output, 'raw table'
         for entry in self.gcmaptable:
             print >> output, entry
 
     def reload_raw_table(self, input):
         firstline = input.readline()
-        assert firstline.startswith("seen_main = ")
-        self.seen_main |= bool(int(firstline[len("seen_main = "):].strip()))
+        assert firstline == 'raw table\n'
         for line in input:
             entry = eval(line)
             assert type(entry) is tuple
             self.gcmaptable.append(entry)
 
     def dump(self, output):
-        assert self.seen_main
 
         def _globalname(name, disp=""):
             return tracker_cls.function_names_prefix + name
@@ -1649,8 +1639,8 @@
             s = """\
             /* See description in asmgcroot.py */
             .cfi_startproc
-            movq\t%rdi, %rdx\t/* 1st argument, which is the callback */
-            movq\t%rsi, %rcx\t/* 2nd argument, which is gcrootanchor */
+            /* %rdi is the 1st argument, which is the callback */
+            /* %rsi is the 2nd argument, which is gcrootanchor */
             movq\t%rsp, %rax\t/* my frame top address */
             pushq\t%rax\t\t/* ASM_FRAMEDATA[8] */
             pushq\t%rbp\t\t/* ASM_FRAMEDATA[7] */
@@ -1663,15 +1653,15 @@
             /* Add this ASM_FRAMEDATA to the front of the circular linked */
             /* list.  Let's call it 'self'.                               */
 
-            movq\t8(%rcx), %rax\t/* next = gcrootanchor->next */
+            movq\t8(%rsi), %rax\t/* next = gcrootanchor->next */
             pushq\t%rax\t\t\t\t/* self->next = next */
-            pushq\t%rcx\t\t\t/* self->prev = gcrootanchor */
-            movq\t%rsp, 8(%rcx)\t/* gcrootanchor->next = self */
+            pushq\t%rsi\t\t\t/* self->prev = gcrootanchor */
+            movq\t%rsp, 8(%rsi)\t/* gcrootanchor->next = self */
             movq\t%rsp, 0(%rax)\t\t\t/* next->prev = self */
             .cfi_def_cfa_offset 80\t/* 9 pushes + the retaddr = 80 bytes */
 
             /* note: the Mac OS X 16 bytes alignment must be respected. */
-            call\t*%rdx\t\t/* invoke the callback */
+            call\t*%rdi\t\t/* invoke the callback */
 
             /* Detach this ASM_FRAMEDATA from the circular linked list */
             popq\t%rsi\t\t/* prev = self->prev */
@@ -1688,7 +1678,7 @@
             popq\t%rcx\t\t/* ignored      ASM_FRAMEDATA[8] */
 
             /* the return value is the one of the 'call' above, */
-            /* because %rax (and possibly %rdx) are unmodified  */
+            /* because %rax is unmodified  */
             ret
             .cfi_endproc
             """
@@ -1835,11 +1825,11 @@
             """.replace("__gccallshapes", _globalname("__gccallshapes"))
             output.writelines(shapelines)
 
-    def process(self, iterlines, newfile, entrypoint='main', filename='?'):
+    def process(self, iterlines, newfile, filename='?'):
         parser = PARSERS[format](verbose=self.verbose, shuffle=self.shuffle)
         for in_function, lines in parser.find_functions(iterlines):
             if in_function:
-                tracker = parser.process_function(lines, entrypoint, filename)
+                tracker = parser.process_function(lines, filename)
                 lines = tracker.lines
             parser.write_newfile(newfile, lines, filename.split('.')[0])
         if self.verbose == 1:
@@ -1848,7 +1838,6 @@
             self.gcmaptable[:0] = parser.gcmaptable
         else:
             self.gcmaptable.extend(parser.gcmaptable)
-        self.seen_main |= parser.seen_main
 
 
 class UnrecognizedOperation(Exception):
@@ -1915,7 +1904,6 @@
             format = 'elf64'
         else:
             format = 'elf'
-    entrypoint = 'main'
     while len(sys.argv) > 1:
         if sys.argv[1] == '-v':
             del sys.argv[1]
@@ -1929,9 +1917,9 @@
         elif sys.argv[1].startswith('-f'):
             format = sys.argv[1][2:]
             del sys.argv[1]
-        elif sys.argv[1].startswith('-m'):
-            entrypoint = sys.argv[1][2:]
-            del sys.argv[1]
+        elif sys.argv[1].startswith('-'):
+            print >> sys.stderr, "unrecognized option:", sys.argv[1]
+            sys.exit(1)
         else:
             break
     tracker = GcRootTracker(verbose=verbose, shuffle=shuffle, format=format)
@@ -1940,7 +1928,7 @@
         firstline = f.readline()
         f.seek(0)
         assert firstline, "file %r is empty!" % (fn,)
-        if firstline.startswith('seen_main = '):
+        if firstline == 'raw table\n':
             tracker.reload_raw_table(f)
             f.close()
         else:
@@ -1948,7 +1936,7 @@
             lblfn = fn[:-2] + '.lbl.s'
             g = open(lblfn, 'w')
             try:
-                tracker.process(f, g, entrypoint=entrypoint, filename=fn)
+                tracker.process(f, g, filename=fn)
             except:
                 g.close()
                 os.unlink(lblfn)
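
The switch from TOP_OF_STACK to TOP_OF_STACK_MINUS_WORD goes together with the reordering of the push/pop visitors above: the copy is now emitted on the side of the InsnStackAdjust where %esp does not point at the stored word (before the decrement for pushl, after the increment for popl), so the slot is addressed as -4(%esp), -8(%rsp) or DWORD PTR [esp-4] in both directions. A tiny arithmetic illustration of why the two descriptions name the same memory word, with plain integers instead of Insn objects:

WORD = 4            # x86-32; the 64-bit tracker uses 8 and -8(%rsp)

def push(esp):
    # pushl %src: the value is stored at esp - WORD, then %esp is lowered
    stored_at = esp - WORD
    return stored_at, esp - WORD

def pop(esp):
    # popl %dst: %esp is raised first, then the value is read from esp - WORD
    new_esp = esp + WORD
    return new_esp - WORD, new_esp

esp0 = 1000
slot, esp1 = push(esp0)     # slot == 996, esp1 == 996
read, esp2 = pop(esp1)      # read == 996, esp2 == 1000
assert slot == read == esp0 - WORD
assert esp2 == esp0
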
diff --git a/pypy/translator/c/genc.py b/pypy/translator/c/genc.py
--- a/pypy/translator/c/genc.py
+++ b/pypy/translator/c/genc.py
@@ -570,7 +570,10 @@
             mk.definition('ASMFILES', sfiles)
             mk.definition('ASMLBLFILES', lblsfiles)
             mk.definition('GCMAPFILES', gcmapfiles)
-            mk.definition('DEBUGFLAGS', '-O2 -fomit-frame-pointer -g')
+            if sys.platform == 'win32':
+                mk.definition('DEBUGFLAGS', '/Zi')
+            else:
+                mk.definition('DEBUGFLAGS', '-O2 -fomit-frame-pointer -g')
 
             if self.config.translation.shared:
                 mk.definition('PYPY_MAIN_FUNCTION', "pypy_main_startup")
@@ -602,7 +605,7 @@
                         'cmd /c $(MASM) /nologo /Cx /Cp /Zm /coff /Fo$@ /c $< $(INCLUDEDIRS)')
                 mk.rule('.c.gcmap', '',
                         ['$(CC) /nologo $(ASM_CFLAGS) /c /FAs /Fa$*.s $< $(INCLUDEDIRS)',
-                         'cmd /c ' + python + '$(PYPYDIR)/translator/c/gcc/trackgcroot.py -fmsvc -m$(PYPY_MAIN_FUNCTION) -t $*.s > $@']
+                         'cmd /c ' + python + '$(PYPYDIR)/translator/c/gcc/trackgcroot.py -fmsvc -t $*.s > $@']
                         )
                 mk.rule('gcmaptable.c', '$(GCMAPFILES)',
                         'cmd /c ' + python + '$(PYPYDIR)/translator/c/gcc/trackgcroot.py -fmsvc $(GCMAPFILES) > $@')
@@ -613,7 +616,7 @@
                 mk.rule('%.lbl.s %.gcmap', '%.s',
                         [python +
                              '$(PYPYDIR)/translator/c/gcc/trackgcroot.py '
-                             '-m$(PYPY_MAIN_FUNCTION) -t $< > $*.gctmp',
+                             '-t $< > $*.gctmp',
                          'mv $*.gctmp $*.gcmap'])
                 mk.rule('gcmaptable.s', '$(GCMAPFILES)',
                         [python +
@@ -623,7 +626,10 @@
                 mk.rule('.PRECIOUS', '%.s', "# don't remove .s files if Ctrl-C'ed")
 
         else:
-            mk.definition('DEBUGFLAGS', '-O1 -g')
+            if sys.platform == 'win32':
+                mk.definition('DEBUGFLAGS', '/Zi')
+            else:
+                mk.definition('DEBUGFLAGS', '-O1 -g')
         mk.write()
         #self.translator.platform,
         #                           ,
@@ -682,28 +688,54 @@
     def getothernodes(self):
         return self.othernodes[:]
 
+    def getbasecfilefornode(self, node, basecname):
+        # For FuncNode instances, use the python source filename (relative to
+        # the top directory):
+        if hasattr(node.obj, 'graph'):
+            g = node.obj.graph
+            # Lookup the filename from the function.
+            # However, not all FunctionGraph objs actually have a "func":
+            if hasattr(g, 'func'):
+                if g.filename.endswith('.py'):
+                    localpath = py.path.local(g.filename)
+                    pypkgpath = localpath.pypkgpath()
+                    if pypkgpath:
+                        relpypath =  localpath.relto(pypkgpath)
+                        return relpypath.replace('.py', '.c')
+        return basecname
+
     def splitnodesimpl(self, basecname, nodes, nextra, nbetween,
                        split_criteria=SPLIT_CRITERIA):
+        # Gather nodes by some criteria:
+        nodes_by_base_cfile = {}
+        for node in nodes:
+            c_filename = self.getbasecfilefornode(node, basecname)
+            if c_filename in nodes_by_base_cfile:
+                nodes_by_base_cfile[c_filename].append(node)
+            else:
+                nodes_by_base_cfile[c_filename] = [node]
+
         # produce a sequence of nodes, grouped into files
         # which have no more than SPLIT_CRITERIA lines
-        iternodes = iter(nodes)
-        done = [False]
-        def subiter():
-            used = nextra
-            for node in iternodes:
-                impl = '\n'.join(list(node.implementation())).split('\n')
-                if not impl:
-                    continue
-                cost = len(impl) + nbetween
-                yield node, impl
-                del impl
-                if used + cost > split_criteria:
-                    # split if criteria met, unless we would produce nothing.
-                    raise StopIteration
-                used += cost
-            done[0] = True
-        while not done[0]:
-            yield self.uniquecname(basecname), subiter()
+        for basecname in nodes_by_base_cfile:
+            iternodes = iter(nodes_by_base_cfile[basecname])
+            done = [False]
+            def subiter():
+                used = nextra
+                for node in iternodes:
+                    impl = '\n'.join(list(node.implementation())).split('\n')
+                    if not impl:
+                        continue
+                    cost = len(impl) + nbetween
+                    yield node, impl
+                    del impl
+                    if used + cost > split_criteria:
+                        # split if criteria met, unless we would produce nothing.
+                        raise StopIteration
+                    used += cost
+                done[0] = True
+            while not done[0]:
+                yield self.uniquecname(basecname), subiter()
 
     def gen_readable_parts_of_source(self, f):
         split_criteria_big = SPLIT_CRITERIA
@@ -900,8 +932,9 @@
     print >> f, '}'
 
 def commondefs(defines):
-    from pypy.rlib.rarithmetic import LONG_BIT
+    from pypy.rlib.rarithmetic import LONG_BIT, LONGLONG_BIT
     defines['PYPY_LONG_BIT'] = LONG_BIT
+    defines['PYPY_LONGLONG_BIT'] = LONGLONG_BIT
 
 def add_extra_files(eci):
     srcdir = py.path.local(autopath.pypydir).join('translator', 'c', 'src')
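
The rewritten splitnodesimpl() above first buckets the nodes by a per-source C file name
(getbasecfilefornode maps a Python source path such as rlib/rposix.py to a C file name;
the new test in test_standalone.py further down expects names like rlib_rposix.c), and only
then applies the existing line-budget split inside each bucket. A minimal, self-contained
sketch of that two-step idea; split_nodes, name_for, size_of and the sample data below are
illustrative stand-ins, not PyPy APIs:

    # Sketch only: group nodes by a source-derived C file name, then split
    # each group on a line budget, as the new splitnodesimpl() does.
    def split_nodes(nodes, default_name, name_for, size_of, budget=4000):
        groups = {}
        for node in nodes:
            groups.setdefault(name_for(node, default_name), []).append(node)
        for base, members in groups.items():
            used, chunk = 0, []
            for node in members:
                chunk.append(node)
                used += size_of(node)
                if used > budget:          # split once the budget is exceeded
                    yield base, chunk
                    used, chunk = 0, []
            if chunk:
                yield base, chunk

    nodes = [('rlib/rposix.py', 120), ('rlib/rposix.py', 90), ('other', 10)]
    name_for = lambda n, d: (n[0].replace('/', '_').replace('.py', '.c')
                             if n[0].endswith('.py') else d)
    for fname, chunk in split_nodes(nodes, 'implement.c', name_for,
                                    lambda n: n[1], budget=150):
        print fname, len(chunk)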
diff --git a/pypy/translator/c/node.py b/pypy/translator/c/node.py
--- a/pypy/translator/c/node.py
+++ b/pypy/translator/c/node.py
@@ -1031,7 +1031,7 @@
             if (issubclass(value, BaseException) and
                 value.__module__ == 'exceptions'):
                 return 'PyExc_' + value.__name__
-            if value is py.code._AssertionError:
+            if issubclass(value, AssertionError):
                 return 'PyExc_AssertionError'
             if value is _StackOverflow:
                 return 'PyExc_RuntimeError'
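
The node.py change replaces an identity test against py.code._AssertionError with an
issubclass() check, so any AssertionError subclass (including one installed by a test
framework's assertion reinterpretation) still maps to PyExc_AssertionError. A tiny
illustration of the difference; ReinterpretedAssertionError is a hypothetical subclass:

    class ReinterpretedAssertionError(AssertionError):   # hypothetical subclass
        pass

    print ReinterpretedAssertionError is AssertionError             # False: identity misses it
    print issubclass(ReinterpretedAssertionError, AssertionError)   # True: subclass check matches
    print issubclass(AssertionError, AssertionError)                # True: plain case still works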
diff --git a/pypy/translator/c/src/cjkcodecs/multibytecodec.c b/pypy/translator/c/src/cjkcodecs/multibytecodec.c
--- a/pypy/translator/c/src/cjkcodecs/multibytecodec.c
+++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.c
@@ -1,4 +1,5 @@
 #include <stdlib.h>
+#include <string.h>
 #include "src/cjkcodecs/multibytecodec.h"
 
 
@@ -93,6 +94,22 @@
   return d->inbuf - d->inbuf_start;
 }
 
+Py_ssize_t pypy_cjk_dec_replace_on_error(struct pypy_cjk_dec_s* d,
+                                         Py_UNICODE *newbuf, Py_ssize_t newlen,
+                                         Py_ssize_t in_offset)
+{
+  if (newlen > 0)
+    {
+      if (d->outbuf + newlen > d->outbuf_end)
+        if (expand_decodebuffer(d, newlen) == -1)
+          return MBERR_NOMEMORY;
+      memcpy(d->outbuf, newbuf, newlen * sizeof(Py_UNICODE));
+      d->outbuf += newlen;
+    }
+  d->inbuf = d->inbuf_start + in_offset;
+  return 0;
+}
+
 /************************************************************/
 
 struct pypy_cjk_enc_s *pypy_cjk_enc_init(const MultibyteCodec *codec,
@@ -209,3 +226,19 @@
 {
   return d->inbuf - d->inbuf_start;
 }
+
+Py_ssize_t pypy_cjk_enc_replace_on_error(struct pypy_cjk_enc_s* d,
+                                         char *newbuf, Py_ssize_t newlen,
+                                         Py_ssize_t in_offset)
+{
+  if (newlen > 0)
+    {
+      if (d->outbuf + newlen > d->outbuf_end)
+        if (expand_encodebuffer(d, newlen) == -1)
+          return MBERR_NOMEMORY;
+      memcpy(d->outbuf, newbuf, newlen);
+      d->outbuf += newlen;
+    }
+  d->inbuf = d->inbuf_start + in_offset;
+  return 0;
+}
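
Both new helpers implement the same "replace on error" step: make sure the replacement
fits in the output buffer, copy it in, and reposition the input pointer at in_offset.
A rough Python model of that control flow; buffer growth is simplified and the value of
MBERR_NOMEMORY is assumed for the sketch:

    MBERR_NOMEMORY = -4                      # assumed value, for the sketch only

    def replace_on_error(state, replacement, in_offset, max_out=1024):
        if replacement:
            if len(state['out']) + len(replacement) > max_out:
                return MBERR_NOMEMORY        # the C code first tries expand_*buffer()
            state['out'] += replacement      # memcpy() into d->outbuf in the C version
        state['inpos'] = in_offset           # d->inbuf = d->inbuf_start + in_offset
        return 0

    state = {'out': u'', 'inpos': 0}
    print replace_on_error(state, u'\ufffd', 3), state['inpos'], len(state['out'])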
diff --git a/pypy/translator/c/src/cjkcodecs/multibytecodec.h b/pypy/translator/c/src/cjkcodecs/multibytecodec.h
--- a/pypy/translator/c/src/cjkcodecs/multibytecodec.h
+++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.h
@@ -102,6 +102,8 @@
 Py_ssize_t pypy_cjk_dec_outlen(struct pypy_cjk_dec_s *);
 Py_ssize_t pypy_cjk_dec_inbuf_remaining(struct pypy_cjk_dec_s *d);
 Py_ssize_t pypy_cjk_dec_inbuf_consumed(struct pypy_cjk_dec_s* d);
+Py_ssize_t pypy_cjk_dec_replace_on_error(struct pypy_cjk_dec_s* d,
+                                         Py_UNICODE *, Py_ssize_t, Py_ssize_t);
 
 struct pypy_cjk_enc_s {
   const MultibyteCodec *codec;
@@ -119,6 +121,8 @@
 Py_ssize_t pypy_cjk_enc_outlen(struct pypy_cjk_enc_s *);
 Py_ssize_t pypy_cjk_enc_inbuf_remaining(struct pypy_cjk_enc_s *d);
 Py_ssize_t pypy_cjk_enc_inbuf_consumed(struct pypy_cjk_enc_s* d);
+Py_ssize_t pypy_cjk_enc_replace_on_error(struct pypy_cjk_enc_s* d,
+                                         char *, Py_ssize_t, Py_ssize_t);
 
 /* list of codecs defined in the .c files */
 
diff --git a/pypy/translator/c/src/int.h b/pypy/translator/c/src/int.h
--- a/pypy/translator/c/src/int.h
+++ b/pypy/translator/c/src/int.h
@@ -73,15 +73,28 @@
 
 /* NB. shifting has same limitations as C: the shift count must be
        >= 0 and < LONG_BITS. */
-#define OP_INT_RSHIFT(x,y,r)    r = Py_ARITHMETIC_RIGHT_SHIFT(long, x, y)
-#define OP_UINT_RSHIFT(x,y,r)   r = (x) >> (y)
-#define OP_LLONG_RSHIFT(x,y,r)  r = Py_ARITHMETIC_RIGHT_SHIFT(PY_LONG_LONG,x,y)
-#define OP_ULLONG_RSHIFT(x,y,r) r = (x) >> (y)
+#define CHECK_SHIFT_RANGE(y, bits) RPyAssert(y >= 0 && y < bits, \
+	       "The shift count is outside of the supported range")
 
-#define OP_INT_LSHIFT(x,y,r)    r = (x) << (y)
-#define OP_UINT_LSHIFT(x,y,r)   r = (x) << (y)
-#define OP_LLONG_LSHIFT(x,y,r)  r = (x) << (y)
-#define OP_ULLONG_LSHIFT(x,y,r) r = (x) << (y)
+
+#define OP_INT_RSHIFT(x,y,r)    CHECK_SHIFT_RANGE(y, PYPY_LONG_BIT); \
+						r = Py_ARITHMETIC_RIGHT_SHIFT(long, x, (y))
+#define OP_UINT_RSHIFT(x,y,r)   CHECK_SHIFT_RANGE(y, PYPY_LONG_BIT); \
+						r = (x) >> (y)
+#define OP_LLONG_RSHIFT(x,y,r)  CHECK_SHIFT_RANGE(y, PYPY_LONGLONG_BIT); \
+						r = Py_ARITHMETIC_RIGHT_SHIFT(PY_LONG_LONG,x, (y))
+#define OP_ULLONG_RSHIFT(x,y,r) CHECK_SHIFT_RANGE(y, PYPY_LONGLONG_BIT); \
+						r = (x) >> (y)
+
+
+#define OP_INT_LSHIFT(x,y,r)    CHECK_SHIFT_RANGE(y, PYPY_LONG_BIT); \
+							r = (x) << (y)
+#define OP_UINT_LSHIFT(x,y,r)   CHECK_SHIFT_RANGE(y, PYPY_LONG_BIT); \
+							r = (x) << (y)
+#define OP_LLONG_LSHIFT(x,y,r)  CHECK_SHIFT_RANGE(y, PYPY_LONGLONG_BIT); \
+							r = (x) << (y)
+#define OP_ULLONG_LSHIFT(x,y,r) CHECK_SHIFT_RANGE(y, PYPY_LONGLONG_BIT); \
+							r = (x) << (y)
 
 #define OP_INT_LSHIFT_OVF(x,y,r) \
 	OP_INT_LSHIFT(x,y,r); \
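
The new CHECK_SHIFT_RANGE macro turns an out-of-range shift count into an RPyAssert
failure instead of C undefined behaviour. The same check, written out in Python with the
bit widths hardcoded for the sketch (the real PYPY_LONG_BIT/PYPY_LONGLONG_BIT values come
from commondefs() above):

    PYPY_LONG_BIT = 64       # illustrative only

    def checked_lshift(x, y, bits=PYPY_LONG_BIT):
        assert 0 <= y < bits, "The shift count is outside of the supported range"
        return x << y

    print checked_lshift(10, 2)              # 40, as in the new test below
    try:
        checked_lshift(1, PYPY_LONG_BIT)
    except AssertionError, e:
        print e                              # out-of-range count is reported, not UB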
diff --git a/pypy/translator/c/src/main.h b/pypy/translator/c/src/main.h
--- a/pypy/translator/c/src/main.h
+++ b/pypy/translator/c/src/main.h
@@ -23,12 +23,19 @@
 #include "src/winstuff.c"
 #endif
 
-int PYPY_MAIN_FUNCTION(int argc, char *argv[])
+#ifdef __GNUC__
+/* Hack to prevent this function from being inlined.  Helps asmgcc
+   because the main() function has often a different prologue/epilogue. */
+int pypy_main_function(int argc, char *argv[]) __attribute__((__noinline__));
+#endif
+
+int pypy_main_function(int argc, char *argv[])
 {
     char *errmsg;
     int i, exitcode;
     RPyListOfString *list;
 
+    pypy_asm_stack_bottom();
     instrument_setup();
 
     if (sizeof(void*) != SIZEOF_LONG) {
@@ -72,6 +79,12 @@
     fprintf(stderr, "Fatal error during initialization: %s\n", errmsg);
 #endif
     abort();
+    return 1;
+}
+
+int PYPY_MAIN_FUNCTION(int argc, char *argv[])
+{
+    return pypy_main_function(argc, argv);
 }
 
 #endif /* PYPY_NOT_MAIN_FILE */
diff --git a/pypy/translator/c/src/mem.h b/pypy/translator/c/src/mem.h
--- a/pypy/translator/c/src/mem.h
+++ b/pypy/translator/c/src/mem.h
@@ -222,6 +222,15 @@
 
 #endif /* USING_BOEHM_GC */
 
+
+#ifdef USING_NO_GC_AT_ALL
+#define OP_BOEHM_ZERO_MALLOC(size, r, restype, is_atomic, is_varsize)  \
+  r = (restype) calloc(1, size);
+#define OP_BOEHM_DISAPPEARING_LINK(link, obj, r)  /* nothing */
+#define OP_GC__DISABLE_FINALIZERS(r)  /* nothing */
+#define OP_GC__ENABLE_FINALIZERS(r)  /* nothing */
+#endif
+
 /************************************************************/
 /* weakref support */
 
diff --git a/pypy/translator/c/test/test_newgc.py b/pypy/translator/c/test/test_newgc.py
--- a/pypy/translator/c/test/test_newgc.py
+++ b/pypy/translator/c/test/test_newgc.py
@@ -1117,6 +1117,7 @@
         S = lltype.GcStruct('S', ('u', lltype.Ptr(U)))
         A = lltype.GcArray(lltype.Ptr(S))
         filename = self.filename_dump_typeids_z
+        open_flags = os.O_WRONLY | os.O_CREAT | getattr(os, 'O_BINARY', 0)
 
         def fn():
             s = lltype.malloc(S)
@@ -1128,7 +1129,7 @@
             #
             p = rgc.get_typeids_z()
             s = ''.join([p[i] for i in range(len(p))])
-            fd = os.open(filename, os.O_WRONLY | os.O_CREAT, 0666)
+            fd = os.open(filename, open_flags, 0666)
             os.write(fd, s)
             os.close(fd)
             return 0
@@ -1137,7 +1138,7 @@
 
     def test_write_typeids_z(self):
         self.run("write_typeids_z")
-        f = open(self.filename_dump_typeids_z)
+        f = open(self.filename_dump_typeids_z, 'rb')
         data_z = f.read()
         f.close()
         import zlib
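
The test now writes and reads the dump in binary mode on both ends; getattr(os,
'O_BINARY', 0) is the usual portable way to add the Windows-only flag without breaking
POSIX, where it does not exist. A minimal sketch (the file name is a demo value):

    import os

    # O_BINARY exists only on Windows; getattr() makes the flag a no-op elsewhere.
    open_flags = os.O_WRONLY | os.O_CREAT | getattr(os, 'O_BINARY', 0)

    fd = os.open('typeids_demo.z', open_flags, 0666)
    os.write(fd, 'not really zlib data')
    os.close(fd)
    print open('typeids_demo.z', 'rb').read()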
diff --git a/pypy/translator/c/test/test_standalone.py b/pypy/translator/c/test/test_standalone.py
--- a/pypy/translator/c/test/test_standalone.py
+++ b/pypy/translator/c/test/test_standalone.py
@@ -55,6 +55,13 @@
         data = cbuilder.cmdexec('hi there')
         assert data.startswith('''hello world\nargument count: 2\n   'hi'\n   'there'\n''')
 
+        # Verify that the generated C files have sane names:
+        gen_c_files = [str(f) for f in cbuilder.extrafiles]
+        for expfile in ('rlib_rposix.c', 
+                        'rpython_lltypesystem_rstr.c',
+                        'translator_c_test_test_standalone.c'):
+            assert cbuilder.targetdir.join(expfile) in gen_c_files
+
     def test_print(self):
         def entry_point(argv):
             print "hello simpler world"
@@ -596,6 +603,42 @@
         # The traceback stops at f() because it's the first function that
         # captures the AssertionError, which makes the program abort.
 
+    def test_int_lshift_too_large(self):
+        from pypy.rlib.rarithmetic import LONG_BIT, LONGLONG_BIT
+        def entry_point(argv):
+            a = int(argv[1])
+            b = int(argv[2])
+            print a << b
+            return 0
+
+        t, cbuilder = self.compile(entry_point, debug=True)
+        out = cbuilder.cmdexec("10 2", expect_crash=False)
+        assert out.strip() == str(10 << 2)
+        cases = [-4, LONG_BIT, LONGLONG_BIT]
+        for x in cases:
+            out, err = cbuilder.cmdexec("%s %s" % (1, x), expect_crash=True)
+            lines = err.strip()
+            assert 'The shift count is outside of the supported range' in lines
+
+    def test_llong_rshift_too_large(self):
+        from pypy.rlib.rarithmetic import LONG_BIT, LONGLONG_BIT
+        def entry_point(argv):
+            a = r_longlong(int(argv[1]))
+            b = r_longlong(int(argv[2]))
+            print a >> b
+            return 0
+
+        t, cbuilder = self.compile(entry_point, debug=True)
+        out = cbuilder.cmdexec("10 2", expect_crash=False)
+        assert out.strip() == str(10 >> 2)
+        out = cbuilder.cmdexec("%s %s" % (-42, LONGLONG_BIT - 1), expect_crash=False)
+        assert out.strip() == '-1'
+        cases = [-4, LONGLONG_BIT]
+        for x in cases:
+            out, err = cbuilder.cmdexec("%s %s" % (1, x), expect_crash=True)
+            lines = err.strip()
+            assert 'The shift count is outside of the supported range' in lines
+
     def test_ll_assert_error_debug(self):
         def entry_point(argv):
             ll_assert(len(argv) != 1, "foobar")
diff --git a/pypy/translator/driver.py b/pypy/translator/driver.py
--- a/pypy/translator/driver.py
+++ b/pypy/translator/driver.py
@@ -559,6 +559,7 @@
                 shutil.copy(str(soname), str(newsoname))
                 self.log.info("copied: %s" % (newsoname,))
             self.c_entryp = newexename
+        self.log.info('usession directory: %s' % (udir,))
         self.log.info("created: %s" % (self.c_entryp,))
 
     def task_compile_c(self):
diff --git a/pypy/translator/goal/app_main.py b/pypy/translator/goal/app_main.py
--- a/pypy/translator/goal/app_main.py
+++ b/pypy/translator/goal/app_main.py
@@ -143,6 +143,7 @@
     for key, value in items:
         print '  --jit %s=N %slow-level JIT parameter (default %s)' % (
             key, ' '*(18-len(key)), value)
+    print '  --jit off                  turn off the JIT'
 
 def print_version(*args):
     print "Python", sys.version
diff --git a/pypy/translator/goal/targetnumpystandalone.py b/pypy/translator/goal/targetnumpystandalone.py
--- a/pypy/translator/goal/targetnumpystandalone.py
+++ b/pypy/translator/goal/targetnumpystandalone.py
@@ -10,46 +10,32 @@
 """
 
 import time
-from pypy.module.micronumpy.numarray import SingleDimArray, Code, compute
+from pypy.module.micronumpy.compile import numpy_compile
 from pypy.jit.codewriter.policy import JitPolicy
-
-def create_array(size):
-    a = SingleDimArray(size)
-    for i in range(size):
-        a.storage[i] = float(i % 10)
-    return a
+from pypy.rpython.annlowlevel import hlstr
 
 def entry_point(argv):
     if len(argv) != 3:
         print __doc__
         return 1
-    bytecode = argv[1]
-    for b in bytecode:
-        if b not in 'alf':
-            print "WRONG BYTECODE"
-            print __doc__
-            return 2
     try:
         size = int(argv[2])
     except ValueError:
         print "INVALID LITERAL FOR INT:", argv[2]
         print __doc__
         return 3
-    no_arrays = bytecode.count('l')
-    no_floats = bytecode.count('f')
-    arrays = []
-    floats = []
-    for i in range(no_arrays):
-        arrays.append(create_array(size))
-    for i in range(no_floats):
-        floats.append(float(i + 1))
-    code = Code(bytecode, arrays, floats)
     t0 = time.time()
-    compute(code)
-    print "bytecode:", bytecode, "size:", size
+    main(argv[0], size)
+    print "bytecode:", argv[0], "size:", size
     print "took:", time.time() - t0
     return 0
 
+def main(bc, size):
+    if not isinstance(bc, str):
+        bc = hlstr(bc) # for tests
+    a = numpy_compile(bc, size)
+    a = a.compute()
+
 def target(*args):
     return entry_point, None
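
main() above accepts either a plain str (when the target is exercised untranslated, e.g.
in tests) or an RPython low-level string, normalizing the latter with hlstr(). A hedged
sketch of that normalization pattern; to_str and its fallback converter stand in for the
real hlstr(), which only applies to low-level strings:

    def to_str(bc, convert=str):        # convert stands in for hlstr()
        if not isinstance(bc, str):
            bc = convert(bc)
        return bc

    print to_str('lf')                  # already a str: passed through unchanged
    print to_str(u'lf')                 # anything else goes through the converter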
 
diff --git a/pypy/translator/goal/translate.py b/pypy/translator/goal/translate.py
--- a/pypy/translator/goal/translate.py
+++ b/pypy/translator/goal/translate.py
@@ -103,6 +103,8 @@
     specname = os.path.splitext(os.path.basename(targetspec))[0]
     sys.path.insert(0, os.path.dirname(targetspec))
     mod = __import__(specname)
+    if 'target' not in mod.__dict__:
+        raise Exception("file %r is not a valid targetxxx.py." % (targetspec,))
     return mod.__dict__
 
 def parse_options_and_load_target():
@@ -149,6 +151,9 @@
             log.ERROR("Could not find target %r" % (arg, ))
             sys.exit(1)
 
+    # apply the platform settings
+    set_platform(config)
+
     targetspec = translateconfig.targetspec
     targetspec_dic = load_target(targetspec)
 
@@ -164,9 +169,6 @@
                 existing_config=config,
                 translating=True)
 
-    # apply the platform settings
-    set_platform(config)
-
     # apply the optimization level settings
     set_opt_level(config, translateconfig.opt)
 
@@ -184,7 +186,7 @@
             print "\n\nTarget specific help:\n\n"
             targetspec_dic['print_help'](config)
         print "\n\nFor detailed descriptions of the command line options see"
-        print "http://codespeak.net/pypy/dist/pypy/doc/config/commandline.html"
+        print "http://pypy.readthedocs.org/en/latest/config/commandline.html"
         sys.exit(0)
     
     return targetspec_dic, translateconfig, config, args
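
With the new check, load_target() fails early and clearly when the imported targetxxx.py
does not define target(). Reassembled from the hunk above, the function now reads roughly:

    import os, sys

    def load_target(targetspec):
        specname = os.path.splitext(os.path.basename(targetspec))[0]
        sys.path.insert(0, os.path.dirname(targetspec))
        mod = __import__(specname)
        if 'target' not in mod.__dict__:
            raise Exception("file %r is not a valid targetxxx.py." % (targetspec,))
        return mod.__dict__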
diff --git a/pypy/translator/jvm/opcodes.py b/pypy/translator/jvm/opcodes.py
--- a/pypy/translator/jvm/opcodes.py
+++ b/pypy/translator/jvm/opcodes.py
@@ -98,6 +98,7 @@
     'jit_marker':               Ignore,
     'jit_force_virtualizable':  Ignore,
     'jit_force_virtual':        DoNothing,
+    'jit_force_quasi_immutable': Ignore,
 
     'debug_assert':              [], # TODO: implement?
     'debug_start_traceback':    Ignore,
diff --git a/pypy/translator/jvm/src/pypy/PyPy.java b/pypy/translator/jvm/src/pypy/PyPy.java
--- a/pypy/translator/jvm/src/pypy/PyPy.java
+++ b/pypy/translator/jvm/src/pypy/PyPy.java
@@ -964,12 +964,15 @@
         return a + File.separator + b;
     }
 
-    public String ll_strtod_formatd(String format, double d)
+    public String ll_strtod_formatd(double d, char code, int precision, int flags)
     {
         // XXX: this is really a quick hack to make things work.
         // it should disappear, because this function is not
         // supported by ootypesystem.
-        return Double.toString(d); // XXX: we are ignoring "format"
+        DecimalFormat format = new DecimalFormat("0.###");
+        format.setMinimumFractionDigits(precision);
+        format.setMaximumFractionDigits(precision);
+        return format.format(d);
     }
 
     // ----------------------------------------------------------------------
diff --git a/pypy/translator/jvm/test/test_float.py b/pypy/translator/jvm/test/test_float.py
--- a/pypy/translator/jvm/test/test_float.py
+++ b/pypy/translator/jvm/test/test_float.py
@@ -22,3 +22,14 @@
 
     def test_r_singlefloat(self):
         py.test.skip("not implemented: single-precision floats")
+
+    def test_format_float(self):
+        from pypy.rlib.rfloat import _formatd
+        def fn(precision):
+            return _formatd(10.01, 'd', precision, 0)
+
+        res = self.interpret(fn, [2])
+        assert res == "10.01"
+
+        res = self.interpret(fn, [1])
+        assert res == "10.0"
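
The new JVM test pins down the contract the DecimalFormat-based ll_strtod_formatd() must
now honour: 10.01 formatted with precision 2 is "10.01", and with precision 1 it rounds
to "10.0". Plain Python formatting shows the same expected output:

    print "%.2f" % 10.01    # 10.01
    print "%.1f" % 10.01    # 10.0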
diff --git a/pypy/translator/platform/__init__.py b/pypy/translator/platform/__init__.py
--- a/pypy/translator/platform/__init__.py
+++ b/pypy/translator/platform/__init__.py
@@ -38,6 +38,7 @@
     c_environ = None
 
     relevant_environ = ()
+    log_errors = True
 
     so_prefixes = ('',)
 
@@ -120,11 +121,12 @@
         if returncode != 0:
             errorfile = outname.new(ext='errors')
             errorfile.write(stderr, 'wb')
-            stderrlines = stderr.splitlines()
-            for line in stderrlines:
-                log.Error(line)
-            # ^^^ don't use ERROR, because it might actually be fine.
-            # Also, ERROR confuses lib-python/conftest.py.
+            if self.log_errors:
+                stderrlines = stderr.splitlines()
+                for line in stderrlines:
+                    log.Error(line)
+                # ^^^ don't use ERROR, because it might actually be fine.
+                # Also, ERROR confuses lib-python/conftest.py.
             raise CompilationError(stdout, stderr)
         else:
             for line in stderr.splitlines():
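
The new log_errors flag lets a caller silence the per-line error logging while still
getting the .errors file and the CompilationError. A reduced sketch of that pattern;
the names here are illustrative, not the platform API:

    class CompilationFailed(Exception):
        pass

    def report_failure(stdout, stderr, errorpath, log_errors=True, log=None):
        open(errorpath, 'wb').write(stderr)       # always keep the raw error output
        if log_errors and log is not None:
            for line in stderr.splitlines():
                log(line)                         # optional, can be turned off per platform
        raise CompilationFailed(stdout, stderr)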
diff --git a/pypy/translator/platform/darwin.py b/pypy/translator/platform/darwin.py
--- a/pypy/translator/platform/darwin.py
+++ b/pypy/translator/platform/darwin.py
@@ -68,12 +68,10 @@
 
 class Darwin_i386(Darwin):
     name = "darwin_i386"
-    link_flags = ('-arch', 'i386', '-mmacosx-version-min=10.4')
-    cflags = ('-arch', 'i386', '-O3', '-fomit-frame-pointer',
-              '-mmacosx-version-min=10.4')
+    link_flags = ('-arch', 'i386')
+    cflags = ('-arch', 'i386', '-O3', '-fomit-frame-pointer')
 
 class Darwin_x86_64(Darwin):
     name = "darwin_x86_64"
-    link_flags = ('-arch', 'x86_64', '-mmacosx-version-min=10.4')
-    cflags = ('-arch', 'x86_64', '-O3', '-fomit-frame-pointer',
-              '-mmacosx-version-min=10.4')
+    link_flags = ('-arch', 'x86_64')
+    cflags = ('-arch', 'x86_64', '-O3', '-fomit-frame-pointer')
diff --git a/pytest.py b/pytest.py
old mode 100644
new mode 100755
--- a/pytest.py
+++ b/pytest.py
@@ -1,7 +1,6 @@
+#!/usr/bin/env python
 """
 unit and functional testing with Python.
-(pypy version of startup script)
-see http://pytest.org for details.
 """
 __all__ = ['main']
 
@@ -9,23 +8,6 @@
 from _pytest import core as cmdline
 from _pytest import __version__
 
-# This pytest.py script is located in the pypy source tree
-# which has a copy of pytest and py within its source tree.
-# If the environment also has an installed version of pytest/py
-# we are bound to get warnings so we disable them.
-# XXX eventually pytest and py should not be inlined shipped
-# with the pypy source code but become a requirement for installation.
-
-import warnings
-warnings.filterwarnings("ignore",
-    "Module py was already imported", category=UserWarning)
-warnings.filterwarnings("ignore",
-    "Module _pytest was already imported",
-    category=UserWarning)
-warnings.filterwarnings("ignore",
-    "Module pytest was already imported",
-    category=UserWarning)
-
 if __name__ == '__main__': # if run as a script or by 'python -m pytest'
     raise SystemExit(main())
 else:

