[pypy-commit] pypy jitypes2: hg merge default

antocuni noreply at buildbot.pypy.org
Mon Jun 6 14:55:50 CEST 2011


Author: Antonio Cuni <anto.cuni at gmail.com>
Branch: jitypes2
Changeset: r44740:221ed58760b9
Date: 2011-06-06 14:55 +0200
http://bitbucket.org/pypy/pypy/changeset/221ed58760b9/

Log:	hg merge default

diff --git a/lib-python/modified-2.7/distutils/sysconfig.py b/lib-python/modified-2.7/distutils/sysconfig.py
--- a/lib-python/modified-2.7/distutils/sysconfig.py
+++ b/lib-python/modified-2.7/distutils/sysconfig.py
@@ -20,8 +20,10 @@
 if '__pypy__' in sys.builtin_module_names:
     from distutils.sysconfig_pypy import *
     from distutils.sysconfig_pypy import _config_vars # needed by setuptools
+    from distutils.sysconfig_pypy import _variable_rx # read_setup_file()
 else:
     from distutils.sysconfig_cpython import *
     from distutils.sysconfig_cpython import _config_vars # needed by setuptools
+    from distutils.sysconfig_cpython import _variable_rx # read_setup_file()
 
 
diff --git a/lib-python/modified-2.7/distutils/sysconfig_pypy.py b/lib-python/modified-2.7/distutils/sysconfig_pypy.py
--- a/lib-python/modified-2.7/distutils/sysconfig_pypy.py
+++ b/lib-python/modified-2.7/distutils/sysconfig_pypy.py
@@ -116,3 +116,7 @@
     if compiler.compiler_type == "unix":
         compiler.compiler_so.extend(['-fPIC', '-Wimplicit'])
         compiler.shared_lib_extension = get_config_var('SO')
+
+from sysconfig_cpython import (
+    parse_makefile, _variable_rx, expand_makefile_vars)
+
diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst
--- a/pypy/doc/cpython_differences.rst
+++ b/pypy/doc/cpython_differences.rst
@@ -173,6 +173,11 @@
     >>>> A.__del__ = lambda self: None
     __main__:1: RuntimeWarning: a __del__ method added to an existing type will not be called
 
+Even more obscure: the same is true, for old-style classes, if you attach
+a ``__del__`` method to an instance (even in CPython this does not work with
+new-style classes).  You get a RuntimeWarning in PyPy.  To fix these cases,
+just make sure there is a ``__del__`` method in the class to start with.
+
 
 Subclasses of built-in types
 ----------------------------
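As a side note on the documentation change above, here is a minimal sketch of the
case it describes (Python 2 only, since old-style classes are involved; this is
illustrative and not part of the patch)::

    class Old:                      # old-style class: no "object" base (Python 2)
        pass

    o = Old()
    o.__del__ = lambda: None        # CPython calls this when 'o' dies;
                                    # PyPy only emits a RuntimeWarning and
                                    # never calls it

    class Fixed:
        def __del__(self):          # a __del__ present in the class from the
            pass                    # start behaves the same on both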
diff --git a/pypy/doc/image/jitviewer.png b/pypy/doc/image/jitviewer.png
new file mode 100644
index 0000000000000000000000000000000000000000..ad2abca5c88125061fa519dcf3f9fada577573ee
GIT binary patch

[cut]

diff --git a/pypy/doc/index.rst b/pypy/doc/index.rst
--- a/pypy/doc/index.rst
+++ b/pypy/doc/index.rst
@@ -21,6 +21,8 @@
 
 * `speed.pypy.org`_: Daily benchmarks of how fast PyPy is
 
+* `potential project ideas`_: In case you want to get your feet wet...
+
 Documentation for the PyPy Python Interpreter
 ===============================================
 
@@ -59,8 +61,6 @@
   (if they are not already developed in the FAQ_).
   You can find logs of the channel here_.
 
-.. XXX play1? 
-
 Meeting PyPy developers
 =======================
 
@@ -83,7 +83,7 @@
 .. _`Release 1.5`: http://pypy.org/download.html
 .. _`speed.pypy.org`: http://speed.pypy.org
 .. _`RPython toolchain`: translation.html
-
+.. _`potential project ideas`: project-ideas.html
 
 Project Documentation
 =====================================
diff --git a/pypy/doc/project-ideas.rst b/pypy/doc/project-ideas.rst
--- a/pypy/doc/project-ideas.rst
+++ b/pypy/doc/project-ideas.rst
@@ -11,6 +11,12 @@
 `mailing list`_. This is simply for the reason that small possible projects
 tend to change very rapidly.
 
+This list is mostly meant to give an overview of potential projects. It is
+by no means exhaustive, and we're pleased if people come up with their own
+ideas for improvements. In any case, if you feel like working on one of these
+projects, or on anything else in PyPy, drop by on IRC or write to us on the
+`mailing list`_.
+
 Numpy improvements
 ------------------
 
@@ -23,27 +29,89 @@
 
 * interface with fortran/C libraries.
 
-Potential mentors: fijal
+Improving the jitviewer
+------------------------
 
-JIT tooling
------------
+Analyzing the performance of applications is always tricky. We have various
+tools, for example the `jitviewer`_, which helps us analyze performance.
 
-xxx
+The jitviewer shows the code generated by the PyPy JIT in a hierarchical way,
+as shown by the screenshot below:
+
+  - at the bottom level, it shows the Python source code of the compiled loops
+
+  - for each source code line, it shows the corresponding Python bytecode
+
+  - for each opcode, it shows the corresponding jit operations, which are the
+    ones actually sent to the backend for compiling (such as ``i15 = i10 <
+    2000`` in the example)
+
+.. image:: image/jitviewer.png
+
+We would like to add one level to this hierarchy, by showing the generated
+machine code for each jit operation.  The necessary information is already in
+the log file produced by the JIT, so it is "only" a matter of teaching the
+jitviewer to display it.  Ideally, the machine code should be hidden by
+default and viewable on request.
+
+The jitviewer is a web application based on flask and jinja2 (and jQuery on
+the client): if you have great web development skills and want to help PyPy,
+this is an ideal task to get started with, because it does not require any
+deep knowledge of the internals.
+
+Translation Toolchain
+---------------------
+
+* Incremental or distributed translation.
+
+* Allow separate compilation of extension modules.
 
 Work on some of other languages
 -------------------------------
 
-xxx
+There are various languages implemented using the RPython translation toolchain.
+One of the most interesting is the `JavaScript implementation`_, but there
+are others, such as Scheme or Prolog. An interesting project would be to
+improve the jittability of those, or to experiment with various optimizations.
 
 Various GCs
 -----------
 
-xxx
+PyPy has a pluggable garbage collection policy. This means that various
+garbage collectors can be written for specialized purposes, and various
+experiments can be done with the general-purpose ones. Some examples:
+
+* An incremental garbage collector with bounded maximal pause times,
+  crucial for games
+
+* A garbage collector that compacts memory better, for mobile devices
+
+* A concurrent garbage collector (a lot of work)
 
 Remove the GIL
 --------------
 
-xxx
+This is a major task that requires lots of thinking. However, a few potential
+subprojects can be identified, until a better overall plan is worked out:
 
-.. _`issue tracker`: ...
-.. _`mailing list`: ...
+* A thread-aware garbage collector
+
+* Better RPython primitives for dealing with concurrency
+
+* JIT passes to remove locks on objects
+
+* (maybe) implement locking in the Python interpreter
+
+* alternatively, look at Software Transactional Memory
+
+Experiment (again) with LLVM backend for RPython compilation
+------------------------------------------------------------
+
+We already tried working with LLVM, but at the time it was not mature enough
+for our needs. It's possible that this has changed; reviving the LLVM backend
+(or writing a new one from scratch) for static compilation would be a good project.
+
+.. _`issue tracker`: http://bugs.pypy.org
+.. _`mailing list`: http://mail.python.org/mailman/listinfo/pypy-dev
+.. _`jitviewer`: http://bitbucket.org/pypy/jitviewer
+.. _`JavaScript implementation`: https://bitbucket.org/pypy/lang-js/overview
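As an illustration of the hierarchy the new jitviewer section describes, the
displayed data can be pictured as nested records; this is only a rough sketch,
not the jitviewer's actual data model::

    # one compiled loop, as the jitviewer presents it: a source line contains
    # bytecodes, each bytecode contains jit operations; the proposed new level
    # would attach the generated machine code to each jit operation
    loop = {
        "source_line": "while i < 2000:",
        "bytecodes": [
            {"opcode": "COMPARE_OP",
             "jit_ops": [
                 {"op": "i15 = i10 < 2000",
                  "machine_code": None},   # to be filled by the proposed feature
             ]},
        ],
    }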
diff --git a/pypy/jit/backend/llsupport/gc.py b/pypy/jit/backend/llsupport/gc.py
--- a/pypy/jit/backend/llsupport/gc.py
+++ b/pypy/jit/backend/llsupport/gc.py
@@ -34,7 +34,7 @@
         pass
     def do_write_barrier(self, gcref_struct, gcref_newptr):
         pass
-    def rewrite_assembler(self, cpu, operations):
+    def rewrite_assembler(self, cpu, operations, gcrefs_output_list):
         return operations
     def can_inline_malloc(self, descr):
         return False
@@ -146,78 +146,6 @@
 # All code below is for the hybrid or minimark GC
 
 
-class GcRefList:
-    """Handles all references from the generated assembler to GC objects.
-    This is implemented as a nonmovable, but GC, list; the assembler contains
-    code that will (for now) always read from this list."""
-
-    GCREF_LIST = lltype.GcArray(llmemory.GCREF)     # followed by the GC
-
-    HASHTABLE = rffi.CArray(llmemory.Address)      # ignored by the GC
-    HASHTABLE_BITS = 10
-    HASHTABLE_SIZE = 1 << HASHTABLE_BITS
-
-    def initialize(self):
-        if we_are_translated(): n = 2000
-        else:                   n = 10    # tests only
-        self.list = self.alloc_gcref_list(n)
-        self.nextindex = 0
-        self.oldlists = []
-        # A pseudo dictionary: it is fixed size, and it may contain
-        # random nonsense after a collection moved the objects.  It is only
-        # used to avoid too many duplications in the GCREF_LISTs.
-        self.hashtable = lltype.malloc(self.HASHTABLE,
-                                       self.HASHTABLE_SIZE+1,
-                                       flavor='raw', track_allocation=False)
-        dummy = lltype.direct_ptradd(lltype.direct_arrayitems(self.hashtable),
-                                     self.HASHTABLE_SIZE)
-        dummy = llmemory.cast_ptr_to_adr(dummy)
-        for i in range(self.HASHTABLE_SIZE+1):
-            self.hashtable[i] = dummy
-
-    def alloc_gcref_list(self, n):
-        # Important: the GRREF_LISTs allocated are *non-movable*.  This
-        # requires support in the gc (hybrid GC or minimark GC so far).
-        if we_are_translated():
-            list = rgc.malloc_nonmovable(self.GCREF_LIST, n)
-            assert list, "malloc_nonmovable failed!"
-        else:
-            list = lltype.malloc(self.GCREF_LIST, n)     # for tests only
-        return list
-
-    def get_address_of_gcref(self, gcref):
-        assert lltype.typeOf(gcref) == llmemory.GCREF
-        # first look in the hashtable, using an inexact hash (fails after
-        # the object moves)
-        addr = llmemory.cast_ptr_to_adr(gcref)
-        hash = llmemory.cast_adr_to_int(addr, "forced")
-        hash -= hash >> self.HASHTABLE_BITS
-        hash &= self.HASHTABLE_SIZE - 1
-        addr_ref = self.hashtable[hash]
-        # the following test is safe anyway, because the addresses found
-        # in the hashtable are always the addresses of nonmovable stuff
-        # ('addr_ref' is an address inside self.list, not directly the
-        # address of a real moving GC object -- that's 'addr_ref.address[0]'.)
-        if addr_ref.address[0] == addr:
-            return addr_ref
-        # if it fails, add an entry to the list
-        if self.nextindex == len(self.list):
-            # reallocate first, increasing a bit the size every time
-            self.oldlists.append(self.list)
-            self.list = self.alloc_gcref_list(len(self.list) // 4 * 5)
-            self.nextindex = 0
-        # add it
-        index = self.nextindex
-        self.list[index] = gcref
-        addr_ref = lltype.direct_ptradd(lltype.direct_arrayitems(self.list),
-                                        index)
-        addr_ref = llmemory.cast_ptr_to_adr(addr_ref)
-        self.nextindex = index + 1
-        # record it in the hashtable
-        self.hashtable[hash] = addr_ref
-        return addr_ref
-
-
 class GcRootMap_asmgcc(object):
     """Handles locating the stack roots in the assembler.
     This is the class supporting --gcrootfinder=asmgcc.
@@ -527,6 +455,7 @@
     def __init__(self, gc_ll_descr):
         self.llop1 = gc_ll_descr.llop1
         self.WB_FUNCPTR = gc_ll_descr.WB_FUNCPTR
+        self.WB_ARRAY_FUNCPTR = gc_ll_descr.WB_ARRAY_FUNCPTR
         self.fielddescr_tid = get_field_descr(gc_ll_descr,
                                               gc_ll_descr.GCClass.HDR, 'tid')
         self.jit_wb_if_flag = gc_ll_descr.GCClass.JIT_WB_IF_FLAG
@@ -546,6 +475,13 @@
         funcaddr = llmemory.cast_ptr_to_adr(funcptr)
         return cpu.cast_adr_to_int(funcaddr)
 
+    def get_write_barrier_from_array_fn(self, cpu):
+        llop1 = self.llop1
+        funcptr = llop1.get_write_barrier_from_array_failing_case(
+            self.WB_ARRAY_FUNCPTR)
+        funcaddr = llmemory.cast_ptr_to_adr(funcptr)
+        return cpu.cast_adr_to_int(funcaddr)    # this may return 0
+
 
 class GcLLDescr_framework(GcLLDescription):
     DEBUG = False    # forced to True by x86/test/test_zrpy_gc.py
@@ -559,7 +495,7 @@
         self.translator = translator
         self.llop1 = llop1
 
-        # we need the hybrid or minimark GC for GcRefList.alloc_gcref_list()
+        # we need the hybrid or minimark GC for rgc._make_sure_does_not_move()
         # to work
         if gcdescr.config.translation.gc not in ('hybrid', 'minimark'):
             raise NotImplementedError("--gc=%s not implemented with the JIT" %
@@ -574,8 +510,6 @@
                                       " with the JIT" % (name,))
         gcrootmap = cls(gcdescr)
         self.gcrootmap = gcrootmap
-        self.gcrefs = GcRefList()
-        self.single_gcref_descr = GcPtrFieldDescr('', 0)
 
         # make a TransformerLayoutBuilder and save it on the translator
         # where it can be fished and reused by the FrameworkGCTransformer
@@ -617,6 +551,8 @@
             [lltype.Signed, lltype.Signed], llmemory.GCREF))
         self.WB_FUNCPTR = lltype.Ptr(lltype.FuncType(
             [llmemory.Address, llmemory.Address], lltype.Void))
+        self.WB_ARRAY_FUNCPTR = lltype.Ptr(lltype.FuncType(
+            [llmemory.Address, lltype.Signed], lltype.Void))
         self.write_barrier_descr = WriteBarrierDescr(self)
         #
         def malloc_array(itemsize, tid, num_elem):
@@ -706,7 +642,6 @@
         return rffi.cast(lltype.Signed, fptr)
 
     def initialize(self):
-        self.gcrefs.initialize()
         self.gcrootmap.initialize()
 
     def init_size_descr(self, S, descr):
@@ -768,54 +703,32 @@
             funcptr(llmemory.cast_ptr_to_adr(gcref_struct),
                     llmemory.cast_ptr_to_adr(gcref_newptr))
 
-    def replace_constptrs_with_getfield_raw(self, cpu, newops, op):
-        # xxx some performance issue here
-        newargs = [None] * op.numargs()
-        needs_copy = False
+    def record_constptrs(self, op, gcrefs_output_list):
         for i in range(op.numargs()):
             v = op.getarg(i)
-            newargs[i] = v
             if isinstance(v, ConstPtr) and bool(v.value):
-                addr = self.gcrefs.get_address_of_gcref(v.value)
-                # ^^^even for non-movable objects, to record their presence
-                if rgc.can_move(v.value):
-                    box = BoxPtr(v.value)
-                    addr = cpu.cast_adr_to_int(addr)
-                    newops.append(ResOperation(rop.GETFIELD_RAW,
-                                               [ConstInt(addr)], box,
-                                               self.single_gcref_descr))
-                    newargs[i] = box
-                    needs_copy = True
-        #
-        if needs_copy:
-            return op.copy_and_change(op.getopnum(), args=newargs)
-        else:
-            return op
+                p = v.value
+                rgc._make_sure_does_not_move(p)
+                gcrefs_output_list.append(p)
 
-
-    def rewrite_assembler(self, cpu, operations):
+    def rewrite_assembler(self, cpu, operations, gcrefs_output_list):
         # Perform two kinds of rewrites in parallel:
         #
         # - Add COND_CALLs to the write barrier before SETFIELD_GC and
         #   SETARRAYITEM_GC operations.
         #
-        # - Remove all uses of ConstPtrs away from the assembler.
-        #   Idea: when running on a moving GC, we can't (easily) encode
-        #   the ConstPtrs in the assembler, because they can move at any
-        #   point in time.  Instead, we store them in 'gcrefs.list', a GC
-        #   but nonmovable list; and here, we modify 'operations' to
-        #   replace direct usage of ConstPtr with a BoxPtr loaded by a
-        #   GETFIELD_RAW from the array 'gcrefs.list'.
+        # - Record the ConstPtrs from the assembler.
         #
         newops = []
+        known_lengths = {}
         # we can only remember one malloc since the next malloc can possibly
         # collect
         last_malloc = None
         for op in operations:
             if op.getopnum() == rop.DEBUG_MERGE_POINT:
                 continue
-            # ---------- replace ConstPtrs with GETFIELD_RAW ----------
-            op = self.replace_constptrs_with_getfield_raw(cpu, newops, op)
+            # ---------- record the ConstPtrs ----------
+            self.record_constptrs(op, gcrefs_output_list)
             if op.is_malloc():
                 last_malloc = op.result
             elif op.can_malloc():
@@ -838,19 +751,40 @@
                     v = op.getarg(2)
                     if isinstance(v, BoxPtr) or (isinstance(v, ConstPtr) and
                                             bool(v.value)): # store a non-NULL
-                        # XXX detect when we should produce a
-                        # write_barrier_from_array
-                        self._gen_write_barrier(newops, op.getarg(0), v)
+                        self._gen_write_barrier_array(newops, op.getarg(0),
+                                                      op.getarg(1), v,
+                                                      cpu, known_lengths)
                         op = op.copy_and_change(rop.SETARRAYITEM_RAW)
+            elif op.getopnum() == rop.NEW_ARRAY:
+                v_length = op.getarg(0)
+                if isinstance(v_length, ConstInt):
+                    known_lengths[op.result] = v_length.getint()
             # ----------
             newops.append(op)
         return newops
 
-    def _gen_write_barrier(self, newops, v_base, v_value):
-        args = [v_base, v_value]
+    def _gen_write_barrier(self, newops, v_base, v_value_or_index):
+        # NB. the 2nd argument of COND_CALL_GC_WB is either a pointer
+        # (regular case), or an index (case of write_barrier_from_array)
+        args = [v_base, v_value_or_index]
         newops.append(ResOperation(rop.COND_CALL_GC_WB, args, None,
                                    descr=self.write_barrier_descr))
 
+    def _gen_write_barrier_array(self, newops, v_base, v_index, v_value,
+                                 cpu, known_lengths):
+        if self.write_barrier_descr.get_write_barrier_from_array_fn(cpu) != 0:
+            # If we know statically the length of 'v', and it is not too
+            # big, then produce a regular write_barrier.  If it's unknown or
+            # too big, produce instead a write_barrier_from_array.
+            LARGE = 130
+            length = known_lengths.get(v_base, LARGE)
+            if length >= LARGE:
+                # unknown or too big: produce a write_barrier_from_array
+                self._gen_write_barrier(newops, v_base, v_index)
+                return
+        # fall-back case: produce a write_barrier
+        self._gen_write_barrier(newops, v_base, v_value)
+
     def can_inline_malloc(self, descr):
         assert isinstance(descr, BaseSizeDescr)
         if descr.size < self.max_size_of_young_obj:
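To make the new rewriting logic easier to follow outside of the RPython types,
here is a rough, self-contained sketch (plain Python with hypothetical names,
not the actual PyPy classes) of the two ideas introduced above: the
write-barrier choice made in ``_gen_write_barrier_array`` and the new
``record_constptrs`` behaviour::

    LARGE = 130   # same threshold as in the code above

    def choose_write_barrier_args(v_base, v_index, v_value, known_lengths,
                                  have_wb_from_array):
        # returns the two arguments to emit for COND_CALL_GC_WB
        if have_wb_from_array:
            # length unknown or too big: pass the index, so that the backend
            # calls write_barrier_from_array instead of the plain barrier
            if known_lengths.get(v_base, LARGE) >= LARGE:
                return (v_base, v_index)
        # small, statically-known array (or no array barrier available):
        # plain write barrier on the new value
        return (v_base, v_value)

    def record_constptrs(constptrs, gcrefs_output_list, pin):
        # instead of rewriting ConstPtrs into GETFIELD_RAW loads from a
        # nonmovable GcRefList, the new code simply pins each object
        # ('pin' stands in for rgc._make_sure_does_not_move) and records it
        for p in constptrs:
            if p:
                pin(p)
                gcrefs_output_list.append(p)

For example, ``choose_write_barrier_args('arr', 'i', 'v', {'arr': 5}, True)``
returns ``('arr', 'v')``, while an array of unknown length yields
``('arr', 'i')``.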
diff --git a/pypy/jit/backend/llsupport/test/test_gc.py b/pypy/jit/backend/llsupport/test/test_gc.py
--- a/pypy/jit/backend/llsupport/test/test_gc.py
+++ b/pypy/jit/backend/llsupport/test/test_gc.py
@@ -49,19 +49,6 @@
 
 # ____________________________________________________________
 
-def test_GcRefList():
-    S = lltype.GcStruct('S')
-    order = range(50) * 4
-    random.shuffle(order)
-    allocs = [lltype.cast_opaque_ptr(llmemory.GCREF, lltype.malloc(S))
-              for i in range(50)]
-    allocs = [allocs[i] for i in order]
-    #
-    gcrefs = GcRefList()
-    gcrefs.initialize()
-    addrs = [gcrefs.get_address_of_gcref(ptr) for ptr in allocs]
-    for i in range(len(allocs)):
-        assert addrs[i].address[0] == llmemory.cast_ptr_to_adr(allocs[i])
 
 class TestGcRootMapAsmGcc:
 
@@ -288,6 +275,18 @@
     def get_write_barrier_failing_case(self, FPTRTYPE):
         return llhelper(FPTRTYPE, self._write_barrier_failing_case)
 
+    _have_wb_from_array = False
+
+    def _write_barrier_from_array_failing_case(self, adr_struct, v_index):
+        self.record.append(('barrier_from_array', adr_struct, v_index))
+
+    def get_write_barrier_from_array_failing_case(self, FPTRTYPE):
+        if self._have_wb_from_array:
+            return llhelper(FPTRTYPE,
+                            self._write_barrier_from_array_failing_case)
+        else:
+            return lltype.nullptr(FPTRTYPE.TO)
+
 
 class TestFramework(object):
     gc = 'hybrid'
@@ -303,9 +302,20 @@
             config = config_
         class FakeCPU(object):
             def cast_adr_to_int(self, adr):
-                ptr = llmemory.cast_adr_to_ptr(adr, gc_ll_descr.WB_FUNCPTR)
-                assert ptr._obj._callable == llop1._write_barrier_failing_case
-                return 42
+                if not adr:
+                    return 0
+                try:
+                    ptr = llmemory.cast_adr_to_ptr(adr, gc_ll_descr.WB_FUNCPTR)
+                    assert ptr._obj._callable == \
+                           llop1._write_barrier_failing_case
+                    return 42
+                except lltype.InvalidCast:
+                    ptr = llmemory.cast_adr_to_ptr(
+                        adr, gc_ll_descr.WB_ARRAY_FUNCPTR)
+                    assert ptr._obj._callable == \
+                           llop1._write_barrier_from_array_failing_case
+                    return 43
+
         gcdescr = get_description(config_)
         translator = FakeTranslator()
         llop1 = FakeLLOp()
@@ -414,11 +424,11 @@
             ResOperation(rop.DEBUG_MERGE_POINT, ['dummy', 2], None),
             ]
         gc_ll_descr = self.gc_ll_descr
-        operations = gc_ll_descr.rewrite_assembler(None, operations)
+        operations = gc_ll_descr.rewrite_assembler(None, operations, [])
         assert len(operations) == 0
 
     def test_rewrite_assembler_1(self):
-        # check rewriting of ConstPtrs
+        # check recording of ConstPtrs
         class MyFakeCPU(object):
             def cast_adr_to_int(self, adr):
                 assert adr == "some fake address"
@@ -438,56 +448,12 @@
             ]
         gc_ll_descr = self.gc_ll_descr
         gc_ll_descr.gcrefs = MyFakeGCRefList()
+        gcrefs = []
         operations = get_deep_immutable_oplist(operations)
-        operations = gc_ll_descr.rewrite_assembler(MyFakeCPU(), operations)
-        assert len(operations) == 2
-        assert operations[0].getopnum() == rop.GETFIELD_RAW
-        assert operations[0].getarg(0) == ConstInt(43)
-        assert operations[0].getdescr() == gc_ll_descr.single_gcref_descr
-        v_box = operations[0].result
-        assert isinstance(v_box, BoxPtr)
-        assert operations[1].getopnum() == rop.PTR_EQ
-        assert operations[1].getarg(0) == v_random_box
-        assert operations[1].getarg(1) == v_box
-        assert operations[1].result == v_result
-
-    def test_rewrite_assembler_1_cannot_move(self):
-        # check rewriting of ConstPtrs
-        class MyFakeCPU(object):
-            def cast_adr_to_int(self, adr):
-                xxx    # should not be called
-        class MyFakeGCRefList(object):
-            def get_address_of_gcref(self, s_gcref1):
-                seen.append(s_gcref1)
-                assert s_gcref1 == s_gcref
-                return "some fake address"
-        seen = []
-        S = lltype.GcStruct('S')
-        s = lltype.malloc(S)
-        s_gcref = lltype.cast_opaque_ptr(llmemory.GCREF, s)
-        v_random_box = BoxPtr()
-        v_result = BoxInt()
-        operations = [
-            ResOperation(rop.PTR_EQ, [v_random_box, ConstPtr(s_gcref)],
-                         v_result),
-            ]
-        gc_ll_descr = self.gc_ll_descr
-        gc_ll_descr.gcrefs = MyFakeGCRefList()
-        old_can_move = rgc.can_move
-        operations = get_deep_immutable_oplist(operations)
-        try:
-            rgc.can_move = lambda s: False
-            operations = gc_ll_descr.rewrite_assembler(MyFakeCPU(), operations)
-        finally:
-            rgc.can_move = old_can_move
-        assert len(operations) == 1
-        assert operations[0].getopnum() == rop.PTR_EQ
-        assert operations[0].getarg(0) == v_random_box
-        assert operations[0].getarg(1) == ConstPtr(s_gcref)
-        assert operations[0].result == v_result
-        # check that s_gcref gets added to the list anyway, to make sure
-        # that the GC sees it
-        assert seen == [s_gcref]
+        operations2 = gc_ll_descr.rewrite_assembler(MyFakeCPU(), operations,
+                                                   gcrefs)
+        assert operations2 == operations
+        assert gcrefs == [s_gcref]
 
     def test_rewrite_assembler_2(self):
         # check write barriers before SETFIELD_GC
@@ -500,7 +466,8 @@
             ]
         gc_ll_descr = self.gc_ll_descr
         operations = get_deep_immutable_oplist(operations)
-        operations = gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+        operations = gc_ll_descr.rewrite_assembler(self.fake_cpu, operations,
+                                                   [])
         assert len(operations) == 2
         #
         assert operations[0].getopnum() == rop.COND_CALL_GC_WB
@@ -515,29 +482,90 @@
 
     def test_rewrite_assembler_3(self):
         # check write barriers before SETARRAYITEM_GC
-        v_base = BoxPtr()
-        v_index = BoxInt()
-        v_value = BoxPtr()
-        array_descr = AbstractDescr()
-        operations = [
-            ResOperation(rop.SETARRAYITEM_GC, [v_base, v_index, v_value], None,
-                         descr=array_descr),
-            ]
-        gc_ll_descr = self.gc_ll_descr
-        operations = get_deep_immutable_oplist(operations)
-        operations = gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
-        assert len(operations) == 2
-        #
-        assert operations[0].getopnum() == rop.COND_CALL_GC_WB
-        assert operations[0].getarg(0) == v_base
-        assert operations[0].getarg(1) == v_value
-        assert operations[0].result is None
-        #
-        assert operations[1].getopnum() == rop.SETARRAYITEM_RAW
-        assert operations[1].getarg(0) == v_base
-        assert operations[1].getarg(1) == v_index
-        assert operations[1].getarg(2) == v_value
-        assert operations[1].getdescr() == array_descr
+        for v_new_length in (None, ConstInt(5), ConstInt(5000), BoxInt()):
+            v_base = BoxPtr()
+            v_index = BoxInt()
+            v_value = BoxPtr()
+            array_descr = AbstractDescr()
+            operations = [
+                ResOperation(rop.SETARRAYITEM_GC, [v_base, v_index, v_value],
+                             None, descr=array_descr),
+                ]
+            if v_new_length is not None:
+                operations.insert(0, ResOperation(rop.NEW_ARRAY,
+                                                  [v_new_length], v_base,
+                                                  descr=array_descr))
+                # we need to insert another, unrelated NEW_ARRAY here
+                # to prevent the initialization_store optimization
+                operations.insert(1, ResOperation(rop.NEW_ARRAY,
+                                                  [ConstInt(12)], BoxPtr(),
+                                                  descr=array_descr))
+            gc_ll_descr = self.gc_ll_descr
+            operations = get_deep_immutable_oplist(operations)
+            operations = gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                       operations, [])
+            if v_new_length is not None:
+                assert operations[0].getopnum() == rop.NEW_ARRAY
+                assert operations[1].getopnum() == rop.NEW_ARRAY
+                del operations[:2]
+            assert len(operations) == 2
+            #
+            assert operations[0].getopnum() == rop.COND_CALL_GC_WB
+            assert operations[0].getarg(0) == v_base
+            assert operations[0].getarg(1) == v_value
+            assert operations[0].result is None
+            #
+            assert operations[1].getopnum() == rop.SETARRAYITEM_RAW
+            assert operations[1].getarg(0) == v_base
+            assert operations[1].getarg(1) == v_index
+            assert operations[1].getarg(2) == v_value
+            assert operations[1].getdescr() == array_descr
+
+    def test_rewrite_assembler_4(self):
+        # check write barriers before SETARRAYITEM_GC,
+        # if we have actually a write_barrier_from_array.
+        self.llop1._have_wb_from_array = True
+        for v_new_length in (None, ConstInt(5), ConstInt(5000), BoxInt()):
+            v_base = BoxPtr()
+            v_index = BoxInt()
+            v_value = BoxPtr()
+            array_descr = AbstractDescr()
+            operations = [
+                ResOperation(rop.SETARRAYITEM_GC, [v_base, v_index, v_value],
+                             None, descr=array_descr),
+                ]
+            if v_new_length is not None:
+                operations.insert(0, ResOperation(rop.NEW_ARRAY,
+                                                  [v_new_length], v_base,
+                                                  descr=array_descr))
+                # we need to insert another, unrelated NEW_ARRAY here
+                # to prevent the initialization_store optimization
+                operations.insert(1, ResOperation(rop.NEW_ARRAY,
+                                                  [ConstInt(12)], BoxPtr(),
+                                                  descr=array_descr))
+            gc_ll_descr = self.gc_ll_descr
+            operations = get_deep_immutable_oplist(operations)
+            operations = gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                       operations, [])
+            if v_new_length is not None:
+                assert operations[0].getopnum() == rop.NEW_ARRAY
+                assert operations[1].getopnum() == rop.NEW_ARRAY
+                del operations[:2]
+            assert len(operations) == 2
+            #
+            assert operations[0].getopnum() == rop.COND_CALL_GC_WB
+            assert operations[0].getarg(0) == v_base
+            if isinstance(v_new_length, ConstInt) and v_new_length.value < 130:
+                assert operations[0].getarg(1) == v_value
+            else:
+                assert operations[0].getarg(1) == v_index
+            assert operations[0].result is None
+            #
+            assert operations[1].getopnum() == rop.SETARRAYITEM_RAW
+            assert operations[1].getarg(0) == v_base
+            assert operations[1].getarg(1) == v_index
+            assert operations[1].getarg(2) == v_value
+            assert operations[1].getdescr() == array_descr
 
     def test_rewrite_assembler_initialization_store(self):
         S = lltype.GcStruct('S', ('parent', OBJECT),
@@ -558,7 +586,8 @@
         jump()
         """, namespace=locals())
         operations = get_deep_immutable_oplist(ops.operations)
-        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                        operations, [])
         equaloplists(operations, expected.operations)
 
     def test_rewrite_assembler_initialization_store_2(self):
@@ -583,7 +612,8 @@
         jump()
         """, namespace=locals())
         operations = get_deep_immutable_oplist(ops.operations)
-        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                        operations, [])
         equaloplists(operations, expected.operations)
 
     def test_rewrite_assembler_initialization_store_3(self):
@@ -602,7 +632,8 @@
         jump()
         """, namespace=locals())
         operations = get_deep_immutable_oplist(ops.operations)
-        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                        operations, [])
         equaloplists(operations, expected.operations)
 
 class TestFrameworkMiniMark(TestFramework):
diff --git a/pypy/jit/backend/test/calling_convention_test.py b/pypy/jit/backend/test/calling_convention_test.py
--- a/pypy/jit/backend/test/calling_convention_test.py
+++ b/pypy/jit/backend/test/calling_convention_test.py
@@ -57,146 +57,146 @@
         return ConstInt(heaptracker.adr2int(addr))
 
     def test_call_aligned_with_spilled_values(self):
-            from pypy.rlib.libffi import types
-            cpu = self.cpu
-            if not cpu.supports_floats:
-                py.test.skip('requires floats')
+        from pypy.rlib.libffi import types
+        cpu = self.cpu
+        if not cpu.supports_floats:
+            py.test.skip('requires floats')
 
 
-            def func(*args):
-                return float(sum(args))
+        def func(*args):
+            return float(sum(args))
 
-            F = lltype.Float
-            I = lltype.Signed
-            floats = [0.7, 5.8, 0.1, 0.3, 0.9, -2.34, -3.45, -4.56]
-            ints = [7, 11, 23, 13, -42, 1111, 95, 1]
-            for case in range(256):
-                local_floats = list(floats)
-                local_ints = list(ints)
-                args = []
-                spills = []
-                funcargs = []
-                float_count = 0
-                int_count = 0
-                for i in range(8):
-                    if case & (1<<i):
-                        args.append('f%d' % float_count)
-                        spills.append('force_spill(f%d)' % float_count)
-                        float_count += 1
-                        funcargs.append(F)
-                    else:
-                        args.append('i%d' % int_count)
-                        spills.append('force_spill(i%d)' % int_count)
-                        int_count += 1
-                        funcargs.append(I)
+        F = lltype.Float
+        I = lltype.Signed
+        floats = [0.7, 5.8, 0.1, 0.3, 0.9, -2.34, -3.45, -4.56]
+        ints = [7, 11, 23, 13, -42, 1111, 95, 1]
+        for case in range(256):
+            local_floats = list(floats)
+            local_ints = list(ints)
+            args = []
+            spills = []
+            funcargs = []
+            float_count = 0
+            int_count = 0
+            for i in range(8):
+                if case & (1<<i):
+                    args.append('f%d' % float_count)
+                    spills.append('force_spill(f%d)' % float_count)
+                    float_count += 1
+                    funcargs.append(F)
+                else:
+                    args.append('i%d' % int_count)
+                    spills.append('force_spill(i%d)' % int_count)
+                    int_count += 1
+                    funcargs.append(I)
 
-                arguments = ', '.join(args)
-                spill_ops = '\n'.join(spills)
+            arguments = ', '.join(args)
+            spill_ops = '\n'.join(spills)
 
-                FUNC = self.FuncType(funcargs, F)
-                FPTR = self.Ptr(FUNC)
-                func_ptr = llhelper(FPTR, func)
-                calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
-                funcbox = self.get_funcbox(cpu, func_ptr)
+            FUNC = self.FuncType(funcargs, F)
+            FPTR = self.Ptr(FUNC)
+            func_ptr = llhelper(FPTR, func)
+            calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+            funcbox = self.get_funcbox(cpu, func_ptr)
 
-                ops = '[%s]\n' % arguments
-                ops += '%s\n' % spill_ops
-                ops += 'f99 = call(ConstClass(func_ptr), %s, descr=calldescr)\n' % arguments
-                ops += 'finish(f99, %s)\n' % arguments
+            ops = '[%s]\n' % arguments
+            ops += '%s\n' % spill_ops
+            ops += 'f99 = call(ConstClass(func_ptr), %s, descr=calldescr)\n' % arguments
+            ops += 'finish(f99, %s)\n' % arguments
 
-                loop = parse(ops, namespace=locals())
-                looptoken = LoopToken()
-                done_number = self.cpu.get_fail_descr_number(loop.operations[-1].getdescr())
-                self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
-                expected_result = self._prepare_args(args, floats, ints)
+            loop = parse(ops, namespace=locals())
+            looptoken = LoopToken()
+            done_number = self.cpu.get_fail_descr_number(loop.operations[-1].getdescr())
+            self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
+            expected_result = self._prepare_args(args, floats, ints)
 
-                res = self.cpu.execute_token(looptoken)
-                x = longlong.getrealfloat(cpu.get_latest_value_float(0))
-                assert abs(x - expected_result) < 0.0001
+            res = self.cpu.execute_token(looptoken)
+            x = longlong.getrealfloat(cpu.get_latest_value_float(0))
+            assert abs(x - expected_result) < 0.0001
 
     def test_call_aligned_with_imm_values(self):
-            from pypy.rlib.libffi import types
-            cpu = self.cpu
-            if not cpu.supports_floats:
-                py.test.skip('requires floats')
+        from pypy.rlib.libffi import types
+        cpu = self.cpu
+        if not cpu.supports_floats:
+            py.test.skip('requires floats')
 
 
-            def func(*args):
-                return float(sum(args))
+        def func(*args):
+            return float(sum(args))
 
-            F = lltype.Float
-            I = lltype.Signed
-            floats = [0.7, 5.8, 0.1, 0.3, 0.9, -2.34, -3.45, -4.56]
-            ints = [7, 11, 23, 13, -42, 1111, 95, 1]
-            for case in range(256):
-                result = 0.0
-                args = []
-                argslist = []
-                local_floats = list(floats)
-                local_ints = list(ints)
-                for i in range(8):
-                    if case & (1<<i):
-                        args.append(F)
-                        arg = local_floats.pop()
-                        result += arg
-                        argslist.append(constfloat(arg))
-                    else:
-                        args.append(I)
-                        arg = local_ints.pop()
-                        result += arg
-                        argslist.append(ConstInt(arg))
-                FUNC = self.FuncType(args, F)
-                FPTR = self.Ptr(FUNC)
-                func_ptr = llhelper(FPTR, func)
-                calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
-                funcbox = self.get_funcbox(cpu, func_ptr)
+        F = lltype.Float
+        I = lltype.Signed
+        floats = [0.7, 5.8, 0.1, 0.3, 0.9, -2.34, -3.45, -4.56]
+        ints = [7, 11, 23, 13, -42, 1111, 95, 1]
+        for case in range(256):
+            result = 0.0
+            args = []
+            argslist = []
+            local_floats = list(floats)
+            local_ints = list(ints)
+            for i in range(8):
+                if case & (1<<i):
+                    args.append(F)
+                    arg = local_floats.pop()
+                    result += arg
+                    argslist.append(constfloat(arg))
+                else:
+                    args.append(I)
+                    arg = local_ints.pop()
+                    result += arg
+                    argslist.append(ConstInt(arg))
+            FUNC = self.FuncType(args, F)
+            FPTR = self.Ptr(FUNC)
+            func_ptr = llhelper(FPTR, func)
+            calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+            funcbox = self.get_funcbox(cpu, func_ptr)
 
-                res = self.execute_operation(rop.CALL,
-                                             [funcbox] + argslist,
-                                             'float', descr=calldescr)
-                assert abs(res.getfloat() - result) < 0.0001
+            res = self.execute_operation(rop.CALL,
+                                         [funcbox] + argslist,
+                                         'float', descr=calldescr)
+            assert abs(res.getfloat() - result) < 0.0001
 
     def test_call_aligned_with_args_on_the_stack(self):
-            from pypy.rlib.libffi import types
-            cpu = self.cpu
-            if not cpu.supports_floats:
-                py.test.skip('requires floats')
+        from pypy.rlib.libffi import types
+        cpu = self.cpu
+        if not cpu.supports_floats:
+            py.test.skip('requires floats')
 
 
-            def func(*args):
-                return float(sum(args))
+        def func(*args):
+            return float(sum(args))
 
-            F = lltype.Float
-            I = lltype.Signed
-            floats = [0.7, 5.8, 0.1, 0.3, 0.9, -2.34, -3.45, -4.56]
-            ints = [7, 11, 23, 13, -42, 1111, 95, 1]
-            for case in range(256):
-                result = 0.0
-                args = []
-                argslist = []
-                local_floats = list(floats)
-                local_ints = list(ints)
-                for i in range(8):
-                    if case & (1<<i):
-                        args.append(F)
-                        arg = local_floats.pop()
-                        result += arg
-                        argslist.append(boxfloat(arg))
-                    else:
-                        args.append(I)
-                        arg = local_ints.pop()
-                        result += arg
-                        argslist.append(BoxInt(arg))
-                FUNC = self.FuncType(args, F)
-                FPTR = self.Ptr(FUNC)
-                func_ptr = llhelper(FPTR, func)
-                calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
-                funcbox = self.get_funcbox(cpu, func_ptr)
+        F = lltype.Float
+        I = lltype.Signed
+        floats = [0.7, 5.8, 0.1, 0.3, 0.9, -2.34, -3.45, -4.56]
+        ints = [7, 11, 23, 13, -42, 1111, 95, 1]
+        for case in range(256):
+            result = 0.0
+            args = []
+            argslist = []
+            local_floats = list(floats)
+            local_ints = list(ints)
+            for i in range(8):
+                if case & (1<<i):
+                    args.append(F)
+                    arg = local_floats.pop()
+                    result += arg
+                    argslist.append(boxfloat(arg))
+                else:
+                    args.append(I)
+                    arg = local_ints.pop()
+                    result += arg
+                    argslist.append(BoxInt(arg))
+            FUNC = self.FuncType(args, F)
+            FPTR = self.Ptr(FUNC)
+            func_ptr = llhelper(FPTR, func)
+            calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+            funcbox = self.get_funcbox(cpu, func_ptr)
 
-                res = self.execute_operation(rop.CALL,
-                                             [funcbox] + argslist,
-                                             'float', descr=calldescr)
-                assert abs(res.getfloat() - result) < 0.0001
+            res = self.execute_operation(rop.CALL,
+                                         [funcbox] + argslist,
+                                         'float', descr=calldescr)
+            assert abs(res.getfloat() - result) < 0.0001
 
     def test_call_alignment_call_assembler(self):
         from pypy.rlib.libffi import types
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -1663,7 +1663,7 @@
         record = []
         #
         S = lltype.GcStruct('S', ('tid', lltype.Signed))
-        FUNC = self.FuncType([lltype.Ptr(S), lltype.Signed], lltype.Void)
+        FUNC = self.FuncType([lltype.Ptr(S), lltype.Ptr(S)], lltype.Void)
         func_ptr = llhelper(lltype.Ptr(FUNC), func_void)
         funcbox = self.get_funcbox(self.cpu, func_ptr)
         class WriteBarrierDescr(AbstractDescr):
@@ -1682,12 +1682,48 @@
             s = lltype.malloc(S)
             s.tid = value
             sgcref = lltype.cast_opaque_ptr(llmemory.GCREF, s)
+            t = lltype.malloc(S)
+            tgcref = lltype.cast_opaque_ptr(llmemory.GCREF, t)
             del record[:]
             self.execute_operation(rop.COND_CALL_GC_WB,
-                                   [BoxPtr(sgcref), ConstInt(-2121)],
+                                   [BoxPtr(sgcref), ConstPtr(tgcref)],
                                    'void', descr=WriteBarrierDescr())
             if cond:
-                assert record == [(s, -2121)]
+                assert record == [(s, t)]
+            else:
+                assert record == []
+
+    def test_cond_call_gc_wb_array(self):
+        def func_void(a, b):
+            record.append((a, b))
+        record = []
+        #
+        S = lltype.GcStruct('S', ('tid', lltype.Signed))
+        FUNC = self.FuncType([lltype.Ptr(S), lltype.Signed], lltype.Void)
+        func_ptr = llhelper(lltype.Ptr(FUNC), func_void)
+        funcbox = self.get_funcbox(self.cpu, func_ptr)
+        class WriteBarrierDescr(AbstractDescr):
+            jit_wb_if_flag = 4096
+            jit_wb_if_flag_byteofs = struct.pack("i", 4096).index('\x10')
+            jit_wb_if_flag_singlebyte = 0x10
+            def get_write_barrier_from_array_fn(self, cpu):
+                return funcbox.getint()
+        #
+        for cond in [False, True]:
+            value = random.randrange(-sys.maxint, sys.maxint)
+            if cond:
+                value |= 4096
+            else:
+                value &= ~4096
+            s = lltype.malloc(S)
+            s.tid = value
+            sgcref = lltype.cast_opaque_ptr(llmemory.GCREF, s)
+            del record[:]
+            self.execute_operation(rop.COND_CALL_GC_WB,
+                                   [BoxPtr(sgcref), ConstInt(123)],
+                                   'void', descr=WriteBarrierDescr())
+            if cond:
+                assert record == [(s, 123)]
             else:
                 assert record == []
 
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -383,6 +383,7 @@
         # for the duration of compiling one loop or a one bridge.
 
         clt = CompiledLoopToken(self.cpu, looptoken.number)
+        clt.allgcrefs = []
         looptoken.compiled_loop_token = clt
         if not we_are_translated():
             # Arguments should be unique
@@ -396,7 +397,8 @@
             operations = self._inject_debugging_code(looptoken, operations)
 
         regalloc = RegAlloc(self, self.cpu.translate_support_code)
-        arglocs, operations = regalloc.prepare_loop(inputargs, operations, looptoken)
+        arglocs, operations = regalloc.prepare_loop(inputargs, operations,
+                                                    looptoken, clt.allgcrefs)
         looptoken._x86_arglocs = arglocs
 
         bootstrappos = self.mc.get_relative_pos()
@@ -468,7 +470,8 @@
         regalloc = RegAlloc(self, self.cpu.translate_support_code)
         fail_depths = faildescr._x86_current_depths
         operations = regalloc.prepare_bridge(fail_depths, inputargs, arglocs,
-                                             operations)
+                                             operations,
+                                             self.current_clt.allgcrefs)
 
         stackadjustpos = self._patchable_stackadjust()
         frame_depth, param_depth = self._assemble(regalloc, operations)
@@ -560,9 +563,9 @@
                 funcname = op.getarg(0)._get_str()
                 break
         else:
-            funcname = "<loop %d>" % len(self.loop_run_counters)
-        # invent the counter, so we don't get too confused
-        return funcname
+            funcname = '?'
+        return "%s (loop counter %d)" % (funcname,
+                                         len(self.loop_run_counters))
 
     def _register_counter(self):
         if self._debug:
@@ -2236,6 +2239,8 @@
         # function remember_young_pointer() from the GC.  The two arguments
         # to the call are in arglocs[:2].  The rest, arglocs[2:], contains
         # registers that need to be saved and restored across the call.
+        # If op.getarg(1) is an int, it is an array index and we must call
+        # remember_young_pointer_from_array() instead.
         descr = op.getdescr()
         if we_are_translated():
             cls = self.cpu.gc_ll_descr.has_write_barrier_class()
@@ -2267,13 +2272,19 @@
             remap_frame_layout(self, arglocs[:2], [edi, esi],
                                X86_64_SCRATCH_REG)
 
+        if op.getarg(1).type == INT:
+            func = descr.get_write_barrier_from_array_fn(self.cpu)
+            assert func != 0
+        else:
+            func = descr.get_write_barrier_fn(self.cpu)
+
         # misaligned stack in the call, but it's ok because the write barrier
         # is not going to call anything more.  Also, this assumes that the
         # write barrier does not touch the xmm registers.  (Slightly delicate
         # assumption, given that the write barrier can end up calling the
         # platform's malloc() from AddressStack.append().  XXX may need to
         # be done properly)
-        self.mc.CALL(imm(descr.get_write_barrier_fn(self.cpu)))
+        self.mc.CALL(imm(func))
         if IS_X86_32:
             self.mc.ADD_ri(esp.value, 2*WORD)
         for i in range(2, len(arglocs)):
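The corresponding choice in the backend, namely which helper address to call,
boils down to the following sketch (the descriptor methods are the real ones,
but the wrapper function is made up for illustration)::

    def pick_write_barrier_function(descr, arg1_is_int, cpu):
        # COND_CALL_GC_WB's second argument is either the new value (a pointer)
        # or an array index (an integer); pick the matching GC helper
        if arg1_is_int:
            func = descr.get_write_barrier_from_array_fn(cpu)
            assert func != 0        # only emitted when the GC provides it
        else:
            func = descr.get_write_barrier_fn(cpu)
        return func                 # the address passed to mc.CALL(imm(func))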
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -158,11 +158,12 @@
         self.jump_target_descr = None
         self.close_stack_struct = 0
 
-    def _prepare(self, inputargs, operations):
+    def _prepare(self, inputargs, operations, allgcrefs):
         self.fm = X86FrameManager()
         self.param_depth = 0
         cpu = self.assembler.cpu
-        operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations)
+        operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations,
+                                                       allgcrefs)
         # compute longevity of variables
         longevity = self._compute_vars_longevity(inputargs, operations)
         self.longevity = longevity
@@ -173,15 +174,16 @@
                                    assembler = self.assembler)
         return operations
 
-    def prepare_loop(self, inputargs, operations, looptoken):
-        operations = self._prepare(inputargs, operations)
+    def prepare_loop(self, inputargs, operations, looptoken, allgcrefs):
+        operations = self._prepare(inputargs, operations, allgcrefs)
         jump = operations[-1]
         loop_consts = self._compute_loop_consts(inputargs, jump, looptoken)
         self.loop_consts = loop_consts
         return self._process_inputargs(inputargs), operations
 
-    def prepare_bridge(self, prev_depths, inputargs, arglocs, operations):
-        operations = self._prepare(inputargs, operations)
+    def prepare_bridge(self, prev_depths, inputargs, arglocs, operations,
+                       allgcrefs):
+        operations = self._prepare(inputargs, operations, allgcrefs)
         self.loop_consts = {}
         self._update_bindings(arglocs, inputargs)
         self.fm.frame_depth = prev_depths[0]
@@ -882,12 +884,12 @@
     def consider_cond_call_gc_wb(self, op):
         assert op.result is None
         args = op.getarglist()
-        loc_newvalue = self.rm.make_sure_var_in_reg(op.getarg(1), args)
-        # ^^^ we force loc_newvalue in a reg (unless it's a Const),
+        loc_newvalue_or_index = self.rm.make_sure_var_in_reg(op.getarg(1), args)
+        # ^^^ we force loc_newvalue_or_index in a reg (unless it's a Const),
         # because it will be needed anyway by the following setfield_gc.
         # It avoids loading it twice from the memory.
         loc_base = self.rm.make_sure_var_in_reg(op.getarg(0), args)
-        arglocs = [loc_base, loc_newvalue]
+        arglocs = [loc_base, loc_newvalue_or_index]
         # add eax, ecx and edx as extra "arguments" to ensure they are
         # saved and restored.  Fish in self.rm to know which of these
         # registers really need to be saved (a bit of a hack).  Moreover,
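The way ``allgcrefs`` is threaded from the assembler down to
``rewrite_assembler`` can be pictured like this (a simplified sketch with
stand-ins for the real CompiledLoopToken and RegAlloc classes)::

    class FakeCompiledLoopToken(object):
        def __init__(self):
            self.allgcrefs = []     # filled while rewriting the operations

    def prepare_loop(gc_ll_descr, cpu, operations, clt):
        # rewrite_assembler appends every non-NULL ConstPtr it sees to
        # clt.allgcrefs, so the GC objects referenced by the generated
        # assembler stay alive as long as the compiled loop does
        return gc_ll_descr.rewrite_assembler(cpu, operations, clt.allgcrefs)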
diff --git a/pypy/jit/backend/x86/test/test_gc_integration.py b/pypy/jit/backend/x86/test/test_gc_integration.py
--- a/pypy/jit/backend/x86/test/test_gc_integration.py
+++ b/pypy/jit/backend/x86/test/test_gc_integration.py
@@ -16,7 +16,7 @@
 from pypy.rpython.lltypesystem import lltype, llmemory, rffi
 from pypy.rpython.annlowlevel import llhelper
 from pypy.rpython.lltypesystem import rclass, rstr
-from pypy.jit.backend.llsupport.gc import GcLLDescr_framework, GcRefList, GcPtrFieldDescr
+from pypy.jit.backend.llsupport.gc import GcLLDescr_framework, GcPtrFieldDescr
 
 from pypy.jit.backend.x86.test.test_regalloc import MockAssembler
 from pypy.jit.backend.x86.test.test_regalloc import BaseTestRegalloc
@@ -51,11 +51,9 @@
     gcrootmap = MockGcRootMap()
 
     def initialize(self):
-        self.gcrefs = GcRefList()
-        self.gcrefs.initialize()
-        self.single_gcref_descr = GcPtrFieldDescr('', 0)
+        pass
 
-    replace_constptrs_with_getfield_raw = GcLLDescr_framework.replace_constptrs_with_getfield_raw.im_func
+    record_constptrs = GcLLDescr_framework.record_constptrs.im_func
     rewrite_assembler = GcLLDescr_framework.rewrite_assembler.im_func
 
 class TestRegallocDirectGcIntegration(object):
diff --git a/pypy/jit/backend/x86/test/test_runner.py b/pypy/jit/backend/x86/test/test_runner.py
--- a/pypy/jit/backend/x86/test/test_runner.py
+++ b/pypy/jit/backend/x86/test/test_runner.py
@@ -362,7 +362,7 @@
         operations[3].setfailargs([i1])
         self.cpu.compile_loop(inputargs, operations, looptoken)
         name, loopaddress, loopsize = agent.functions[0]
-        assert name == "Loop # 17: hello"
+        assert name == "Loop # 17: hello (loop counter 0)"
         assert loopaddress <= looptoken._x86_loop_code
         assert loopsize >= 40 # randomish number
 
@@ -378,7 +378,7 @@
 
         self.cpu.compile_bridge(faildescr1, [i1b], bridge, looptoken)
         name, address, size = agent.functions[1]
-        assert name == "Bridge # 0: bye"
+        assert name == "Bridge # 0: bye (loop counter 1)"
         # Would be exactly ==, but there are some guard failure recovery
         # stubs in-between
         assert address >= loopaddress + loopsize
diff --git a/pypy/jit/backend/x86/test/test_zrpy_gc.py b/pypy/jit/backend/x86/test/test_zrpy_gc.py
--- a/pypy/jit/backend/x86/test/test_zrpy_gc.py
+++ b/pypy/jit/backend/x86/test/test_zrpy_gc.py
@@ -1,8 +1,7 @@
 """
-This is a test that translates a complete JIT to C and runs it.  It is
-not testing much, expect that it basically works.  What it *is* testing,
-however, is the correct handling of GC, i.e. if objects are freed as
-soon as possible (at least in a simple case).
+This is a test that translates a complete JIT together with a GC and runs it.
+It is testing that the GC-dependent aspects basically work, mostly the mallocs
+and the various cases of write barrier.
 """
 
 import weakref
@@ -458,6 +457,73 @@
     def test_compile_framework_7(self):
         self.run('compile_framework_7')
 
+    def define_compile_framework_8(cls):
+        # Array of pointers, of unknown length (test write_barrier_from_array)
+        def before(n, x):
+            return n, x, None, None, None, None, None, None, None, None, [X(123)], None
+        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
+            if n < 1900:
+                check(l[0].x == 123)
+                l = [None] * (16 + (n & 7))
+                l[0] = X(123)
+                l[1] = X(n)
+                l[2] = X(n+10)
+                l[3] = X(n+20)
+                l[4] = X(n+30)
+                l[5] = X(n+40)
+                l[6] = X(n+50)
+                l[7] = X(n+60)
+                l[8] = X(n+70)
+                l[9] = X(n+80)
+                l[10] = X(n+90)
+                l[11] = X(n+100)
+                l[12] = X(n+110)
+                l[13] = X(n+120)
+                l[14] = X(n+130)
+                l[15] = X(n+140)
+            if n < 1800:
+                check(len(l) == 16 + (n & 7))
+                check(l[0].x == 123)
+                check(l[1].x == n)
+                check(l[2].x == n+10)
+                check(l[3].x == n+20)
+                check(l[4].x == n+30)
+                check(l[5].x == n+40)
+                check(l[6].x == n+50)
+                check(l[7].x == n+60)
+                check(l[8].x == n+70)
+                check(l[9].x == n+80)
+                check(l[10].x == n+90)
+                check(l[11].x == n+100)
+                check(l[12].x == n+110)
+                check(l[13].x == n+120)
+                check(l[14].x == n+130)
+                check(l[15].x == n+140)
+            n -= x.foo
+            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
+        def after(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
+            check(len(l) >= 16)
+            check(l[0].x == 123)
+            check(l[1].x == 2)
+            check(l[2].x == 12)
+            check(l[3].x == 22)
+            check(l[4].x == 32)
+            check(l[5].x == 42)
+            check(l[6].x == 52)
+            check(l[7].x == 62)
+            check(l[8].x == 72)
+            check(l[9].x == 82)
+            check(l[10].x == 92)
+            check(l[11].x == 102)
+            check(l[12].x == 112)
+            check(l[13].x == 122)
+            check(l[14].x == 132)
+            check(l[15].x == 142)
+        return before, f, after
+
+    def test_compile_framework_8(self):
+        self.run('compile_framework_8')
+
     def define_compile_framework_external_exception_handling(cls):
         def before(n, x):
             x = X(0)
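
The new compile_framework_8 test above exercises write_barrier_from_array:
writes into an array of pointers whose length is not a compile-time constant.
As a rough, self-contained sketch of the underlying idea only (names and sizes
below are illustrative, not PyPy's actual minimark code): instead of
remembering every written element of a big old array, the GC remembers which
"card" (fixed-size block of indices) was touched, and the next minor
collection scans only the marked cards.  This is also what the updated
COND_CALL_GC_WB comment further down refers to with the [arrayptr, index]
form.

    CARD_SIZE = 32                    # array elements covered by one card

    class ToyArrayHeader(object):
        def __init__(self, length):
            ncards = (length + CARD_SIZE - 1) // CARD_SIZE
            self.cards = [False] * ncards     # one mark bit per card
            self.cards_set = False

    old_objects_with_cards_set = []

    def write_barrier_from_array(hdr, index):
        # called before 'array[index] = some_gc_pointer'
        if not hdr.cards_set:
            old_objects_with_cards_set.append(hdr)
            hdr.cards_set = True
        hdr.cards[index // CARD_SIZE] = True  # remember only the card

    hdr = ToyArrayHeader(100)
    write_barrier_from_array(hdr, 42)
    assert hdr.cards[1] and not hdr.cards[0]
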
diff --git a/pypy/jit/metainterp/compile.py b/pypy/jit/metainterp/compile.py
--- a/pypy/jit/metainterp/compile.py
+++ b/pypy/jit/metainterp/compile.py
@@ -4,6 +4,7 @@
 from pypy.objspace.flow.model import Constant, Variable
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.rlib.debug import debug_start, debug_stop
+from pypy.rlib import rstack
 from pypy.conftest import option
 from pypy.tool.sourcetools import func_with_new_name
 
@@ -452,9 +453,17 @@
         # Called during a residual call from the assembler, if the code
         # actually needs to force one of the virtualrefs or the virtualizable.
         # Implemented by forcing *all* virtualrefs and the virtualizable.
-        faildescr = cpu.force(token)
-        assert isinstance(faildescr, ResumeGuardForcedDescr)
-        faildescr.handle_async_forcing(token)
+
+        # don't interrupt me! If the stack runs out in force_from_resumedata()
+        # then we have seen cpu.force() but not self.save_data(), leaving us
+        # in an inconsistent state
+        rstack._stack_criticalcode_start()
+        try:
+            faildescr = cpu.force(token)
+            assert isinstance(faildescr, ResumeGuardForcedDescr)
+            faildescr.handle_async_forcing(token)
+        finally:
+            rstack._stack_criticalcode_stop()
 
     def handle_async_forcing(self, force_token):
         from pypy.jit.metainterp.resume import force_from_resumedata
diff --git a/pypy/jit/metainterp/resoperation.py b/pypy/jit/metainterp/resoperation.py
--- a/pypy/jit/metainterp/resoperation.py
+++ b/pypy/jit/metainterp/resoperation.py
@@ -471,7 +471,8 @@
     'STRSETITEM/3',
     'UNICODESETITEM/3',
     #'RUNTIMENEW/1',     # ootype operation
-    'COND_CALL_GC_WB/2d', # [objptr, newvalue]   (for the write barrier)
+    'COND_CALL_GC_WB/2d', # [objptr, newvalue] or [arrayptr, index]
+                          # (for the write barrier; the latter form is for arrays)
     'DEBUG_MERGE_POINT/2',      # debugging only
     'JIT_DEBUG/*',              # debugging only
     'VIRTUAL_REF_FINISH/2',   # removed before it's passed to the backend
diff --git a/pypy/jit/metainterp/test/test_compile.py b/pypy/jit/metainterp/test/test_compile.py
--- a/pypy/jit/metainterp/test/test_compile.py
+++ b/pypy/jit/metainterp/test/test_compile.py
@@ -66,6 +66,8 @@
     call_pure_results = {}
     class jitdriver_sd:
         warmstate = FakeState()
+        on_compile = staticmethod(lambda *args: None)
+        on_compile_bridge = staticmethod(lambda *args: None)
 
 def test_compile_new_loop():
     cpu = FakeCPU()
diff --git a/pypy/jit/metainterp/test/test_warmstate.py b/pypy/jit/metainterp/test/test_warmstate.py
--- a/pypy/jit/metainterp/test/test_warmstate.py
+++ b/pypy/jit/metainterp/test/test_warmstate.py
@@ -181,6 +181,7 @@
         cpu = None
         memory_manager = None
     class FakeJitDriverSD:
+        jitdriver = None
         _green_args_spec = [lltype.Signed, lltype.Float]
         _get_printable_location_ptr = None
         _confirm_enter_jit_ptr = None
@@ -207,6 +208,7 @@
         cpu = None
         memory_manager = None
     class FakeJitDriverSD:
+        jitdriver = None
         _green_args_spec = [lltype.Signed, lltype.Float]
         _get_printable_location_ptr = llhelper(GET_LOCATION, get_location)
         _confirm_enter_jit_ptr = None
@@ -230,6 +232,7 @@
         cpu = None
         memory_manager = None
     class FakeJitDriverSD:
+        jitdriver = None
         _green_args_spec = [lltype.Signed, lltype.Float]
         _get_printable_location_ptr = None
         _confirm_enter_jit_ptr = llhelper(ENTER_JIT, confirm_enter_jit)
@@ -253,6 +256,7 @@
         cpu = None
         memory_manager = None
     class FakeJitDriverSD:
+        jitdriver = None
         _green_args_spec = [lltype.Signed, lltype.Float]
         _get_printable_location_ptr = None
         _confirm_enter_jit_ptr = None
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -32,16 +32,29 @@
                 space.wrap(reason))
             w_res = space.call_function(w_errorhandler, w_exc)
             if (not space.is_true(space.isinstance(w_res, space.w_tuple))
-                or space.len_w(w_res) != 2):
+                or space.len_w(w_res) != 2
+                or not space.is_true(space.isinstance(
+                                 space.getitem(w_res, space.wrap(0)),
+                                 space.w_unicode))):
+                if decode:
+                    msg = ("decoding error handler must return "
+                           "(unicode, int) tuple, not %s")
+                else:
+                    msg = ("encoding error handler must return "
+                           "(unicode, int) tuple, not %s")
                 raise operationerrfmt(
-                    space.w_TypeError,
-                    "encoding error handler must return "
-                    "(unicode, int) tuple, not %s",
+                    space.w_TypeError, msg,
                     space.str_w(space.repr(w_res)))
             w_replace, w_newpos = space.fixedview(w_res, 2)
-            newpos = space.int_w(w_newpos)
-            if (newpos < 0):
-                newpos = len(input) + newpos
+            try:
+                newpos = space.int_w(w_newpos)
+            except OperationError, e:
+                if not e.match(space, space.w_OverflowError):
+                    raise
+                newpos = -1
+            else:
+                if newpos < 0:
+                    newpos = len(input) + newpos
             if newpos < 0 or newpos > len(input):
                 raise operationerrfmt(
                     space.w_IndexError,
@@ -50,7 +63,9 @@
                 replace = space.unicode_w(w_replace)
                 return replace, newpos
             else:
-                replace = space.str_w(w_replace)
+                from pypy.objspace.std.unicodetype import encode_object
+                w_str = encode_object(space, w_replace, encoding, None)
+                replace = space.str_w(w_str)
                 return replace, newpos
         return unicode_call_errorhandler
 
@@ -160,15 +175,7 @@
 def ignore_errors(space, w_exc):
     check_exception(space, w_exc)
     w_end = space.getattr(w_exc, space.wrap('end'))
-    if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
-        return space.newtuple([space.wrap(''), w_end])
-    elif (space.isinstance_w(w_exc, space.w_UnicodeDecodeError) or
-          space.isinstance_w(w_exc, space.w_UnicodeTranslateError)):
-        return space.newtuple([space.wrap(u''), w_end])
-    else:
-        typename = space.type(w_exc).getname(space, '?')
-        raise operationerrfmt(space.w_TypeError,
-            "don't know how to handle %s in error callback", typename)
+    return space.newtuple([space.wrap(u''), w_end])
 
 def replace_errors(space, w_exc):
     check_exception(space, w_exc)
@@ -176,7 +183,7 @@
     w_end = space.getattr(w_exc, space.wrap('end'))
     size = space.int_w(w_end) - space.int_w(w_start)
     if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
-        text = '?' * size
+        text = u'?' * size
         return space.newtuple([space.wrap(text), w_end])
     elif space.isinstance_w(w_exc, space.w_UnicodeDecodeError):
         text = u'\ufffd'
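
The stricter checks above match the documented contract of codecs error
handlers: the handler must return a (unicode, int) tuple, the unicode
replacement is re-encoded with the same codec for encode errors, and anything
else raises TypeError.  A small app-level illustration; the handler name and
its logic are made up for the example:

    import codecs

    def escape_handler(exc):
        # must return (unicode_replacement, position_to_resume_at)
        if isinstance(exc, (UnicodeEncodeError, UnicodeDecodeError)):
            return (u"\\u%04x" % ord(exc.object[exc.start]), exc.start + 1)
        raise exc

    codecs.register_error("example.escape", escape_handler)

    assert u"abc\u1234def".encode("ascii", "example.escape") == "abc\\u1234def"

    # a handler returning a str (or anything but a 2-tuple starting with
    # unicode) is rejected, as in the new test_bad_handler_string_result
    codecs.register_error("example.bad", lambda exc: ("not unicode", exc.end))
    try:
        u"\u1234".encode("ascii", "example.bad")
    except TypeError:
        pass
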
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -540,6 +540,17 @@
         else:
             assert res == u"\x00\x00\x01\x00\x00" # UCS2 build
 
+    def test_encode_error_bad_handler(self):
+        import codecs
+        codecs.register_error("test.bad_handler", lambda e: (repl, 1))
+        assert u"xyz".encode("latin-1", "test.bad_handler") == "xyz"
+        repl = u"\u1234"
+        raises(UnicodeEncodeError, u"\u5678".encode, "latin-1",
+               "test.bad_handler")
+        repl = u"\u00E9"
+        s = u"\u5678".encode("latin-1", "test.bad_handler")
+        assert s == '\xe9'
+
     def test_charmap_encode(self):
         assert 'xxx'.encode('charmap') == 'xxx'
 
@@ -593,3 +604,11 @@
         assert u'caf\xe9'.encode('mbcs') == 'caf\xe9'
         assert u'\u040a'.encode('mbcs') == '?' # some cyrillic letter
         assert 'cafx\e9'.decode('mbcs') == u'cafx\e9'
+
+    def test_bad_handler_string_result(self):
+        import _codecs
+        def f(exc):
+            return ('foo', exc.end)
+        _codecs.register_error("test.test_codecs_not_a_string", f)
+        raises(TypeError, u'\u1234'.encode, 'ascii',
+               'test.test_codecs_not_a_string')
diff --git a/pypy/module/_multibytecodec/c_codecs.py b/pypy/module/_multibytecodec/c_codecs.py
--- a/pypy/module/_multibytecodec/c_codecs.py
+++ b/pypy/module/_multibytecodec/c_codecs.py
@@ -3,6 +3,8 @@
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
 from pypy.tool.autopath import pypydir
 
+UNICODE_REPLACEMENT_CHARACTER = u'\uFFFD'
+
 
 class EncodeDecodeError(Exception):
     def __init__(self, start, end, reason):
@@ -103,8 +105,12 @@
                                           [DECODEBUF_P], rffi.SSIZE_T)
 pypy_cjk_dec_inbuf_consumed = llexternal('pypy_cjk_dec_inbuf_consumed',
                                          [DECODEBUF_P], rffi.SSIZE_T)
+pypy_cjk_dec_replace_on_error = llexternal('pypy_cjk_dec_replace_on_error',
+                                           [DECODEBUF_P, rffi.CWCHARP,
+                                            rffi.SSIZE_T, rffi.SSIZE_T],
+                                           rffi.SSIZE_T)
 
-def decode(codec, stringdata):
+def decode(codec, stringdata, errors="strict", errorcb=None, namecb=None):
     inleft = len(stringdata)
     inbuf = rffi.get_nonmovingbuffer(stringdata)
     try:
@@ -112,10 +118,12 @@
         if not decodebuf:
             raise MemoryError
         try:
-            r = pypy_cjk_dec_chunk(decodebuf)
-            if r != 0:
-                multibytecodec_decerror(decodebuf, r)
-                assert False
+            while True:
+                r = pypy_cjk_dec_chunk(decodebuf)
+                if r == 0:
+                    break
+                multibytecodec_decerror(decodebuf, r, errors,
+                                        errorcb, namecb, stringdata)
             src = pypy_cjk_dec_outbuf(decodebuf)
             length = pypy_cjk_dec_outlen(decodebuf)
             return rffi.wcharpsize2unicode(src, length)
@@ -126,7 +134,8 @@
     finally:
         rffi.free_nonmovingbuffer(stringdata, inbuf)
 
-def multibytecodec_decerror(decodebuf, e):
+def multibytecodec_decerror(decodebuf, e, errors,
+                            errorcb, namecb, stringdata):
     if e > 0:
         reason = "illegal multibyte sequence"
         esize = e
@@ -138,12 +147,27 @@
     else:
         raise RuntimeError
     #
-    # if errors == ERROR_REPLACE:...
-    # if errors == ERROR_IGNORE or errors == ERROR_REPLACE:...
+    # compute the unicode to use as a replacement -> 'replace', and
+    # the current position in the input 'unicodedata' -> 'end'
     start = pypy_cjk_dec_inbuf_consumed(decodebuf)
     end = start + esize
-    if 1:  # errors == ERROR_STRICT:
+    if errors == "strict":
         raise EncodeDecodeError(start, end, reason)
+    elif errors == "ignore":
+        replace = u""
+    elif errors == "replace":
+        replace = UNICODE_REPLACEMENT_CHARACTER
+    else:
+        assert errorcb
+        replace, end = errorcb(errors, namecb, reason,
+                               stringdata, start, end)
+    inbuf = rffi.get_nonmoving_unicodebuffer(replace)
+    try:
+        r = pypy_cjk_dec_replace_on_error(decodebuf, inbuf, len(replace), end)
+    finally:
+        rffi.free_nonmoving_unicodebuffer(replace, inbuf)
+    if r == MBERR_NOMEMORY:
+        raise MemoryError
 
 # ____________________________________________________________
 # Encoding
@@ -165,8 +189,12 @@
                                           [ENCODEBUF_P], rffi.SSIZE_T)
 pypy_cjk_enc_inbuf_consumed = llexternal('pypy_cjk_enc_inbuf_consumed',
                                          [ENCODEBUF_P], rffi.SSIZE_T)
+pypy_cjk_enc_replace_on_error = llexternal('pypy_cjk_enc_replace_on_error',
+                                           [ENCODEBUF_P, rffi.CCHARP,
+                                            rffi.SSIZE_T, rffi.SSIZE_T],
+                                           rffi.SSIZE_T)
 
-def encode(codec, unicodedata):
+def encode(codec, unicodedata, errors="strict", errorcb=None, namecb=None):
     inleft = len(unicodedata)
     inbuf = rffi.get_nonmoving_unicodebuffer(unicodedata)
     try:
@@ -174,14 +202,18 @@
         if not encodebuf:
             raise MemoryError
         try:
-            r = pypy_cjk_enc_chunk(encodebuf)
-            if r != 0:
-                multibytecodec_encerror(encodebuf, r)
-                assert False
-            r = pypy_cjk_enc_reset(encodebuf)
-            if r != 0:
-                multibytecodec_encerror(encodebuf, r)
-                assert False
+            while True:
+                r = pypy_cjk_enc_chunk(encodebuf)
+                if r == 0:
+                    break
+                multibytecodec_encerror(encodebuf, r, errors,
+                                        codec, errorcb, namecb, unicodedata)
+            while True:
+                r = pypy_cjk_enc_reset(encodebuf)
+                if r == 0:
+                    break
+                multibytecodec_encerror(encodebuf, r, errors,
+                                        codec, errorcb, namecb, unicodedata)
             src = pypy_cjk_enc_outbuf(encodebuf)
             length = pypy_cjk_enc_outlen(encodebuf)
             return rffi.charpsize2str(src, length)
@@ -192,7 +224,8 @@
     finally:
         rffi.free_nonmoving_unicodebuffer(unicodedata, inbuf)
 
-def multibytecodec_encerror(encodebuf, e):
+def multibytecodec_encerror(encodebuf, e, errors,
+                            codec, errorcb, namecb, unicodedata):
     if e > 0:
         reason = "illegal multibyte sequence"
         esize = e
@@ -204,9 +237,27 @@
     else:
         raise RuntimeError
     #
-    # if errors == ERROR_REPLACE:...
-    # if errors == ERROR_IGNORE or errors == ERROR_REPLACE:...
+    # compute the string to use as a replacement -> 'replace', and
+    # the current position in the input 'unicodedata' -> 'end'
     start = pypy_cjk_enc_inbuf_consumed(encodebuf)
     end = start + esize
-    if 1:  # errors == ERROR_STRICT:
+    if errors == "strict":
         raise EncodeDecodeError(start, end, reason)
+    elif errors == "ignore":
+        replace = ""
+    elif errors == "replace":
+        try:
+            replace = encode(codec, u"?")
+        except EncodeDecodeError:
+            replace = "?"
+    else:
+        assert errorcb
+        replace, end = errorcb(errors, namecb, reason,
+                               unicodedata, start, end)
+    inbuf = rffi.get_nonmovingbuffer(replace)
+    try:
+        r = pypy_cjk_enc_replace_on_error(encodebuf, inbuf, len(replace), end)
+    finally:
+        rffi.free_nonmovingbuffer(replace, inbuf)
+    if r == MBERR_NOMEMORY:
+        raise MemoryError
diff --git a/pypy/module/_multibytecodec/interp_multibytecodec.py b/pypy/module/_multibytecodec/interp_multibytecodec.py
--- a/pypy/module/_multibytecodec/interp_multibytecodec.py
+++ b/pypy/module/_multibytecodec/interp_multibytecodec.py
@@ -3,6 +3,7 @@
 from pypy.interpreter.typedef import TypeDef
 from pypy.interpreter.error import OperationError
 from pypy.module._multibytecodec import c_codecs
+from pypy.module._codecs.interp_codecs import CodecState
 
 
 class MultibyteCodec(Wrappable):
@@ -13,13 +14,13 @@
 
     @unwrap_spec(input=str, errors="str_or_None")
     def decode(self, space, input, errors=None):
-        if errors is not None and errors != 'strict':
-            raise OperationError(space.w_NotImplementedError,    # XXX
-                                 space.wrap("errors='%s' in _multibytecodec"
-                                            % errors))
+        if errors is None:
+            errors = 'strict'
+        state = space.fromcache(CodecState)
         #
         try:
-            output = c_codecs.decode(self.codec, input)
+            output = c_codecs.decode(self.codec, input, errors,
+                                     state.decode_error_handler, self.name)
         except c_codecs.EncodeDecodeError, e:
             raise OperationError(
                 space.w_UnicodeDecodeError,
@@ -37,13 +38,13 @@
 
     @unwrap_spec(input=unicode, errors="str_or_None")
     def encode(self, space, input, errors=None):
-        if errors is not None and errors != 'strict':
-            raise OperationError(space.w_NotImplementedError,    # XXX
-                                 space.wrap("errors='%s' in _multibytecodec"
-                                            % errors))
+        if errors is None:
+            errors = 'strict'
+        state = space.fromcache(CodecState)
         #
         try:
-            output = c_codecs.encode(self.codec, input)
+            output = c_codecs.encode(self.codec, input, errors,
+                                     state.encode_error_handler, self.name)
         except c_codecs.EncodeDecodeError, e:
             raise OperationError(
                 space.w_UnicodeEncodeError,
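
With the two methods above now passing errors, the error callback and the
codec name down to c_codecs, the CJK codecs accept the standard error modes
instead of raising NotImplementedError for anything but 'strict'.  A short
app-level sketch of the resulting behaviour, going through the normal
encodings machinery; the expected values are copied from the tests that
follow:

    # decoding: the bad sequence is replaced by U+FFFD or skipped
    assert "def~{}abc".decode("hz", "replace") == u"def\ufffd\u5fcf"
    assert "def~{}abc".decode("hz", "ignore") == u"def\u5fcf"
    # encoding: u'\u1234' has no hz encoding
    assert u"abc\u1234def".encode("hz", "replace") == "abc?def"
    assert u"abc\u1234def".encode("hz", "ignore") == "abcdef"
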
diff --git a/pypy/module/_multibytecodec/test/test_app_codecs.py b/pypy/module/_multibytecodec/test/test_app_codecs.py
--- a/pypy/module/_multibytecodec/test/test_app_codecs.py
+++ b/pypy/module/_multibytecodec/test/test_app_codecs.py
@@ -36,6 +36,36 @@
         e = raises(UnicodeDecodeError, codec.decode, "~{xyz}").value
         assert e.args == ('hz', '~{xyz}', 2, 4, 'illegal multibyte sequence')
 
+    def test_decode_hz_ignore(self):
+        import _codecs_cn
+        codec = _codecs_cn.getcodec("hz")
+        r = codec.decode("def~{}abc", errors='ignore')
+        assert r == (u'def\u5fcf', 9)
+        r = codec.decode("def~{}abc", 'ignore')
+        assert r == (u'def\u5fcf', 9)
+
+    def test_decode_hz_replace(self):
+        import _codecs_cn
+        codec = _codecs_cn.getcodec("hz")
+        r = codec.decode("def~{}abc", errors='replace')
+        assert r == (u'def\ufffd\u5fcf', 9)
+        r = codec.decode("def~{}abc", 'replace')
+        assert r == (u'def\ufffd\u5fcf', 9)
+
+    def test_decode_custom_error_handler(self):
+        import codecs
+        codecs.register_error("test.decode_custom_error_handler",
+                              lambda e: (u'\u1234\u5678', e.end))
+        u = "abc\xDD".decode("hz", "test.decode_custom_error_handler")
+        assert u == u'abc\u1234\u5678'
+
+    def test_decode_custom_error_handler_overflow(self):
+        import codecs
+        import sys
+        codecs.register_error("test.test_decode_custom_error_handler_overflow",
+                              lambda e: (u'', sys.maxint + 1))
+        raises(IndexError, "abc\xDD".decode, "hz", "test.test_decode_custom_error_handler_overflow")
+
     def test_encode_hz(self):
         import _codecs_cn
         codec = _codecs_cn.getcodec("hz")
@@ -54,3 +84,24 @@
         assert e.start == 3
         assert e.end == 4
         assert e.reason == 'illegal multibyte sequence'
+
+    def test_encode_hz_ignore(self):
+        import _codecs_cn
+        codec = _codecs_cn.getcodec("hz")
+        r = codec.encode(u'abc\u1234def', 'ignore')
+        assert r == ('abcdef', 7)
+        assert type(r[0]) is str
+
+    def test_encode_hz_replace(self):
+        import _codecs_cn
+        codec = _codecs_cn.getcodec("hz")
+        r = codec.encode(u'abc\u1234def', 'replace')
+        assert r == ('abc?def', 7)
+        assert type(r[0]) is str
+
+    def test_encode_custom_error_handler(self):
+        import codecs
+        codecs.register_error("test.multi_bad_handler", lambda e: (repl, 1))
+        repl = u"\u2014"
+        s = u"\uDDA1".encode("gbk", "test.multi_bad_handler")
+        assert s == '\xA1\xAA'
diff --git a/pypy/module/_multibytecodec/test/test_c_codecs.py b/pypy/module/_multibytecodec/test/test_c_codecs.py
--- a/pypy/module/_multibytecodec/test/test_c_codecs.py
+++ b/pypy/module/_multibytecodec/test/test_c_codecs.py
@@ -36,6 +36,16 @@
     assert e.end == 4
     assert e.reason == "illegal multibyte sequence"
 
+def test_decode_hz_ignore():
+    c = getcodec("hz")
+    u = decode(c, 'def~{}abc', 'ignore')
+    assert u == u'def\u5fcf'
+
+def test_decode_hz_replace():
+    c = getcodec("hz")
+    u = decode(c, 'def~{}abc', 'replace')
+    assert u == u'def\ufffd\u5fcf'
+
 def test_encode_hz():
     c = getcodec("hz")
     s = encode(c, u'foobar')
@@ -51,6 +61,16 @@
     assert e.end == 4
     assert e.reason == "illegal multibyte sequence"
 
+def test_encode_hz_ignore():
+    c = getcodec("hz")
+    s = encode(c, u'abc\u1234def', 'ignore')
+    assert s == 'abcdef'
+
+def test_encode_hz_replace():
+    c = getcodec("hz")
+    s = encode(c, u'abc\u1234def', 'replace')
+    assert s == 'abc?def'
+
 def test_encode_jisx0208():
     c = getcodec('iso2022_jp')
     s = encode(c, u'\u83ca\u5730\u6642\u592b')
diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py
--- a/pypy/module/cpyext/api.py
+++ b/pypy/module/cpyext/api.py
@@ -348,6 +348,7 @@
     '_Py_TrueStruct#': ('PyObject*', 'space.w_True'),
     '_Py_ZeroStruct#': ('PyObject*', 'space.w_False'),
     '_Py_NotImplementedStruct#': ('PyObject*', 'space.w_NotImplemented'),
+    '_Py_EllipsisObject#': ('PyObject*', 'space.w_Ellipsis'),
     'PyDateTimeAPI': ('PyDateTime_CAPI*', 'None'),
     }
 FORWARD_DECLS = []
@@ -966,6 +967,7 @@
     state = space.fromcache(State)
     if state.find_extension(name, path) is not None:
         return
+    old_context = state.package_context
     state.package_context = name, path
     try:
         from pypy.rlib import rdynload
@@ -991,7 +993,7 @@
         generic_cpy_call(space, initfunc)
         state.check_and_raise_exception()
     finally:
-        state.package_context = None, None
+        state.package_context = old_context
     state.fixup_extension(name, path)
 
 @specialize.ll()
diff --git a/pypy/module/cpyext/classobject.py b/pypy/module/cpyext/classobject.py
--- a/pypy/module/cpyext/classobject.py
+++ b/pypy/module/cpyext/classobject.py
@@ -31,4 +31,9 @@
         return w_result
     return w_instance.w_class.lookup(space, name)
 
+@cpython_api([PyObject, PyObject, PyObject], PyObject)
+def PyClass_New(space, w_bases, w_dict, w_name):
+    w_classobj = space.gettypefor(W_ClassObject)
+    return space.call_function(w_classobj,
+                               w_name, w_bases, w_dict)
 
diff --git a/pypy/module/cpyext/frameobject.py b/pypy/module/cpyext/frameobject.py
--- a/pypy/module/cpyext/frameobject.py
+++ b/pypy/module/cpyext/frameobject.py
@@ -1,6 +1,7 @@
 from pypy.rpython.lltypesystem import rffi, lltype
 from pypy.module.cpyext.api import (
-    cpython_api, bootstrap_function, PyObjectFields, cpython_struct)
+    cpython_api, bootstrap_function, PyObjectFields, cpython_struct,
+    CANNOT_FAIL)
 from pypy.module.cpyext.pyobject import (
     PyObject, Py_DecRef, make_ref, from_ref, track_reference,
     make_typedescr, get_typedescr)
@@ -9,6 +10,7 @@
 from pypy.module.cpyext.funcobject import PyCodeObject
 from pypy.interpreter.pyframe import PyFrame
 from pypy.interpreter.pycode import PyCode
+from pypy.interpreter.pytraceback import PyTraceback
 
 PyFrameObjectStruct = lltype.ForwardReference()
 PyFrameObject = lltype.Ptr(PyFrameObjectStruct)
@@ -80,3 +82,8 @@
     frame = space.interp_w(PyFrame, w_frame)
     record_application_traceback(space, state.operror, frame, 0)
     return 0
+
+@cpython_api([PyObject], rffi.INT_real, error=CANNOT_FAIL)
+def PyTraceBack_Check(space, w_obj):
+    obj = space.interpclass_w(w_obj)
+    return obj is not None and isinstance(obj, PyTraceback)
diff --git a/pypy/module/cpyext/funcobject.py b/pypy/module/cpyext/funcobject.py
--- a/pypy/module/cpyext/funcobject.py
+++ b/pypy/module/cpyext/funcobject.py
@@ -69,6 +69,10 @@
     assert isinstance(w_method, Method)
     return borrow_from(w_method, w_method.w_class)
 
+@cpython_api([PyObject], PyObject)
+def PyClassMethod_New(space, w_function):
+    return space.call_method(space.builtin, "classmethod", w_function)
+
 def unwrap_list_of_strings(space, w_list):
     return [space.str_w(w_item) for w_item in space.fixedview(w_list)]
 
diff --git a/pypy/module/cpyext/intobject.py b/pypy/module/cpyext/intobject.py
--- a/pypy/module/cpyext/intobject.py
+++ b/pypy/module/cpyext/intobject.py
@@ -4,7 +4,7 @@
 from pypy.module.cpyext.api import (
     cpython_api, build_type_checkers, PyObject,
     CONST_STRING, CANNOT_FAIL, Py_ssize_t)
-from pypy.rlib.rarithmetic import r_uint
+from pypy.rlib.rarithmetic import r_uint, intmask, LONG_TEST
 import sys
 
 PyInt_Check, PyInt_CheckExact = build_type_checkers("Int")
@@ -73,13 +73,24 @@
                              space.wrap("an integer is required, got NULL"))
     return space.int_w(w_obj) # XXX this is wrong on win64
 
+LONG_MAX = int(LONG_TEST - 1)
+
+@cpython_api([rffi.SIZE_T], PyObject)
+def PyInt_FromSize_t(space, ival):
+    """Create a new integer object with a value of ival. If the value exceeds
+    LONG_MAX, a long integer object is returned.
+    """
+    if ival <= LONG_MAX:
+        return space.wrap(intmask(ival))
+    return space.wrap(ival)
+
 @cpython_api([Py_ssize_t], PyObject)
 def PyInt_FromSsize_t(space, ival):
     """Create a new integer object with a value of ival. If the value is larger
     than LONG_MAX or smaller than LONG_MIN, a long integer object is
     returned.
     """
-    return space.wrap(ival) # XXX this is wrong on win64
+    return space.wrap(ival)
 
 @cpython_api([CONST_STRING, rffi.CCHARPP, rffi.INT_real], PyObject)
 def PyInt_FromString(space, str, pend, base):
diff --git a/pypy/module/cpyext/number.py b/pypy/module/cpyext/number.py
--- a/pypy/module/cpyext/number.py
+++ b/pypy/module/cpyext/number.py
@@ -49,6 +49,13 @@
     failure.  This is the equivalent of the Python expression long(o)."""
     return space.long(w_obj)
 
+@cpython_api([PyObject], PyObject)
+def PyNumber_Index(space, w_obj):
+    """Returns the o converted to a Python int or long on success or NULL with a
+    TypeError exception raised on failure.
+    """
+    return space.index(w_obj)
+
 def func_rename(newname):
     return lambda func: func_with_new_name(func, newname)
 
diff --git a/pypy/module/cpyext/src/modsupport.c b/pypy/module/cpyext/src/modsupport.c
--- a/pypy/module/cpyext/src/modsupport.c
+++ b/pypy/module/cpyext/src/modsupport.c
@@ -611,8 +611,8 @@
 	if (result != NULL && n > 0) {
 		for (i = 0; i < n; ++i) {
 			tmp = (PyObject *)va_arg(va, PyObject *);
+			Py_INCREF(tmp);
 			PyTuple_SET_ITEM(result, i, tmp);
-			Py_INCREF(tmp);
 		}
 	}
 	return result;
diff --git a/pypy/module/cpyext/stringobject.py b/pypy/module/cpyext/stringobject.py
--- a/pypy/module/cpyext/stringobject.py
+++ b/pypy/module/cpyext/stringobject.py
@@ -2,7 +2,7 @@
 from pypy.rpython.lltypesystem import rffi, lltype
 from pypy.module.cpyext.api import (
     cpython_api, cpython_struct, bootstrap_function, build_type_checkers,
-    PyObjectFields, Py_ssize_t, CONST_STRING)
+    PyObjectFields, Py_ssize_t, CONST_STRING, CANNOT_FAIL)
 from pypy.module.cpyext.pyerrors import PyErr_BadArgument
 from pypy.module.cpyext.pyobject import (
     PyObject, PyObjectP, Py_DecRef, make_ref, from_ref, track_reference,
@@ -203,6 +203,10 @@
     ref[0] = rffi.cast(PyObject, py_newstr)
     return 0
 
+@cpython_api([PyObject, PyObject], rffi.INT, error=CANNOT_FAIL)
+def _PyString_Eq(space, w_str1, w_str2):
+    return space.eq_w(w_str1, w_str2)
+
 @cpython_api([PyObjectP, PyObject], lltype.Void)
 def PyString_Concat(space, ref, w_newpart):
     """Create a new string object in *string containing the contents of newpart
diff --git a/pypy/module/cpyext/stubs.py b/pypy/module/cpyext/stubs.py
--- a/pypy/module/cpyext/stubs.py
+++ b/pypy/module/cpyext/stubs.py
@@ -172,12 +172,6 @@
     This is equivalent to (PyBUF_ND)."""
     raise NotImplementedError
 
-@cpython_api([Py_buffer], lltype.Void)
-def PyBuffer_Release(space, view):
-    """Release the buffer view.  This should be called when the buffer
-    is no longer being used as it may free memory from it."""
-    raise NotImplementedError
-
 @cpython_api([rffi.CCHARP], Py_ssize_t, error=CANNOT_FAIL)
 def PyBuffer_SizeFromFormat(space, format):
     """Return the implied ~Py_buffer.itemsize from the struct-stype
@@ -198,13 +192,6 @@
     given shape with the given number of bytes per element."""
     raise NotImplementedError
 
-@cpython_api([Py_buffer, PyObject, rffi.VOIDP, Py_ssize_t, rffi.INT_real, rffi.INT_real], rffi.INT_real, error=-1)
-def PyBuffer_FillInfo(space, view, obj, buf, len, readonly, infoflags):
-    """Fill in a buffer-info structure, view, correctly for an exporter that can
-    only share a contiguous chunk of memory of "unsigned bytes" of the given
-    length.  Return 0 on success and -1 (with raising an error) on error."""
-    raise NotImplementedError
-
 @cpython_api([Py_buffer], PyObject)
 def PyMemoryView_FromBuffer(space, view):
     """Create a memoryview object wrapping the given buffer-info structure view.
@@ -1094,14 +1081,6 @@
     """
     raise NotImplementedError
 
-@cpython_api([PyObject], PyObject)
-def PyImport_ReloadModule(space, m):
-    """Reload a module.  This is best described by referring to the built-in
-    Python function reload(), as the standard reload() function calls this
-    function directly.  Return a new reference to the reloaded module, or NULL
-    with an exception set on failure (the module still exists in this case)."""
-    raise NotImplementedError
-
 @cpython_api([rffi.CCHARP, PyObject], PyObject)
 def PyImport_ExecCodeModule(space, name, co):
     """Given a module name (possibly of the form package.module) and a code
@@ -1140,13 +1119,6 @@
     of the bytecode file, in little-endian byte order."""
     raise NotImplementedError
 
-@cpython_api([], PyObject)
-def PyImport_GetModuleDict(space):
-    """Return the dictionary used for the module administration (a.k.a.
-    sys.modules).  Note that this is a per-interpreter variable."""
-    borrow_from()
-    raise NotImplementedError
-
 @cpython_api([PyObject], PyObject)
 def PyImport_GetImporter(space, path):
     """Return an importer object for a sys.path/pkg.__path__ item
@@ -1701,13 +1673,6 @@
     """
     raise NotImplementedError
 
-@cpython_api([rffi.SIZE_T], PyObject)
-def PyInt_FromSize_t(space, ival):
-    """Create a new integer object with a value of ival. If the value exceeds
-    LONG_MAX, a long integer object is returned.
-    """
-    raise NotImplementedError
-
 @cpython_api([PyObject], rffi.ULONGLONG, error=-1)
 def PyInt_AsUnsignedLongLongMask(space, io):
     """Will first attempt to cast the object to a PyIntObject or
@@ -1920,13 +1885,6 @@
     Reference counts are still not increased in this case."""
     raise NotImplementedError
 
-@cpython_api([PyObject], PyObject)
-def PyNumber_Index(space, o):
-    """Returns the o converted to a Python int or long on success or NULL with a
-    TypeError exception raised on failure.
-    """
-    raise NotImplementedError
-
 @cpython_api([PyObject, rffi.INT_real], PyObject)
 def PyNumber_ToBase(space, n, base):
     """Returns the integer n converted to base as a string with a base
@@ -2254,15 +2212,6 @@
     standard C library function exit(status)."""
     raise NotImplementedError
 
-@cpython_api([PyObject, Py_ssize_t, Py_ssize_t], PyObject)
-def PyTuple_GetSlice(space, p, low, high):
-    """Take a slice of the tuple pointed to by p from low to high and return it
-    as a new tuple.
-
-    This function used an int type for low and high. This might
-    require changes in your code for properly supporting 64-bit systems."""
-    raise NotImplementedError
-
 @cpython_api([], rffi.INT_real, error=CANNOT_FAIL)
 def PyTuple_ClearFreeList(space):
     """Clear the free list. Return the total number of freed items.
@@ -2275,14 +2224,6 @@
     """
     raise NotImplementedError
 
-@cpython_api([PyTypeObjectPtr], lltype.Void)
-def PyType_Modified(space, type):
-    """Invalidate the internal lookup cache for the type and all of its
-    subtypes.  This function must be called after any manual
-    modification of the attributes or base classes of the type.
-    """
-    raise NotImplementedError
-
 @cpython_api([PyObject], rffi.INT_real, error=CANNOT_FAIL)
 def PyType_IS_GC(space, o):
     """Return true if the type object includes support for the cycle detector; this
diff --git a/pypy/module/cpyext/test/test_classobject.py b/pypy/module/cpyext/test/test_classobject.py
--- a/pypy/module/cpyext/test/test_classobject.py
+++ b/pypy/module/cpyext/test/test_classobject.py
@@ -40,3 +40,14 @@
         assert not isinstance(api.PyObject_GetAttr(w_instance, space.wrap('f')), Function)
         # _PyInstance_Lookup returns the raw descriptor
         assert isinstance(api._PyInstance_Lookup(w_instance, space.wrap('f')), Function)
+
+    def test_pyclass_new(self, space, api):
+        w_bases = space.newtuple([])
+        w_dict = space.newdict()
+        w_name = space.wrap("C")
+        w_class = api.PyClass_New(w_bases, w_dict, w_name)
+        assert not space.isinstance_w(w_class, space.w_type)
+        w_instance = space.call_function(w_class)
+        assert api.PyInstance_Check(w_instance)
+        assert space.is_true(space.call_method(space.builtin, "isinstance",
+                                               w_instance, w_class))
diff --git a/pypy/module/cpyext/test/test_eval.py b/pypy/module/cpyext/test/test_eval.py
--- a/pypy/module/cpyext/test/test_eval.py
+++ b/pypy/module/cpyext/test/test_eval.py
@@ -193,3 +193,32 @@
             return args
         assert module.call_func(f) == ("text", 42, None)
         assert module.call_method("text") == 2
+
+    def test_CallFunctionObjArgs(self):
+        module = self.import_extension('foo', [
+            ("call_func", "METH_VARARGS",
+             """
+                PyObject *t = PyString_FromString("t");
+                PyObject *res = PyObject_CallFunctionObjArgs(
+                   PyTuple_GetItem(args, 0),
+                   Py_None, NULL);
+                Py_DECREF(t);
+                return res;
+             """),
+            ("call_method", "METH_VARARGS",
+             """
+                PyObject *t = PyString_FromString("t");
+                PyObject *count = PyString_FromString("count");
+                PyObject *res = PyObject_CallMethodObjArgs(
+                   PyTuple_GetItem(args, 0),
+                   count, t, NULL);
+                Py_DECREF(t);
+                Py_DECREF(count);
+                return res;
+             """),
+            ])
+        def f(*args):
+            return args
+        assert module.call_func(f) == (None,)
+        assert module.call_method("text") == 2
+        
diff --git a/pypy/module/cpyext/test/test_frameobject.py b/pypy/module/cpyext/test/test_frameobject.py
--- a/pypy/module/cpyext/test/test_frameobject.py
+++ b/pypy/module/cpyext/test/test_frameobject.py
@@ -64,3 +64,31 @@
         # Cython does not work on CPython as well...
         assert exc.traceback.tb_lineno == 42 # should be 48
         assert frame.f_lineno == 42
+
+    def test_traceback_check(self):
+        module = self.import_extension('foo', [
+            ("traceback_check", "METH_NOARGS",
+             """
+                 int check;
+                 PyObject *type, *value, *tb;
+                 PyObject *ret = PyRun_String("XXX", Py_eval_input, 
+                                              Py_None, Py_None);
+                 if (ret) {
+                     Py_DECREF(ret);
+                     PyErr_SetString(PyExc_AssertionError, "should raise");
+                     return NULL;
+                 }
+                 PyErr_Fetch(&type, &value, &tb);
+                 check = PyTraceBack_Check(tb);
+                 Py_XDECREF(type);
+                 Py_XDECREF(value);
+                 Py_XDECREF(tb);
+                 if (check) {
+                     Py_RETURN_TRUE;
+                 }
+                 else {
+                     Py_RETURN_FALSE;
+                 }
+             """),
+            ])
+        assert module.traceback_check()
diff --git a/pypy/module/cpyext/test/test_funcobject.py b/pypy/module/cpyext/test/test_funcobject.py
--- a/pypy/module/cpyext/test/test_funcobject.py
+++ b/pypy/module/cpyext/test/test_funcobject.py
@@ -44,3 +44,19 @@
         assert w_code.co_firstlineno == 3
         rffi.free_charp(filename)
         rffi.free_charp(funcname)
+
+    def test_classmethod(self, space, api):
+        w_function = space.appexec([], """():
+            def method(x): return x
+            return method
+        """)
+        w_class = space.call_function(space.w_type, space.wrap("C"),
+                                      space.newtuple([]), space.newdict())
+        w_instance = space.call_function(w_class)
+        # regular instance method
+        space.setattr(w_class, space.wrap("method"), w_function)
+        assert space.is_w(space.call_method(w_instance, "method"), w_instance)
+        # now a classmethod
+        w_classmethod = api.PyClassMethod_New(w_function)
+        space.setattr(w_class, space.wrap("classmethod"), w_classmethod)
+        assert space.is_w(space.call_method(w_instance, "classmethod"), w_class)
diff --git a/pypy/module/cpyext/test/test_intobject.py b/pypy/module/cpyext/test/test_intobject.py
--- a/pypy/module/cpyext/test/test_intobject.py
+++ b/pypy/module/cpyext/test/test_intobject.py
@@ -50,3 +50,19 @@
             ])
         assert module.from_string() == 0x1234
         assert type(module.from_string()) is int
+
+    def test_size_t(self):
+        module = self.import_extension('foo', [
+            ("values", "METH_NOARGS",
+             """
+                 return Py_BuildValue("NNNN",
+                     PyInt_FromSize_t(123),
+                     PyInt_FromSize_t((size_t)-1),
+                     PyInt_FromSsize_t(123),
+                     PyInt_FromSsize_t((size_t)-1));
+             """),
+            ])
+        values = module.values()
+        types = [type(x) for x in values]
+        assert types == [int, long, int, int]
+        
diff --git a/pypy/module/cpyext/test/test_number.py b/pypy/module/cpyext/test/test_number.py
--- a/pypy/module/cpyext/test/test_number.py
+++ b/pypy/module/cpyext/test/test_number.py
@@ -25,6 +25,15 @@
         assert api.PyInt_CheckExact(w_l)
         w_l = api.PyNumber_Int(space.wrap(2 << 65))
         assert api.PyLong_CheckExact(w_l)
+        w_l = api.PyNumber_Int(space.wrap(42.3))
+        assert api.PyInt_CheckExact(w_l)
+
+    def test_number_index(self, space, api):
+        w_l = api.PyNumber_Index(space.wrap(123L))
+        assert api.PyLong_CheckExact(w_l)
+        w_l = api.PyNumber_Index(space.wrap(42.3))
+        assert w_l is None
+        api.PyErr_Clear()
 
     def test_numbermethods(self, space, api):
         assert "ab" == space.unwrap(
diff --git a/pypy/module/cpyext/test/test_sliceobject.py b/pypy/module/cpyext/test/test_sliceobject.py
--- a/pypy/module/cpyext/test/test_sliceobject.py
+++ b/pypy/module/cpyext/test/test_sliceobject.py
@@ -67,3 +67,14 @@
              """),
             ])
         assert module.nullslice() == slice(None, None, None)
+
+    def test_ellipsis(self):
+        module = self.import_extension('foo', [
+            ("get_ellipsis", "METH_NOARGS",
+             """
+                 PyObject *ret = Py_Ellipsis;
+                 Py_INCREF(ret);
+                 return ret;
+             """),
+            ])
+        assert module.get_ellipsis() is Ellipsis
diff --git a/pypy/module/cpyext/test/test_stringobject.py b/pypy/module/cpyext/test/test_stringobject.py
--- a/pypy/module/cpyext/test/test_stringobject.py
+++ b/pypy/module/cpyext/test/test_stringobject.py
@@ -283,3 +283,7 @@
         self.raises(space, api, TypeError, api.PyString_AsEncodedObject,
             space.wrap(2), lltype.nullptr(rffi.CCHARP.TO), lltype.nullptr(rffi.CCHARP.TO)
         )
+
+    def test_eq(self, space, api):
+        assert 1 == api._PyString_Eq(space.wrap("hello"), space.wrap("hello"))
+        assert 0 == api._PyString_Eq(space.wrap("hello"), space.wrap("world"))
diff --git a/pypy/module/cpyext/test/test_tupleobject.py b/pypy/module/cpyext/test/test_tupleobject.py
--- a/pypy/module/cpyext/test/test_tupleobject.py
+++ b/pypy/module/cpyext/test/test_tupleobject.py
@@ -42,3 +42,9 @@
         assert api.PyTuple_Size(atuple) == 2
         assert space.eq_w(space.getitem(atuple, space.wrap(0)), space.wrap(0))
         assert space.eq_w(space.getitem(atuple, space.wrap(1)), space.wrap(1))
+
+    def test_getslice(self, space, api):
+        w_tuple = space.newtuple([space.wrap(i) for i in range(10)])
+        w_slice = api.PyTuple_GetSlice(w_tuple, 3, -3)
+        assert space.eq_w(w_slice,
+                          space.newtuple([space.wrap(i) for i in range(3, 7)]))
diff --git a/pypy/module/cpyext/tupleobject.py b/pypy/module/cpyext/tupleobject.py
--- a/pypy/module/cpyext/tupleobject.py
+++ b/pypy/module/cpyext/tupleobject.py
@@ -79,3 +79,10 @@
     Py_DecRef(space, ref[0])
     ref[0] = make_ref(space, py_newtuple)
     return 0
+
+@cpython_api([PyObject, Py_ssize_t, Py_ssize_t], PyObject)
+def PyTuple_GetSlice(space, w_obj, low, high):
+    """Take a slice of the tuple pointed to by p from low to high and return it
+    as a new tuple.
+    """
+    return space.getslice(w_obj, space.wrap(low), space.wrap(high))
diff --git a/pypy/module/cpyext/typeobject.py b/pypy/module/cpyext/typeobject.py
--- a/pypy/module/cpyext/typeobject.py
+++ b/pypy/module/cpyext/typeobject.py
@@ -650,3 +650,13 @@
     name = space.str_w(w_name)
     w_obj = w_type.lookup(name)
     return borrow_from(w_type, w_obj)
+
+@cpython_api([PyTypeObjectPtr], lltype.Void)
+def PyType_Modified(space, w_obj):
+    """Invalidate the internal lookup cache for the type and all of its
+    subtypes.  This function must be called after any manual
+    modification of the attributes or base classes of the type.
+    """
+    # PyPy already takes care of direct modifications to type.__dict__
+    # (which is a W_DictProxyObject).
+    pass
diff --git a/pypy/module/pypyjit/test/test_pypy_c.py b/pypy/module/pypyjit/test/test_pypy_c.py
--- a/pypy/module/pypyjit/test/test_pypy_c.py
+++ b/pypy/module/pypyjit/test/test_pypy_c.py
@@ -941,20 +941,6 @@
                                 ([a2, b2], 2000 * res2),
                                 ([a3, b3], 2000 * res3))
 
-    def test_dont_trace_every_iteration(self):
-        self.run_source('''
-        def main(a, b):
-            i = sa = 0
-            while i < 200:
-                if a > 0: pass
-                if 1 < b < 2: pass
-                sa += a % b
-                i += 1
-            return sa
-        ''', 22,  ([10, 20], 200 * (10 % 20)),
-                 ([-10, -20], 200 * (-10 % -20)),
-                        count_debug_merge_point=False)
-        assert self.jit_summary.tracing_no == 2
     def test_id_compare_optimization(self):
         # XXX: lower the instruction count, 35 is the old value.
         self.run_source("""
diff --git a/pypy/module/pypyjit/test_pypy_c/test_model.py b/pypy/module/pypyjit/test_pypy_c/test_model.py
--- a/pypy/module/pypyjit/test_pypy_c/test_model.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_model.py
@@ -5,6 +5,7 @@
 from lib_pypy import disassembler
 from pypy.tool.udir import udir
 from pypy.tool import logparser
+from pypy.jit.tool.jitoutput import parse_prof
 from pypy.module.pypyjit.test_pypy_c.model import Log, find_ids_range, find_ids, \
     LoopWithIds, OpMatcher
 
@@ -63,6 +64,13 @@
         rawtraces = logparser.extract_category(rawlog, 'jit-log-opt-')
         log = Log(rawtraces)
         log.result = eval(stdout)
+        #
+        summaries  = logparser.extract_category(rawlog, 'jit-summary')
+        if len(summaries) > 0:
+            log.jit_summary = parse_prof(summaries[-1])
+        else:
+            log.jit_summary = None
+        #
         return log
 
     def run_and_check(self, src, args=[], **jitopts):
diff --git a/pypy/module/pypyjit/test_pypy_c/test_pypy_c_new.py b/pypy/module/pypyjit/test_pypy_c/test_pypy_c_new.py
--- a/pypy/module/pypyjit/test_pypy_c/test_pypy_c_new.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_pypy_c_new.py
@@ -1786,7 +1786,7 @@
 
         log = self.run(main, [], threshold=80)
         loop, = log.loops_by_filename(self.filepath)
-        loop.match_by_id('loadattr',
+        assert loop.match_by_id('loadattr',
         '''
         guard_not_invalidated(descr=...)
         i19 = call(ConstClass(ll_dict_lookup), _, _, _, descr=...)
@@ -1811,11 +1811,43 @@
             a = A()
             while i < 100:
                 i += i in a # ID: contains
+                b = 0       # to make sure that JUMP_ABSOLUTE is not part of the ID
 
-            log = self.run(main, [], threshold=80)
-            loop, = log.loops_by_filename(self.filemath)
-            # XXX: haven't confirmed his is correct, it's probably missing a
-            # few instructions
-            loop.match_by_id("contains", """
-                i1 = int_add(i0, 1)
-            """)
+        log = self.run(main, [], threshold=80)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id("contains", """
+            guard_not_invalidated(descr=...)
+            i11 = force_token()
+            i12 = int_add_ovf(i5, i7)
+            guard_no_overflow(descr=...)
+        """)
+
+    def test_dont_trace_every_iteration(self):
+        def main(a, b):
+            i = sa = 0
+            while i < 300:
+                if a > 0:
+                    pass
+                if 1 < b < 2:
+                    pass
+                sa += a % b
+                i += 1
+            return sa
+        #
+        log = self.run(main, [10, 20], threshold=200)
+        assert log.result == 300 * (10 % 20)
+        assert log.jit_summary.tracing_no == 1
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i11 = int_lt(i7, 300)
+            guard_true(i11, descr=<Guard3>)
+            i12 = int_add_ovf(i8, i9)
+            guard_no_overflow(descr=<Guard4>)
+            i14 = int_add(i7, 1)
+            --TICK--
+            jump(..., descr=...)
+        """)
+        #
+        log = self.run(main, [-10, -20], threshold=200)
+        assert log.result == 300 * (-10 % -20)
+        assert log.jit_summary.tracing_no == 1
diff --git a/pypy/rlib/rgc.py b/pypy/rlib/rgc.py
--- a/pypy/rlib/rgc.py
+++ b/pypy/rlib/rgc.py
@@ -191,6 +191,21 @@
         hop.exception_cannot_occur()
         return hop.genop('gc_can_move', hop.args_v, resulttype=hop.r_result)
 
+def _make_sure_does_not_move(p):
+    """'p' is a non-null GC object.  This (tries to) make sure that the
+    object does not move any more, by forcing collections if needed.
+    Warning: should ideally only be used with the minimark GC, and only
+    on objects that are already a bit old, so have a chance to be
+    already non-movable."""
+    if not we_are_translated():
+        return
+    i = 0
+    while can_move(p):
+        if i > 6:
+            raise NotImplementedError("can't make object non-movable!")
+        collect(i)
+        i += 1
+
 def _heap_stats():
     raise NotImplementedError # can't be run directly
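
rgc._make_sure_does_not_move() above is the kind of primitive needed when the
address of a GC object is about to be exposed to code the GC cannot see, for
example a constant GC pointer written directly into generated machine code
(compare the record_constptrs change near the top of this diff).  A hedged
usage sketch; emit_const_pointer() is a made-up stand-in for whatever consumes
the raw address:

    from pypy.rlib import rgc

    def embed_gcref(assembler, gcref):
        # force the minimark GC to stop moving 'gcref' (may trigger a few
        # collections; a no-op before translation), then hand out its address
        rgc._make_sure_does_not_move(gcref)
        assembler.emit_const_pointer(gcref)   # hypothetical consumer
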
 
diff --git a/pypy/rlib/streamio.py b/pypy/rlib/streamio.py
--- a/pypy/rlib/streamio.py
+++ b/pypy/rlib/streamio.py
@@ -141,7 +141,8 @@
 def construct_stream_tower(stream, buffering, universal, reading, writing,
                            binary):
     if buffering == 0:   # no buffering
-        pass
+        if reading:      # force some minimal buffering for readline()
+            stream = ReadlineInputStream(stream)
     elif buffering == 1:   # line-buffering
         if writing:
             stream = LineBufferingOutputStream(stream)
@@ -749,6 +750,113 @@
                                               flush_buffers=False)
 
 
+class ReadlineInputStream(Stream):
+
+    """Minimal buffering input stream.
+
+    Only does buffering for readline().  The other kinds of reads, and
+    all writes, are not buffered at all.
+    """
+
+    bufsize = 2**13 # 8 K
+
+    def __init__(self, base, bufsize=-1):
+        self.base = base
+        self.do_read = base.read   # function to fill buffer some more
+        self.do_seek = base.seek   # seek to a byte offset
+        if bufsize == -1:     # Get default from the class
+            bufsize = self.bufsize
+        self.bufsize = bufsize  # buffer size (hint only)
+        self.buf = None         # raw data (may contain "\n")
+        self.bufstart = 0
+
+    def flush_buffers(self):
+        if self.buf is not None:
+            try:
+                self.do_seek(self.bufstart-len(self.buf), 1)
+            except MyNotImplementedError:
+                pass
+            else:
+                self.buf = None
+                self.bufstart = 0
+
+    def readline(self):
+        if self.buf is not None:
+            i = self.buf.find('\n', self.bufstart)
+        else:
+            self.buf = ''
+            i = -1
+        #
+        if i < 0:
+            self.buf = self.buf[self.bufstart:]
+            self.bufstart = 0
+            while True:
+                bufsize = max(self.bufsize, len(self.buf) >> 2)
+                data = self.do_read(bufsize)
+                if not data:
+                    result = self.buf              # end-of-file reached
+                    self.buf = None
+                    return result
+                startsearch = len(self.buf)   # there is no '\n' in buf so far
+                self.buf += data
+                i = self.buf.find('\n', startsearch)
+                if i >= 0:
+                    break
+        #
+        i += 1
+        result = self.buf[self.bufstart:i]
+        self.bufstart = i
+        return result
+
+    def peek(self):
+        if self.buf is None:
+            return ''
+        if self.bufstart > 0:
+            self.buf = self.buf[self.bufstart:]
+            self.bufstart = 0
+        return self.buf
+
+    def tell(self):
+        pos = self.base.tell()
+        if self.buf is not None:
+            pos -= (len(self.buf) - self.bufstart)
+        return pos
+
+    def readall(self):
+        result = self.base.readall()
+        if self.buf is not None:
+            result = self.buf[self.bufstart:] + result
+            self.buf = None
+            self.bufstart = 0
+        return result
+
+    def read(self, n):
+        if self.buf is None:
+            return self.do_read(n)
+        else:
+            m = n - (len(self.buf) - self.bufstart)
+            start = self.bufstart
+            if m > 0:
+                result = self.buf[start:] + self.do_read(m)
+                self.buf = None
+                self.bufstart = 0
+                return result
+            elif n >= 0:
+                self.bufstart = start + n
+                return self.buf[start : self.bufstart]
+            else:
+                return ''
+
+    seek       = PassThrough("seek",      flush_buffers=True)
+    write      = PassThrough("write",     flush_buffers=True)
+    truncate   = PassThrough("truncate",  flush_buffers=True)
+    flush      = PassThrough("flush",     flush_buffers=True)
+    flushable  = PassThrough("flushable", flush_buffers=False)
+    close      = PassThrough("close",     flush_buffers=False)
+    try_to_find_file_descriptor = PassThrough("try_to_find_file_descriptor",
+                                              flush_buffers=False)
+
+
 class BufferingOutputStream(Stream):
 
     """Standard buffering output stream.
diff --git a/pypy/rlib/test/test_streamio.py b/pypy/rlib/test/test_streamio.py
--- a/pypy/rlib/test/test_streamio.py
+++ b/pypy/rlib/test/test_streamio.py
@@ -1008,6 +1008,75 @@
             assert base.buf == data
 
 
+class TestReadlineInputStream:
+
+    packets = ["a", "b", "\n", "def", "\nxy\npq\nuv", "wx"]
+    lines = ["ab\n", "def\n", "xy\n", "pq\n", "uvwx"]
+
+    def makeStream(self, seek=False, tell=False, bufsize=-1):
+        base = TSource(self.packets)
+        self.source = base
+        def f(*args):
+            if seek is False:
+                raise NotImplementedError     # a bug!
+            if seek is None:
+                raise streamio.MyNotImplementedError   # can be caught
+            raise ValueError(seek)  # uh?
+        if not tell:
+            base.tell = f
+        if not seek:
+            base.seek = f
+        return streamio.ReadlineInputStream(base, bufsize)
+
+    def test_readline(self):
+        for file in [self.makeStream(), self.makeStream(bufsize=2)]:
+            i = 0
+            while 1:
+                r = file.readline()
+                if r == "":
+                    break
+                assert self.lines[i] == r
+                i += 1
+            assert i == len(self.lines)
+
+    def test_readline_and_read_interleaved(self):
+        for file in [self.makeStream(seek=True),
+                     self.makeStream(seek=True, bufsize=2)]:
+            i = 0
+            while 1:
+                firstchar = file.read(1)
+                if firstchar == "":
+                    break
+                r = file.readline()
+                assert r != ""
+                assert self.lines[i] == firstchar + r
+                i += 1
+            assert i == len(self.lines)
+
+    def test_readline_and_read_interleaved_no_seek(self):
+        for file in [self.makeStream(seek=None),
+                     self.makeStream(seek=None, bufsize=2)]:
+            i = 0
+            while 1:
+                firstchar = file.read(1)
+                if firstchar == "":
+                    break
+                r = file.readline()
+                assert r != ""
+                assert self.lines[i] == firstchar + r
+                i += 1
+            assert i == len(self.lines)
+
+    def test_readline_and_readall(self):
+        file = self.makeStream(seek=True, tell=True, bufsize=2)
+        r = file.readline()
+        assert r == 'ab\n'
+        assert file.tell() == 3
+        r = file.readall()
+        assert r == 'def\nxy\npq\nuvwx'
+        r = file.readall()
+        assert r == ''
+
 
 # Speed test
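
Taken together, the ReadlineInputStream class and the construct_stream_tower()
change above mean that a stream opened with buffering=0 still has a usable
readline(): only the reads done on behalf of readline() go through a small
private buffer, while plain read() and all writes stay unbuffered.  A minimal
usage sketch, assuming the usual rlib entry point open_file_as_stream() and an
arbitrary file name:

    from pypy.rlib import streamio

    def first_line_unbuffered(path):
        stream = streamio.open_file_as_stream(path, 'r', buffering=0)
        try:
            # buffering=0 no longer breaks readline(); see
            # construct_stream_tower() above
            return stream.readline()
        finally:
            stream.close()

    # first_line_unbuffered('/tmp/example.txt')
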
 
diff --git a/pypy/rpython/memory/gc/minimark.py b/pypy/rpython/memory/gc/minimark.py
--- a/pypy/rpython/memory/gc/minimark.py
+++ b/pypy/rpython/memory/gc/minimark.py
@@ -1020,6 +1020,7 @@
                 objhdr.tid |= GCFLAG_CARDS_SET
 
         remember_young_pointer_from_array._dont_inline_ = True
+        assert self.card_page_indices > 0
         self.remember_young_pointer_from_array = (
             remember_young_pointer_from_array)
 
diff --git a/pypy/rpython/memory/gctransform/framework.py b/pypy/rpython/memory/gctransform/framework.py
--- a/pypy/rpython/memory/gctransform/framework.py
+++ b/pypy/rpython/memory/gctransform/framework.py
@@ -860,9 +860,9 @@
 
     def gct_get_write_barrier_from_array_failing_case(self, hop):
         op = hop.spaceop
-        hop.genop("same_as",
-                  [self.write_barrier_from_array_failing_case_ptr],
-                  resultvar=op.result)
+        v = getattr(self, 'write_barrier_from_array_failing_case_ptr',
+                    lltype.nullptr(op.result.concretetype.TO))
+        hop.genop("same_as", [v], resultvar=op.result)
 
     def gct_zero_gc_pointers_inside(self, hop):
         if not self.malloc_zero_filled:
diff --git a/pypy/translator/c/src/cjkcodecs/multibytecodec.c b/pypy/translator/c/src/cjkcodecs/multibytecodec.c
--- a/pypy/translator/c/src/cjkcodecs/multibytecodec.c
+++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.c
@@ -1,4 +1,5 @@
 #include <stdlib.h>
+#include <string.h>
 #include "src/cjkcodecs/multibytecodec.h"
 
 
@@ -93,6 +94,22 @@
   return d->inbuf - d->inbuf_start;
 }
 
+Py_ssize_t pypy_cjk_dec_replace_on_error(struct pypy_cjk_dec_s* d,
+                                         Py_UNICODE *newbuf, Py_ssize_t newlen,
+                                         Py_ssize_t in_offset)
+{
+  if (newlen > 0)
+    {
+      if (d->outbuf + newlen > d->outbuf_end)
+        if (expand_decodebuffer(d, newlen) == -1)
+          return MBERR_NOMEMORY;
+      memcpy(d->outbuf, newbuf, newlen * sizeof(Py_UNICODE));
+      d->outbuf += newlen;
+    }
+  d->inbuf = d->inbuf_start + in_offset;
+  return 0;
+}
+
 /************************************************************/
 
 struct pypy_cjk_enc_s *pypy_cjk_enc_init(const MultibyteCodec *codec,
@@ -209,3 +226,19 @@
 {
   return d->inbuf - d->inbuf_start;
 }
+
+Py_ssize_t pypy_cjk_enc_replace_on_error(struct pypy_cjk_enc_s* d,
+                                         char *newbuf, Py_ssize_t newlen,
+                                         Py_ssize_t in_offset)
+{
+  if (newlen > 0)
+    {
+      if (d->outbuf + newlen > d->outbuf_end)
+        if (expand_encodebuffer(d, newlen) == -1)
+          return MBERR_NOMEMORY;
+      memcpy(d->outbuf, newbuf, newlen);
+      d->outbuf += newlen;
+    }
+  d->inbuf = d->inbuf_start + in_offset;
+  return 0;
+}
diff --git a/pypy/translator/c/src/cjkcodecs/multibytecodec.h b/pypy/translator/c/src/cjkcodecs/multibytecodec.h
--- a/pypy/translator/c/src/cjkcodecs/multibytecodec.h
+++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.h
@@ -102,6 +102,8 @@
 Py_ssize_t pypy_cjk_dec_outlen(struct pypy_cjk_dec_s *);
 Py_ssize_t pypy_cjk_dec_inbuf_remaining(struct pypy_cjk_dec_s *d);
 Py_ssize_t pypy_cjk_dec_inbuf_consumed(struct pypy_cjk_dec_s* d);
+Py_ssize_t pypy_cjk_dec_replace_on_error(struct pypy_cjk_dec_s* d,
+                                         Py_UNICODE *, Py_ssize_t, Py_ssize_t);
 
 struct pypy_cjk_enc_s {
   const MultibyteCodec *codec;
@@ -119,6 +121,8 @@
 Py_ssize_t pypy_cjk_enc_outlen(struct pypy_cjk_enc_s *);
 Py_ssize_t pypy_cjk_enc_inbuf_remaining(struct pypy_cjk_enc_s *d);
 Py_ssize_t pypy_cjk_enc_inbuf_consumed(struct pypy_cjk_enc_s* d);
+Py_ssize_t pypy_cjk_enc_replace_on_error(struct pypy_cjk_enc_s* d,
+                                         char *, Py_ssize_t, Py_ssize_t);
 
 /* list of codecs defined in the .c files */
 

