[pypy-commit] pypy inline-dict-ops: merge default

Fri Jul 1 15:47:18 CEST 2011

Author: Maciej Fijalkowski <fijall at gmail.com>
Branch: inline-dict-ops
Changeset: r45228:deb116fcd516
Date: 2011-07-01 15:54 +0200
http://bitbucket.org/pypy/pypy/changeset/deb116fcd516/

Log:	merge default

diff --git a/lib-python/modified-2.7/test/test_descr.py b/lib-python/modified-2.7/test/test_descr.py
--- a/lib-python/modified-2.7/test/test_descr.py
+++ b/lib-python/modified-2.7/test/test_descr.py
@@ -4399,14 +4399,8 @@
         self.assertTrue(l.__add__ != [5].__add__)
         self.assertTrue(l.__add__ != l.__mul__)
         self.assertTrue(l.__add__.__name__ == '__add__')
-        if hasattr(l.__add__, '__self__'):
-            # CPython
-            self.assertTrue(l.__add__.__self__ is l)
-            self.assertTrue(l.__add__.__objclass__ is list)
-        else:
-            # Python implementations where [].__add__ is a normal bound method
-            self.assertTrue(l.__add__.im_self is l)
-            self.assertTrue(l.__add__.im_class is list)
+        self.assertTrue(l.__add__.__self__ is l)
+        self.assertTrue(l.__add__.__objclass__ is list)
         self.assertEqual(l.__add__.__doc__, list.__add__.__doc__)
         try:
             hash(l.__add__)
diff --git a/pypy/annotation/bookkeeper.py b/pypy/annotation/bookkeeper.py
--- a/pypy/annotation/bookkeeper.py
+++ b/pypy/annotation/bookkeeper.py
@@ -299,12 +299,13 @@
             listdef.generalize_range_step(flags['range_step'])
         return SomeList(listdef)
 
-    def getdictdef(self, is_r_dict=False):
+    def getdictdef(self, is_r_dict=False, force_non_null=False):
         """Get the DictDef associated with the current position."""
         try:
             dictdef = self.dictdefs[self.position_key]
         except KeyError:
-            dictdef = DictDef(self, is_r_dict=is_r_dict)
+            dictdef = DictDef(self, is_r_dict=is_r_dict,
+                              force_non_null=force_non_null)
             self.dictdefs[self.position_key] = dictdef
         return dictdef
 
diff --git a/pypy/annotation/builtin.py b/pypy/annotation/builtin.py
--- a/pypy/annotation/builtin.py
+++ b/pypy/annotation/builtin.py
@@ -311,8 +311,14 @@
 def robjmodel_we_are_translated():
     return immutablevalue(True)
 
-def robjmodel_r_dict(s_eqfn, s_hashfn):
-    dictdef = getbookkeeper().getdictdef(is_r_dict=True)
+def robjmodel_r_dict(s_eqfn, s_hashfn, s_force_non_null=None):
+    if s_force_non_null is None:
+        force_non_null = False
+    else:
+        assert s_force_non_null.is_constant()
+        force_non_null = s_force_non_null.const
+    dictdef = getbookkeeper().getdictdef(is_r_dict=True,
+                                         force_non_null=force_non_null)
     dictdef.dictkey.update_rdict_annotations(s_eqfn, s_hashfn)
     return SomeDict(dictdef)
 
diff --git a/pypy/annotation/dictdef.py b/pypy/annotation/dictdef.py
--- a/pypy/annotation/dictdef.py
+++ b/pypy/annotation/dictdef.py
@@ -85,12 +85,14 @@
 
     def __init__(self, bookkeeper, s_key = s_ImpossibleValue,
                                  s_value = s_ImpossibleValue,
-                               is_r_dict = False):
+                               is_r_dict = False,
+                           force_non_null = False):
         self.dictkey = DictKey(bookkeeper, s_key, is_r_dict)
         self.dictkey.itemof[self] = True
         self.dictvalue = DictValue(bookkeeper, s_value)
         self.dictvalue.itemof[self] = True
         self.bookkeeper = bookkeeper
+        self.force_non_null = force_non_null
 
     def read_key(self, position_key=None):
         if position_key is None:
diff --git a/pypy/interpreter/astcompiler/codegen.py b/pypy/interpreter/astcompiler/codegen.py
--- a/pypy/interpreter/astcompiler/codegen.py
+++ b/pypy/interpreter/astcompiler/codegen.py
@@ -134,7 +134,7 @@
 
     def accept_comp_iteration(self, codegen, index):
         self.elt.walkabout(codegen)
-        codegen.emit_op_arg(ops.SET_ADD, index)
+        codegen.emit_op_arg(ops.SET_ADD, index + 1)
 
 
 class __extend__(ast.DictComp):
@@ -148,7 +148,7 @@
     def accept_comp_iteration(self, codegen, index):
         self.value.walkabout(codegen)
         self.key.walkabout(codegen)
-        codegen.emit_op_arg(ops.MAP_ADD, index)
+        codegen.emit_op_arg(ops.MAP_ADD, index + 1)
 
 
 # These are frame blocks.
diff --git a/pypy/interpreter/astcompiler/misc.py b/pypy/interpreter/astcompiler/misc.py
--- a/pypy/interpreter/astcompiler/misc.py
+++ b/pypy/interpreter/astcompiler/misc.py
@@ -92,7 +92,10 @@
         return name
     if len(name) + 2 >= MANGLE_LEN:
         return name
-    if name.endswith('__'):
+    # Don't mangle __id__ or names with dots. The only time a name with a dot
+    # can occur is when we are compiling an import statement that has a package
+    # name.
+    if name.endswith('__') or '.' in name:
         return name
     try:
         i = 0
diff --git a/pypy/interpreter/astcompiler/test/test_compiler.py b/pypy/interpreter/astcompiler/test/test_compiler.py
--- a/pypy/interpreter/astcompiler/test/test_compiler.py
+++ b/pypy/interpreter/astcompiler/test/test_compiler.py
@@ -308,6 +308,15 @@
                "p.__name__", os.path.__name__)
         yield (self.st, 'from os import *',
                "path.__name__, sep", (os.path.__name__, os.sep))
+        yield (self.st, '''
+            class A(object):
+                def m(self):
+                    from __foo__.bar import x
+            try:
+                A().m()
+            except ImportError, e:
+                msg = str(e)
+            ''', "msg", "No module named __foo__")
 
     def test_if_stmts(self):
         yield self.st, "a = 42\nif a > 10: a += 2", "a", 44
diff --git a/pypy/interpreter/eval.py b/pypy/interpreter/eval.py
--- a/pypy/interpreter/eval.py
+++ b/pypy/interpreter/eval.py
@@ -100,12 +100,12 @@
 
     @jit.dont_look_inside
     def fast2locals(self):
-        # Copy values from self.fastlocals_w to self.w_locals
+        # Copy values from the fastlocals to self.w_locals
         if self.w_locals is None:
             self.w_locals = self.space.newdict()
         varnames = self.getcode().getvarnames()
         fastscope_w = self.getfastscope()
-        for i in range(min(len(varnames), len(fastscope_w))):
+        for i in range(min(len(varnames), self.getfastscopelength())):
             name = varnames[i]
             w_value = fastscope_w[i]
             if w_value is not None:
@@ -114,7 +114,7 @@
 
     @jit.dont_look_inside
     def locals2fast(self):
-        # Copy values from self.w_locals to self.fastlocals_w
+        # Copy values from self.w_locals to the fastlocals
         assert self.w_locals is not None
         varnames = self.getcode().getvarnames()
         numlocals = self.getfastscopelength()
diff --git a/pypy/interpreter/function.py b/pypy/interpreter/function.py
--- a/pypy/interpreter/function.py
+++ b/pypy/interpreter/function.py
@@ -98,7 +98,7 @@
                                                    self.closure)
                 for i in funccallunrolling:
                     if i < nargs:
-                        new_frame.fastlocals_w[i] = args_w[i]
+                        new_frame.locals_stack_w[i] = args_w[i]
                 return new_frame.run()
         elif nargs >= 1 and fast_natural_arity == Code.PASSTHROUGHARGS1:
             assert isinstance(code, gateway.BuiltinCodePassThroughArguments1)
@@ -158,7 +158,7 @@
                                                    self.closure)
         for i in xrange(nargs):
             w_arg = frame.peekvalue(nargs-1-i)
-            new_frame.fastlocals_w[i] = w_arg
+            new_frame.locals_stack_w[i] = w_arg
 
         return new_frame.run()
 
@@ -169,13 +169,13 @@
                                                    self.closure)
         for i in xrange(nargs):
             w_arg = frame.peekvalue(nargs-1-i)
-            new_frame.fastlocals_w[i] = w_arg
+            new_frame.locals_stack_w[i] = w_arg
 
         ndefs = len(self.defs_w)
         start = ndefs - defs_to_load
         i = nargs
         for j in xrange(start, ndefs):
-            new_frame.fastlocals_w[i] = self.defs_w[j]
+            new_frame.locals_stack_w[i] = self.defs_w[j]
             i += 1
         return new_frame.run()
 
diff --git a/pypy/interpreter/nestedscope.py b/pypy/interpreter/nestedscope.py
--- a/pypy/interpreter/nestedscope.py
+++ b/pypy/interpreter/nestedscope.py
@@ -170,7 +170,7 @@
         for i in range(len(args_to_copy)):
             argnum = args_to_copy[i]
             if argnum >= 0:
-                self.cells[i].set(self.fastlocals_w[argnum])
+                self.cells[i].set(self.locals_stack_w[argnum])
 
     def getfreevarname(self, index):
         freevarnames = self.pycode.co_cellvars + self.pycode.co_freevars
diff --git a/pypy/interpreter/pycode.py b/pypy/interpreter/pycode.py
--- a/pypy/interpreter/pycode.py
+++ b/pypy/interpreter/pycode.py
@@ -63,6 +63,7 @@
         the pypy compiler"""
         self.space = space
         eval.Code.__init__(self, name)
+        assert nlocals >= 0
         self.co_argcount = argcount
         self.co_nlocals = nlocals
         self.co_stacksize = stacksize
@@ -202,7 +203,7 @@
         # speed hack
         fresh_frame = jit.hint(frame, access_directly=True,
                                       fresh_virtualizable=True)
-        args_matched = args.parse_into_scope(None, fresh_frame.fastlocals_w,
+        args_matched = args.parse_into_scope(None, fresh_frame.locals_stack_w,
                                              func.name,
                                              sig, func.defs_w)
         fresh_frame.init_cells()
@@ -215,7 +216,7 @@
         # speed hack
         fresh_frame = jit.hint(frame, access_directly=True,
                                       fresh_virtualizable=True)
-        args_matched = args.parse_into_scope(w_obj, fresh_frame.fastlocals_w,
+        args_matched = args.parse_into_scope(w_obj, fresh_frame.locals_stack_w,
                                              func.name,
                                              sig, func.defs_w)
         fresh_frame.init_cells()
diff --git a/pypy/interpreter/pyframe.py b/pypy/interpreter/pyframe.py
--- a/pypy/interpreter/pyframe.py
+++ b/pypy/interpreter/pyframe.py
@@ -9,7 +9,7 @@
 from pypy.interpreter import pytraceback
 from pypy.rlib.objectmodel import we_are_translated, instantiate
 from pypy.rlib.jit import hint
-from pypy.rlib.debug import make_sure_not_resized
+from pypy.rlib.debug import make_sure_not_resized, check_nonneg
 from pypy.rlib.rarithmetic import intmask
 from pypy.rlib import jit
 from pypy.tool import stdlib_opcode
@@ -56,16 +56,18 @@
         assert isinstance(code, pycode.PyCode)
         self.pycode = code
         eval.Frame.__init__(self, space, w_globals)
-        self.valuestack_w = [None] * code.co_stacksize
-        self.valuestackdepth = 0
+        self.locals_stack_w = [None] * (code.co_nlocals + code.co_stacksize)
+        self.nlocals = code.co_nlocals
+        self.valuestackdepth = code.co_nlocals
         self.lastblock = None
+        make_sure_not_resized(self.locals_stack_w)
+        check_nonneg(self.nlocals)
+        #
         if space.config.objspace.honor__builtins__:
             self.builtin = space.builtin.pick_builtin(w_globals)
         # regular functions always have CO_OPTIMIZED and CO_NEWLOCALS.
         # class bodies only have CO_NEWLOCALS.
         self.initialize_frame_scopes(closure, code)
-        self.fastlocals_w = [None] * code.co_nlocals
-        make_sure_not_resized(self.fastlocals_w)
         self.f_lineno = code.co_firstlineno
 
     def mark_as_escaped(self):
@@ -184,14 +186,14 @@
     # stack manipulation helpers
     def pushvalue(self, w_object):
         depth = self.valuestackdepth
-        self.valuestack_w[depth] = w_object
+        self.locals_stack_w[depth] = w_object
         self.valuestackdepth = depth + 1
 
     def popvalue(self):
         depth = self.valuestackdepth - 1
-        assert depth >= 0, "pop from empty value stack"
-        w_object = self.valuestack_w[depth]
-        self.valuestack_w[depth] = None
+        assert depth >= self.nlocals, "pop from empty value stack"
+        w_object = self.locals_stack_w[depth]
+        self.locals_stack_w[depth] = None
         self.valuestackdepth = depth
         return w_object
 
@@ -217,24 +219,24 @@
     def peekvalues(self, n):
         values_w = [None] * n
         base = self.valuestackdepth - n
-        assert base >= 0
+        assert base >= self.nlocals
         while True:
             n -= 1
             if n < 0:
                 break
-            values_w[n] = self.valuestack_w[base+n]
+            values_w[n] = self.locals_stack_w[base+n]
         return values_w
 
     @jit.unroll_safe
     def dropvalues(self, n):
         n = hint(n, promote=True)
         finaldepth = self.valuestackdepth - n
-        assert finaldepth >= 0, "stack underflow in dropvalues()"        
+        assert finaldepth >= self.nlocals, "stack underflow in dropvalues()"
         while True:
             n -= 1
             if n < 0:
                 break
-            self.valuestack_w[finaldepth+n] = None
+            self.locals_stack_w[finaldepth+n] = None
         self.valuestackdepth = finaldepth
 
     @jit.unroll_safe
@@ -261,30 +263,30 @@
         # Contrast this with CPython where it's PEEK(-1).
         index_from_top = hint(index_from_top, promote=True)
         index = self.valuestackdepth + ~index_from_top
-        assert index >= 0, "peek past the bottom of the stack"
-        return self.valuestack_w[index]
+        assert index >= self.nlocals, "peek past the bottom of the stack"
+        return self.locals_stack_w[index]
 
     def settopvalue(self, w_object, index_from_top=0):
         index_from_top = hint(index_from_top, promote=True)
         index = self.valuestackdepth + ~index_from_top
-        assert index >= 0, "settop past the bottom of the stack"
-        self.valuestack_w[index] = w_object
+        assert index >= self.nlocals, "settop past the bottom of the stack"
+        self.locals_stack_w[index] = w_object
 
     @jit.unroll_safe
     def dropvaluesuntil(self, finaldepth):
         depth = self.valuestackdepth - 1
         finaldepth = hint(finaldepth, promote=True)
         while depth >= finaldepth:
-            self.valuestack_w[depth] = None
+            self.locals_stack_w[depth] = None
             depth -= 1
         self.valuestackdepth = finaldepth
 
-    def savevaluestack(self):
-        return self.valuestack_w[:self.valuestackdepth]
+    def save_locals_stack(self):
+        return self.locals_stack_w[:self.valuestackdepth]
 
-    def restorevaluestack(self, items_w):
-        assert None not in items_w
-        self.valuestack_w[:len(items_w)] = items_w
+    def restore_locals_stack(self, items_w):
+        self.locals_stack_w[:len(items_w)] = items_w
+        self.init_cells()
         self.dropvaluesuntil(len(items_w))
 
     def make_arguments(self, nargs):
@@ -314,11 +316,12 @@
         else:
             f_lineno = self.f_lineno
 
-        values_w = self.valuestack_w[0:self.valuestackdepth]
+        values_w = self.locals_stack_w[self.nlocals:self.valuestackdepth]
         w_valuestack = maker.slp_into_tuple_with_nulls(space, values_w)
         
         w_blockstack = nt([block._get_state_(space) for block in self.get_blocklist()])
-        w_fastlocals = maker.slp_into_tuple_with_nulls(space, self.fastlocals_w)
+        w_fastlocals = maker.slp_into_tuple_with_nulls(
+            space, self.locals_stack_w[:self.nlocals])
         if self.last_exception is None:
             w_exc_value = space.w_None
             w_tb = space.w_None
@@ -399,7 +402,8 @@
         new_frame.last_instr = space.int_w(w_last_instr)
         new_frame.frame_finished_execution = space.is_true(w_finished)
         new_frame.f_lineno = space.int_w(w_f_lineno)
-        new_frame.fastlocals_w = maker.slp_from_tuple_with_nulls(space, w_fastlocals)
+        fastlocals_w = maker.slp_from_tuple_with_nulls(space, w_fastlocals)
+        new_frame.locals_stack_w[:len(fastlocals_w)] = fastlocals_w
 
         if space.is_w(w_f_trace, space.w_None):
             new_frame.w_f_trace = None
@@ -423,28 +427,28 @@
     @jit.dont_look_inside
     def getfastscope(self):
         "Get the fast locals as a list."
-        return self.fastlocals_w
+        return self.locals_stack_w
 
     @jit.dont_look_inside
     def setfastscope(self, scope_w):
         """Initialize the fast locals from a list of values,
         where the order is according to self.pycode.signature()."""
         scope_len = len(scope_w)
-        if scope_len > len(self.fastlocals_w):
+        if scope_len > self.nlocals:
             raise ValueError, "new fastscope is longer than the allocated area"
-        # don't assign directly to 'fastlocals_w[:scope_len]' to be
+        # don't assign directly to 'locals_stack_w[:scope_len]' to be
         # virtualizable-friendly
         for i in range(scope_len):
-            self.fastlocals_w[i] = scope_w[i]
+            self.locals_stack_w[i] = scope_w[i]
         self.init_cells()
 
     def init_cells(self):
-        """Initialize cellvars from self.fastlocals_w
+        """Initialize cellvars from self.locals_stack_w.
         This is overridden in nestedscope.py"""
         pass
 
     def getfastscopelength(self):
-        return self.pycode.co_nlocals
+        return self.nlocals
 
     def getclosure(self):
         return None
diff --git a/pypy/interpreter/pyopcode.py b/pypy/interpreter/pyopcode.py
--- a/pypy/interpreter/pyopcode.py
+++ b/pypy/interpreter/pyopcode.py
@@ -324,7 +324,7 @@
 
     def LOAD_FAST(self, varindex, next_instr):
         # access a local variable directly
-        w_value = self.fastlocals_w[varindex]
+        w_value = self.locals_stack_w[varindex]
         if w_value is None:
             self._load_fast_failed(varindex)
         self.pushvalue(w_value)
@@ -343,7 +343,7 @@
     def STORE_FAST(self, varindex, next_instr):
         w_newvalue = self.popvalue()
         assert w_newvalue is not None
-        self.fastlocals_w[varindex] = w_newvalue
+        self.locals_stack_w[varindex] = w_newvalue
 
     def POP_TOP(self, oparg, next_instr):
         self.popvalue()
@@ -696,12 +696,12 @@
     LOAD_GLOBAL._always_inline_ = True
 
     def DELETE_FAST(self, varindex, next_instr):
-        if self.fastlocals_w[varindex] is None:
+        if self.locals_stack_w[varindex] is None:
             varname = self.getlocalvarname(varindex)
             message = "local variable '%s' referenced before assignment"
             raise operationerrfmt(self.space.w_UnboundLocalError, message,
                                   varname)
-        self.fastlocals_w[varindex] = None
+        self.locals_stack_w[varindex] = None
 
     def BUILD_TUPLE(self, itemcount, next_instr):
         items = self.popvalues(itemcount)
@@ -1048,13 +1048,13 @@
 
     def SET_ADD(self, oparg, next_instr):
         w_value = self.popvalue()
-        w_set = self.peekvalue(oparg)
+        w_set = self.peekvalue(oparg - 1)
         self.space.call_method(w_set, 'add', w_value)
 
     def MAP_ADD(self, oparg, next_instr):
         w_key = self.popvalue()
         w_value = self.popvalue()
-        w_dict = self.peekvalue(oparg)
+        w_dict = self.peekvalue(oparg - 1)
         self.space.setitem(w_dict, w_key, w_value)
 
     def SET_LINENO(self, lineno, next_instr):
@@ -1091,12 +1091,10 @@
 
     @jit.unroll_safe
     def BUILD_SET(self, itemcount, next_instr):
-        w_set = self.space.call_function(self.space.w_set)
-        if itemcount:
-            w_add = self.space.getattr(w_set, self.space.wrap("add"))
-            for i in range(itemcount):
-                w_item = self.popvalue()
-                self.space.call_function(w_add, w_item)
+        w_set = self.space.newset()
+        for i in range(itemcount):
+            w_item = self.popvalue()
+            self.space.call_method(w_set, 'add', w_item)
         self.pushvalue(w_set)
 
     def STORE_MAP(self, oparg, next_instr):
diff --git a/pypy/interpreter/test/test_eval.py b/pypy/interpreter/test/test_eval.py
--- a/pypy/interpreter/test/test_eval.py
+++ b/pypy/interpreter/test/test_eval.py
@@ -15,16 +15,16 @@
                 self.code = code
                 Frame.__init__(self, space)
                 self.numlocals = numlocals
-                self.fastlocals_w = [None] * self.numlocals
+                self._fastlocals_w = [None] * self.numlocals
 
             def getcode(self):
                 return self.code
 
             def setfastscope(self, scope_w):
-                self.fastlocals_w = scope_w
+                self._fastlocals_w = scope_w
 
             def getfastscope(self):
-                return self.fastlocals_w
+                return self._fastlocals_w
 
             def getfastscopelength(self):
                 return self.numlocals
@@ -38,11 +38,11 @@
         self.f.fast2locals()
         assert space.eq_w(self.f.w_locals, self.space.wrap({}))
         
-        self.f.fastlocals_w[0] = w(5)
+        self.f._fastlocals_w[0] = w(5)
         self.f.fast2locals()
         assert space.eq_w(self.f.w_locals, self.space.wrap({'x': 5}))
 
-        self.f.fastlocals_w[2] = w(7)
+        self.f._fastlocals_w[2] = w(7)
         self.f.fast2locals()
         assert space.eq_w(self.f.w_locals, self.space.wrap({'x': 5, 'args': 7}))
 
@@ -57,13 +57,13 @@
         w = self.space.wrap
         self.f.w_locals = self.space.wrap({})
         self.f.locals2fast()
-        self.sameList(self.f.fastlocals_w, [None]*5)
+        self.sameList(self.f._fastlocals_w, [None]*5)
 
         self.f.w_locals = self.space.wrap({'x': 5})
         self.f.locals2fast()
-        self.sameList(self.f.fastlocals_w, [w(5)] + [None]*4)
+        self.sameList(self.f._fastlocals_w, [w(5)] + [None]*4)
 
         self.f.w_locals = self.space.wrap({'x':5, 'args':7})
         self.f.locals2fast()
-        self.sameList(self.f.fastlocals_w, [w(5), None, w(7),
-                                            None, None])
+        self.sameList(self.f._fastlocals_w, [w(5), None, w(7),
+                                             None, None])
diff --git a/pypy/interpreter/test/test_typedef.py b/pypy/interpreter/test/test_typedef.py
--- a/pypy/interpreter/test/test_typedef.py
+++ b/pypy/interpreter/test/test_typedef.py
@@ -203,3 +203,26 @@
         lst = seen[:]
         assert lst == [5, 10, 2]
         raises(OSError, os.lseek, fd, 7, 0)
+
+    def test_method_attrs(self):
+        import sys
+        class A(object):
+            def m(self):
+                "aaa"
+            m.x = 3
+
+        bm = A().m
+        assert bm.__func__ is bm.im_func
+        assert bm.__self__ is bm.im_self
+        assert bm.im_class is A
+        if '__pypy__' in sys.builtin_module_names:
+            assert bm.__objclass__ is A
+        assert bm.__doc__ == "aaa"
+        assert bm.x == 3
+        raises(AttributeError, setattr, bm, 'x', 15)
+        l = []
+        assert l.append.__self__ is l
+        if '__pypy__' in sys.builtin_module_names:
+            assert l.append.__objclass__ is list
+        assert l.__add__.__self__ is l
+        assert l.__add__.__objclass__ is list
diff --git a/pypy/interpreter/typedef.py b/pypy/interpreter/typedef.py
--- a/pypy/interpreter/typedef.py
+++ b/pypy/interpreter/typedef.py
@@ -761,13 +761,17 @@
     )
 Function.typedef.acceptable_as_base_class = False
 
-Method.typedef = TypeDef("method",
+Method.typedef = TypeDef(
+    "method",
     __new__ = interp2app(Method.descr_method__new__.im_func),
     __call__ = interp2app(Method.descr_method_call),
     __get__ = interp2app(Method.descr_method_get),
     im_func  = interp_attrproperty_w('w_function', cls=Method),
+    __func__ = interp_attrproperty_w('w_function', cls=Method),
     im_self  = interp_attrproperty_w('w_instance', cls=Method),
+    __self__ = interp_attrproperty_w('w_instance', cls=Method),
     im_class = interp_attrproperty_w('w_class', cls=Method),
+    __objclass__ = interp_attrproperty_w('w_class', cls=Method),
     __getattribute__ = interp2app(Method.descr_method_getattribute),
     __eq__ = interp2app(Method.descr_method_eq),
     __ne__ = descr_generic_ne,
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -703,22 +703,28 @@
         # we need to put two words into the shadowstack: the MARKER
         # and the address of the frame (ebp, actually)
         rst = gcrootmap.get_root_stack_top_addr()
-        assert rx86.fits_in_32bits(rst)
-        if IS_X86_64:
-            # cannot use rdx here, it's used to pass arguments!
-            tmp = X86_64_SCRATCH_REG
+        if rx86.fits_in_32bits(rst):
+            self.mc.MOV_rj(eax.value, rst)            # MOV eax, [rootstacktop]
         else:
-            tmp = edx
-        self.mc.MOV_rj(eax.value, rst)                # MOV eax, [rootstacktop]
-        self.mc.LEA_rm(tmp.value, (eax.value, 2*WORD))  # LEA edx, [eax+2*WORD]
+            self.mc.MOV_ri(r13.value, rst)            # MOV r13, rootstacktop
+            self.mc.MOV_rm(eax.value, (r13.value, 0)) # MOV eax, [r13]
+        #
+        self.mc.LEA_rm(ebx.value, (eax.value, 2*WORD))  # LEA ebx, [eax+2*WORD]
         self.mc.MOV_mi((eax.value, 0), gcrootmap.MARKER)    # MOV [eax], MARKER
         self.mc.MOV_mr((eax.value, WORD), ebp.value)      # MOV [eax+WORD], ebp
-        self.mc.MOV_jr(rst, tmp.value)                # MOV [rootstacktop], edx
+        #
+        if rx86.fits_in_32bits(rst):
+            self.mc.MOV_jr(rst, ebx.value)            # MOV [rootstacktop], ebx
+        else:
+            self.mc.MOV_mr((r13.value, 0), ebx.value) # MOV [r13], ebx
 
     def _call_footer_shadowstack(self, gcrootmap):
         rst = gcrootmap.get_root_stack_top_addr()
-        assert rx86.fits_in_32bits(rst)
-        self.mc.SUB_ji8(rst, 2*WORD)       # SUB [rootstacktop], 2*WORD
+        if rx86.fits_in_32bits(rst):
+            self.mc.SUB_ji8(rst, 2*WORD)       # SUB [rootstacktop], 2*WORD
+        else:
+            self.mc.MOV_ri(ebx.value, rst)           # MOV ebx, rootstacktop
+            self.mc.SUB_mi8((ebx.value, 0), 2*WORD)  # SUB [ebx], 2*WORD
 
     def _assemble_bootstrap_direct_call(self, arglocs, jmppos, stackdepth):
         if IS_X86_64:
@@ -889,7 +895,7 @@
 
     def regalloc_push(self, loc):
         if isinstance(loc, RegLoc) and loc.is_xmm:
-            self.mc.SUB_ri(esp.value, 2*WORD)
+            self.mc.SUB_ri(esp.value, 8)   # = size of doubles
             self.mc.MOVSD_sx(0, loc.value)
         elif WORD == 4 and isinstance(loc, StackLoc) and loc.width == 8:
             # XXX evil trick
@@ -901,7 +907,7 @@
     def regalloc_pop(self, loc):
         if isinstance(loc, RegLoc) and loc.is_xmm:
             self.mc.MOVSD_xs(loc.value, 0)
-            self.mc.ADD_ri(esp.value, 2*WORD)
+            self.mc.ADD_ri(esp.value, 8)   # = size of doubles
         elif WORD == 4 and isinstance(loc, StackLoc) and loc.width == 8:
             # XXX evil trick
             self.mc.POP_b(get_ebp_ofs(loc.position + 1))
diff --git a/pypy/jit/backend/x86/regloc.py b/pypy/jit/backend/x86/regloc.py
--- a/pypy/jit/backend/x86/regloc.py
+++ b/pypy/jit/backend/x86/regloc.py
@@ -318,7 +318,9 @@
             # must be careful not to combine it with location types that
             # might need to use the scratch register themselves.
             if loc2 is X86_64_SCRATCH_REG:
-                assert code1 != 'j'
+                if code1 == 'j':
+                    assert (name.startswith("MOV") and
+                            rx86.fits_in_32bits(loc1.value_j()))
             if loc1 is X86_64_SCRATCH_REG and not name.startswith("MOV"):
                 assert code2 not in ('j', 'i')
 
diff --git a/pypy/jit/backend/x86/rx86.py b/pypy/jit/backend/x86/rx86.py
--- a/pypy/jit/backend/x86/rx86.py
+++ b/pypy/jit/backend/x86/rx86.py
@@ -283,7 +283,7 @@
 # with immediate(argnum)).
 
 def encode_abs(mc, _1, _2, orbyte):
-    # expands to either '\x05' on 32-bit, or '\x04\x25' or 64-bit
+    # expands to either '\x05' on 32-bit, or '\x04\x25' on 64-bit
     if mc.WORD == 8:
         mc.writechar(chr(0x04 | orbyte))
         mc.writechar(chr(0x25))
@@ -370,6 +370,8 @@
     INSN_rj = insn(rex_w, chr(base+3), register(1,8), abs_, immediate(2))
     INSN_ji8 = insn(rex_w, '\x83', orbyte(base), abs_, immediate(1),
                     immediate(2,'b'))
+    INSN_mi8 = insn(rex_w, '\x83', orbyte(base), mem_reg_plus_const(1),
+                    immediate(2,'b'))
     INSN_bi8 = insn(rex_w, '\x83', orbyte(base), stack_bp(1), immediate(2,'b'))
     INSN_bi32= insn(rex_w, '\x81', orbyte(base), stack_bp(1), immediate(2))
 
@@ -388,7 +390,7 @@
     INSN_bi._always_inline_ = True      # try to constant-fold single_byte()
 
     return (INSN_ri, INSN_rr, INSN_rb, INSN_bi, INSN_br, INSN_rm, INSN_rj,
-            INSN_ji8)
+            INSN_ji8, INSN_mi8)
 
 def select_8_or_32_bit_immed(insn_8, insn_32):
     def INSN(*args):
@@ -467,13 +469,13 @@
 
     # ------------------------------ Arithmetic ------------------------------
 
-    ADD_ri, ADD_rr, ADD_rb, _, _, ADD_rm, ADD_rj, _ = common_modes(0)
-    OR_ri,  OR_rr,  OR_rb,  _, _, OR_rm,  OR_rj,  _ = common_modes(1)
-    AND_ri, AND_rr, AND_rb, _, _, AND_rm, AND_rj, _ = common_modes(4)
-    SUB_ri, SUB_rr, SUB_rb, _, _, SUB_rm, SUB_rj, SUB_ji8 = common_modes(5)
-    SBB_ri, SBB_rr, SBB_rb, _, _, SBB_rm, SBB_rj, _ = common_modes(3)
-    XOR_ri, XOR_rr, XOR_rb, _, _, XOR_rm, XOR_rj, _ = common_modes(6)
-    CMP_ri, CMP_rr, CMP_rb, CMP_bi, CMP_br, CMP_rm, CMP_rj, _ = common_modes(7)
+    ADD_ri,ADD_rr,ADD_rb,_,_,ADD_rm,ADD_rj,_,_ = common_modes(0)
+    OR_ri, OR_rr, OR_rb, _,_,OR_rm, OR_rj, _,_ = common_modes(1)
+    AND_ri,AND_rr,AND_rb,_,_,AND_rm,AND_rj,_,_ = common_modes(4)
+    SUB_ri,SUB_rr,SUB_rb,_,_,SUB_rm,SUB_rj,SUB_ji8,SUB_mi8 = common_modes(5)
+    SBB_ri,SBB_rr,SBB_rb,_,_,SBB_rm,SBB_rj,_,_ = common_modes(3)
+    XOR_ri,XOR_rr,XOR_rb,_,_,XOR_rm,XOR_rj,_,_ = common_modes(6)
+    CMP_ri,CMP_rr,CMP_rb,CMP_bi,CMP_br,CMP_rm,CMP_rj,_,_ = common_modes(7)
 
     CMP_mi8 = insn(rex_w, '\x83', orbyte(7<<3), mem_reg_plus_const(1), immediate(2, 'b'))
     CMP_mi32 = insn(rex_w, '\x81', orbyte(7<<3), mem_reg_plus_const(1), immediate(2))
diff --git a/pypy/jit/backend/x86/test/test_assembler.py b/pypy/jit/backend/x86/test/test_assembler.py
--- a/pypy/jit/backend/x86/test/test_assembler.py
+++ b/pypy/jit/backend/x86/test/test_assembler.py
@@ -1,13 +1,15 @@
 from pypy.jit.backend.x86.regloc import *
 from pypy.jit.backend.x86.assembler import Assembler386
 from pypy.jit.backend.x86.regalloc import X86FrameManager, get_ebp_ofs
-from pypy.jit.metainterp.history import BoxInt, BoxPtr, BoxFloat, INT, REF, FLOAT
+from pypy.jit.metainterp.history import BoxInt, BoxPtr, BoxFloat, ConstFloat
+from pypy.jit.metainterp.history import INT, REF, FLOAT
 from pypy.rlib.rarithmetic import intmask
 from pypy.rpython.lltypesystem import lltype, llmemory, rffi
 from pypy.jit.backend.x86.arch import WORD, IS_X86_32, IS_X86_64
 from pypy.jit.backend.detect_cpu import getcpuclass 
 from pypy.jit.backend.x86.regalloc import X86RegisterManager, X86_64_RegisterManager, X86XMMRegisterManager, X86_64_XMMRegisterManager
 from pypy.jit.codewriter import longlong
+import ctypes
 
 ACTUAL_CPU = getcpuclass()
 
@@ -238,3 +240,103 @@
         assert assembler.fail_boxes_int.getitem(i) == expected_ints[i]
         assert assembler.fail_boxes_ptr.getitem(i) == expected_ptrs[i]
         assert assembler.fail_boxes_float.getitem(i) == expected_floats[i]
+
+# ____________________________________________________________
+
+class TestRegallocPushPop(object):
+
+    def do_test(self, callback):
+        from pypy.jit.backend.x86.regalloc import X86FrameManager
+        from pypy.jit.backend.x86.regalloc import X86XMMRegisterManager
+        class FakeToken:
+            class compiled_loop_token:
+                asmmemmgr_blocks = None
+        cpu = ACTUAL_CPU(None, None)
+        cpu.setup()
+        looptoken = FakeToken()
+        asm = cpu.assembler
+        asm.setup_once()
+        asm.setup(looptoken)
+        self.fm = X86FrameManager()
+        self.xrm = X86XMMRegisterManager(None, frame_manager=self.fm,
+                                         assembler=asm)
+        callback(asm)
+        asm.mc.RET()
+        rawstart = asm.materialize_loop(looptoken)
+        #
+        F = ctypes.CFUNCTYPE(ctypes.c_long)
+        fn = ctypes.cast(rawstart, F)
+        res = fn()
+        return res
+
+    def test_simple(self):
+        def callback(asm):
+            asm.mov(imm(42), edx)
+            asm.regalloc_push(edx)
+            asm.regalloc_pop(eax)
+        res = self.do_test(callback)
+        assert res == 42
+
+    def test_push_stack(self):
+        def callback(asm):
+            loc = self.fm.frame_pos(5, INT)
+            asm.mc.SUB_ri(esp.value, 64)
+            asm.mov(imm(42), loc)
+            asm.regalloc_push(loc)
+            asm.regalloc_pop(eax)
+            asm.mc.ADD_ri(esp.value, 64)
+        res = self.do_test(callback)
+        assert res == 42
+
+    def test_pop_stack(self):
+        def callback(asm):
+            loc = self.fm.frame_pos(5, INT)
+            asm.mc.SUB_ri(esp.value, 64)
+            asm.mov(imm(42), edx)
+            asm.regalloc_push(edx)
+            asm.regalloc_pop(loc)
+            asm.mov(loc, eax)
+            asm.mc.ADD_ri(esp.value, 64)
+        res = self.do_test(callback)
+        assert res == 42
+
+    def test_simple_xmm(self):
+        def callback(asm):
+            c = ConstFloat(longlong.getfloatstorage(-42.5))
+            loc = self.xrm.convert_to_imm(c)
+            asm.mov(loc, xmm5)
+            asm.regalloc_push(xmm5)
+            asm.regalloc_pop(xmm0)
+            asm.mc.CVTTSD2SI(eax, xmm0)
+        res = self.do_test(callback)
+        assert res == -42
+
+    def test_push_stack_xmm(self):
+        def callback(asm):
+            c = ConstFloat(longlong.getfloatstorage(-42.5))
+            loc = self.xrm.convert_to_imm(c)
+            loc2 = self.fm.frame_pos(4, FLOAT)
+            asm.mc.SUB_ri(esp.value, 64)
+            asm.mov(loc, xmm5)
+            asm.mov(xmm5, loc2)
+            asm.regalloc_push(loc2)
+            asm.regalloc_pop(xmm0)
+            asm.mc.ADD_ri(esp.value, 64)
+            asm.mc.CVTTSD2SI(eax, xmm0)
+        res = self.do_test(callback)
+        assert res == -42
+
+    def test_pop_stack_xmm(self):
+        def callback(asm):
+            c = ConstFloat(longlong.getfloatstorage(-42.5))
+            loc = self.xrm.convert_to_imm(c)
+            loc2 = self.fm.frame_pos(4, FLOAT)
+            asm.mc.SUB_ri(esp.value, 64)
+            asm.mov(loc, xmm5)
+            asm.regalloc_push(xmm5)
+            asm.regalloc_pop(loc2)
+            asm.mov(loc2, xmm0)
+            asm.mc.ADD_ri(esp.value, 64)
+            asm.mc.CVTTSD2SI(eax, xmm0)
+        res = self.do_test(callback)
+        assert res == -42
diff --git a/pypy/jit/backend/x86/test/test_runner.py b/pypy/jit/backend/x86/test/test_runner.py
--- a/pypy/jit/backend/x86/test/test_runner.py
+++ b/pypy/jit/backend/x86/test/test_runner.py
@@ -6,6 +6,7 @@
                                          ConstPtr, Box, BoxFloat, BasicFailDescr)
 from pypy.jit.backend.detect_cpu import getcpuclass
 from pypy.jit.backend.x86.arch import WORD
+from pypy.jit.backend.x86.rx86 import fits_in_32bits
 from pypy.jit.backend.llsupport import symbolic
 from pypy.jit.metainterp.resoperation import rop
 from pypy.jit.metainterp.executor import execute
@@ -241,6 +242,23 @@
         c = self.execute_operation(rop.GETFIELD_GC, [res], 'int', ofsc3)
         assert c.value == 3
 
+    def test_bug_setfield_64bit(self):
+        if WORD == 4:
+            py.test.skip("only for 64 bits")
+        TP = lltype.GcStruct('S', ('i', lltype.Signed))
+        ofsi = self.cpu.fielddescrof(TP, 'i')
+        for i in range(500):
+            p = lltype.malloc(TP)
+            addr = rffi.cast(lltype.Signed, p)
+            if fits_in_32bits(addr):
+                break    # fitting in 32 bits, good
+        else:
+            py.test.skip("cannot get a 32-bit pointer")
+        res = ConstPtr(rffi.cast(llmemory.GCREF, addr))
+        self.execute_operation(rop.SETFIELD_RAW, [res, ConstInt(3**33)],
+                               'void', ofsi)
+        assert p.i == 3**33
+
     def test_nullity_with_guard(self):
         allops = [rop.INT_IS_TRUE]
         guards = [rop.GUARD_TRUE, rop.GUARD_FALSE]
diff --git a/pypy/jit/backend/x86/test/test_rx86.py b/pypy/jit/backend/x86/test/test_rx86.py
--- a/pypy/jit/backend/x86/test/test_rx86.py
+++ b/pypy/jit/backend/x86/test/test_rx86.py
@@ -185,6 +185,13 @@
     cb = CodeBuilder32
     assert_encodes_as(cb, 'PUSH_i32', (9,), '\x68\x09\x00\x00\x00')
 
+def test_sub_ji8():
+    cb = CodeBuilder32
+    assert_encodes_as(cb, 'SUB_ji8', (11223344, 55),
+                      '\x83\x2D\x30\x41\xAB\x00\x37')
+    assert_encodes_as(cb, 'SUB_mi8', ((edx, 16), 55),
+                      '\x83\x6A\x10\x37')
+
 class CodeBuilder64(CodeBuilderMixin, X86_64_CodeBuilder):
     pass
 
diff --git a/pypy/jit/codewriter/effectinfo.py b/pypy/jit/codewriter/effectinfo.py
--- a/pypy/jit/codewriter/effectinfo.py
+++ b/pypy/jit/codewriter/effectinfo.py
@@ -75,12 +75,13 @@
     #
     OS_MATH_SQRT                = 100
 
-    def __new__(cls, readonly_descrs_fields,
+    def __new__(cls, readonly_descrs_fields, readonly_descrs_arrays,
                 write_descrs_fields, write_descrs_arrays,
                 extraeffect=EF_CAN_RAISE,
                 oopspecindex=OS_NONE,
                 can_invalidate=False):
         key = (frozenset(readonly_descrs_fields),
+               frozenset(readonly_descrs_arrays),
                frozenset(write_descrs_fields),
                frozenset(write_descrs_arrays),
                extraeffect,
@@ -89,6 +90,7 @@
             return cls._cache[key]
         result = object.__new__(cls)
         result.readonly_descrs_fields = readonly_descrs_fields
+        result.readonly_descrs_arrays = readonly_descrs_arrays
         if extraeffect == EffectInfo.EF_LOOPINVARIANT or \
            extraeffect == EffectInfo.EF_PURE:            
             result.write_descrs_fields = []
@@ -119,7 +121,7 @@
     if effects is top_set:
         return None
     readonly_descrs_fields = []
-    # readonly_descrs_arrays = [] --- not enabled for now
+    readonly_descrs_arrays = []
     write_descrs_fields = []
     write_descrs_arrays = []
 
@@ -145,10 +147,13 @@
         elif tup[0] == "array":
             add_array(write_descrs_arrays, tup)
         elif tup[0] == "readarray":
-            pass
+            tupw = ("array",) + tup[1:]
+            if tupw not in effects:
+                add_array(readonly_descrs_arrays, tup)
         else:
             assert 0
     return EffectInfo(readonly_descrs_fields,
+                      readonly_descrs_arrays,
                       write_descrs_fields,
                       write_descrs_arrays,
                       extraeffect,
diff --git a/pypy/jit/codewriter/policy.py b/pypy/jit/codewriter/policy.py
--- a/pypy/jit/codewriter/policy.py
+++ b/pypy/jit/codewriter/policy.py
@@ -44,10 +44,6 @@
             return True
         if mod.startswith('pypy.translator.'): # XXX wtf?
             return True
-        # string builder interface
-        if mod == 'pypy.rpython.lltypesystem.rbuilder':
-            return True
-        
         return False
 
     def look_inside_graph(self, graph):
diff --git a/pypy/jit/codewriter/test/test_effectinfo.py b/pypy/jit/codewriter/test/test_effectinfo.py
--- a/pypy/jit/codewriter/test/test_effectinfo.py
+++ b/pypy/jit/codewriter/test/test_effectinfo.py
@@ -34,6 +34,15 @@
     assert not effectinfo.readonly_descrs_fields
     assert not effectinfo.write_descrs_arrays
 
+def test_include_read_array():
+    A = lltype.GcArray(lltype.Signed)
+    effects = frozenset([("readarray", lltype.Ptr(A))])
+    effectinfo = effectinfo_from_writeanalyze(effects, FakeCPU())
+    assert not effectinfo.readonly_descrs_fields
+    assert list(effectinfo.readonly_descrs_arrays) == [('arraydescr', A)]
+    assert not effectinfo.write_descrs_fields
+    assert not effectinfo.write_descrs_arrays
+
 def test_include_write_array():
     A = lltype.GcArray(lltype.Signed)
     effects = frozenset([("array", lltype.Ptr(A))])
@@ -51,6 +60,16 @@
     assert list(effectinfo.write_descrs_fields) == [('fielddescr', S, "a")]
     assert not effectinfo.write_descrs_arrays
 
+def test_dont_include_read_and_write_array():
+    A = lltype.GcArray(lltype.Signed)
+    effects = frozenset([("readarray", lltype.Ptr(A)),
+                         ("array", lltype.Ptr(A))])
+    effectinfo = effectinfo_from_writeanalyze(effects, FakeCPU())
+    assert not effectinfo.readonly_descrs_fields
+    assert not effectinfo.readonly_descrs_arrays
+    assert not effectinfo.write_descrs_fields
+    assert list(effectinfo.write_descrs_arrays) == [('arraydescr', A)]
+
 
 def test_filter_out_typeptr():
     effects = frozenset([("struct", lltype.Ptr(OBJECT), "typeptr")])
diff --git a/pypy/jit/metainterp/compile.py b/pypy/jit/metainterp/compile.py
--- a/pypy/jit/metainterp/compile.py
+++ b/pypy/jit/metainterp/compile.py
@@ -3,7 +3,7 @@
 from pypy.rpython.ootypesystem import ootype
 from pypy.objspace.flow.model import Constant, Variable
 from pypy.rlib.objectmodel import we_are_translated
-from pypy.rlib.debug import debug_start, debug_stop
+from pypy.rlib.debug import debug_start, debug_stop, debug_print
 from pypy.rlib import rstack
 from pypy.conftest import option
 from pypy.tool.sourcetools import func_with_new_name
@@ -15,7 +15,7 @@
 from pypy.jit.metainterp import history
 from pypy.jit.metainterp.typesystem import llhelper, oohelper
 from pypy.jit.metainterp.optimize import InvalidLoop
-from pypy.jit.metainterp.resume import NUMBERING
+from pypy.jit.metainterp.resume import NUMBERING, PENDINGFIELDSP
 from pypy.jit.codewriter import heaptracker, longlong
 
 def giveup():
@@ -119,6 +119,7 @@
         old_loop_token = optimize_loop(metainterp_sd, old_loop_tokens, loop,
                                        jitdriver_sd.warmstate.enable_opts)
     except InvalidLoop:
+        debug_print("compile_new_loop: got an InvalidLoop")
         return None
     if old_loop_token is not None:
         metainterp.staticdata.log("reusing old loop")
@@ -302,7 +303,7 @@
     rd_numb = lltype.nullptr(NUMBERING)
     rd_consts = None
     rd_virtuals = None
-    rd_pendingfields = None
+    rd_pendingfields = lltype.nullptr(PENDINGFIELDSP.TO)
 
     CNT_INT   = -0x20000000
     CNT_REF   = -0x40000000
@@ -633,6 +634,7 @@
                                             new_loop, state.enable_opts,
                                             inline_short_preamble, retraced)
     except InvalidLoop:
+        debug_print("compile_new_bridge: got an InvalidLoop")
         # XXX I am fairly convinced that optimize_bridge cannot actually raise
         # InvalidLoop
         return None
diff --git a/pypy/jit/metainterp/history.py b/pypy/jit/metainterp/history.py
--- a/pypy/jit/metainterp/history.py
+++ b/pypy/jit/metainterp/history.py
@@ -4,7 +4,7 @@
 from pypy.rpython.ootypesystem import ootype
 from pypy.rlib.objectmodel import we_are_translated, r_dict, Symbolic
 from pypy.rlib.objectmodel import compute_unique_id
-from pypy.rlib.rarithmetic import intmask, r_int64
+from pypy.rlib.rarithmetic import r_int64
 from pypy.conftest import option
 
 from pypy.jit.metainterp.resoperation import ResOperation, rop
@@ -791,6 +791,7 @@
 
     def dump(self):
         self.compiled_loop_token.cpu.dump_loop_token(self)
+
 class TreeLoop(object):
     inputargs = None
     operations = None
diff --git a/pypy/jit/metainterp/optimize.py b/pypy/jit/metainterp/optimize.py
--- a/pypy/jit/metainterp/optimize.py
+++ b/pypy/jit/metainterp/optimize.py
@@ -25,7 +25,6 @@
 
 def _optimize_loop(metainterp_sd, old_loop_tokens, loop, enable_opts):
     from pypy.jit.metainterp.optimizeopt import optimize_loop_1
-    cpu = metainterp_sd.cpu
     loop.logops = metainterp_sd.logger_noopt.log_loop(loop.inputargs,
                                                       loop.operations)
     # XXX do we really still need a list?
@@ -49,7 +48,6 @@
 def _optimize_bridge(metainterp_sd, old_loop_tokens, bridge, enable_opts,
                      inline_short_preamble, retraced=False):
     from pypy.jit.metainterp.optimizeopt import optimize_bridge_1
-    cpu = metainterp_sd.cpu
     bridge.logops = metainterp_sd.logger_noopt.log_loop(bridge.inputargs,
                                                         bridge.operations)
     if old_loop_tokens:
diff --git a/pypy/jit/metainterp/optimizeopt/heap.py b/pypy/jit/metainterp/optimizeopt/heap.py
--- a/pypy/jit/metainterp/optimizeopt/heap.py
+++ b/pypy/jit/metainterp/optimizeopt/heap.py
@@ -8,8 +8,8 @@
 
 class CachedField(object):
     def __init__(self):
-        # Cache information for a field descr.  It can be in one
-        # of two states:
+        # Cache information for a field descr, or for an (array descr, index)
+        # pair.  It can be in one of two states:
         #
         #   1. 'cached_fields' is a dict mapping OptValues of structs
         #      to OptValues of fields.  All fields on-heap are
@@ -27,19 +27,19 @@
         self._lazy_setfield_registered = False
 
     def do_setfield(self, optheap, op):
-        # Update the state with the SETFIELD_GC operation 'op'.
+        # Update the state with the SETFIELD_GC/SETARRAYITEM_GC operation 'op'.
         structvalue = optheap.getvalue(op.getarg(0))
-        fieldvalue  = optheap.getvalue(op.getarg(1))
+        fieldvalue  = optheap.getvalue(op.getarglist()[-1])
         if self.possible_aliasing(optheap, structvalue):
             self.force_lazy_setfield(optheap)
             assert not self.possible_aliasing(optheap, structvalue)
         cached_fieldvalue = self._cached_fields.get(structvalue, None)
         if cached_fieldvalue is not fieldvalue:
             # common case: store the 'op' as lazy_setfield, and register
-            # myself in the optheap's _lazy_setfields list
+            # myself in the optheap's _lazy_setfields_and_arrayitems list
             self._lazy_setfield = op
             if not self._lazy_setfield_registered:
-                optheap._lazy_setfields.append(self)
+                optheap._lazy_setfields_and_arrayitems.append(self)
                 self._lazy_setfield_registered = True
         else:
             # this is the case where the pending setfield ends up
@@ -65,7 +65,7 @@
         if self._lazy_setfield is not None:
             op = self._lazy_setfield
             assert optheap.getvalue(op.getarg(0)) is structvalue
-            return optheap.getvalue(op.getarg(1))
+            return optheap.getvalue(op.getarglist()[-1])
         else:
             return self._cached_fields.get(structvalue, None)
 
@@ -87,7 +87,7 @@
             # back in the cache: the value of this particular structure's
             # field.
             structvalue = optheap.getvalue(op.getarg(0))
-            fieldvalue  = optheap.getvalue(op.getarg(1))
+            fieldvalue  = optheap.getvalue(op.getarglist()[-1])
             self.remember_field_value(structvalue, fieldvalue)
 
     def get_reconstructed(self, optimizer, valuemap):
@@ -100,12 +100,6 @@
         return cf
 
 
-class CachedArrayItems(object):
-    def __init__(self):
-        self.fixed_index_items = {}
-        self.var_index_item = None
-        self.var_index_indexvalue = None
-
 class BogusPureField(JitException):
     pass
 
@@ -116,9 +110,10 @@
     def __init__(self):
         # cached fields:  {descr: CachedField}
         self.cached_fields = {}
-        self._lazy_setfields = []
-        # cached array items:  {descr: CachedArrayItems}
+        # cached array items:  {array descr: {index: CachedField}}
         self.cached_arrayitems = {}
+        #
+        self._lazy_setfields_and_arrayitems = []
         self._remove_guard_not_invalidated = False
         self._seen_guard_not_invalidated = False
 
@@ -126,34 +121,23 @@
         new = OptHeap()
 
         if True:
-            self.force_all_lazy_setfields()
+            self.force_all_lazy_setfields_and_arrayitems()
         else:
             assert 0   # was: new.lazy_setfields = self.lazy_setfields
 
         for descr, d in self.cached_fields.items():
             new.cached_fields[descr] = d.get_reconstructed(optimizer, valuemap)
 
-        new.cached_arrayitems = {}
-        for descr, d in self.cached_arrayitems.items():
-            newd = {}
-            new.cached_arrayitems[descr] = newd
-            for value, cache in d.items():
-                newcache = CachedArrayItems()
-                newd[value.get_reconstructed(optimizer, valuemap)] = newcache
-                if cache.var_index_item:
-                    newcache.var_index_item = \
-                          cache.var_index_item.get_reconstructed(optimizer, valuemap)
-                if cache.var_index_indexvalue:
-                    newcache.var_index_indexvalue = \
-                          cache.var_index_indexvalue.get_reconstructed(optimizer, valuemap)
-                for index, fieldvalue in cache.fixed_index_items.items():
-                    newcache.fixed_index_items[index] = \
-                           fieldvalue.get_reconstructed(optimizer, valuemap)
+        for descr, submap in self.cached_arrayitems.items():
+            newdict = {}
+            for index, d in submap.items():
+                newdict[index] = d.get_reconstructed(optimizer, valuemap)
+            new.cached_arrayitems[descr] = newdict
 
         return new
 
     def clean_caches(self):
-        del self._lazy_setfields[:]
+        del self._lazy_setfields_and_arrayitems[:]
         self.cached_fields.clear()
         self.cached_arrayitems.clear()
 
@@ -164,50 +148,16 @@
             cf = self.cached_fields[descr] = CachedField()
         return cf
 
-    def cache_arrayitem_value(self, descr, value, indexvalue, fieldvalue, write=False):
-        d = self.cached_arrayitems.get(descr, None)
-        if d is None:
-            d = self.cached_arrayitems[descr] = {}
-        cache = d.get(value, None)
-        if cache is None:
-            cache = d[value] = CachedArrayItems()
-        indexbox = self.get_constant_box(indexvalue.box)
-        if indexbox is not None:
-            index = indexbox.getint()
-            if write:
-                for value, othercache in d.iteritems():
-                    # fixed index, clean the variable index cache, in case the
-                    # index is the same
-                    othercache.var_index_indexvalue = None
-                    othercache.var_index_item = None
-                    try:
-                        del othercache.fixed_index_items[index]
-                    except KeyError:
-                        pass
-            cache.fixed_index_items[index] = fieldvalue
-        else:
-            if write:
-                for value, othercache in d.iteritems():
-                    # variable index, clear all caches for this descr
-                    othercache.var_index_indexvalue = None
-                    othercache.var_index_item = None
-                    othercache.fixed_index_items.clear()
-            cache.var_index_indexvalue = indexvalue
-            cache.var_index_item = fieldvalue
-
-    def read_cached_arrayitem(self, descr, value, indexvalue):
-        d = self.cached_arrayitems.get(descr, None)
-        if d is None:
-            return None
-        cache = d.get(value, None)
-        if cache is None:
-            return None
-        indexbox = self.get_constant_box(indexvalue.box)
-        if indexbox is not None:
-            return cache.fixed_index_items.get(indexbox.getint(), None)
-        elif cache.var_index_indexvalue is indexvalue:
-            return cache.var_index_item
-        return None
+    def arrayitem_cache(self, descr, index):
+        try:
+            submap = self.cached_arrayitems[descr]
+        except KeyError:
+            submap = self.cached_arrayitems[descr] = {}
+        try:
+            cf = submap[index]
+        except KeyError:
+            cf = submap[index] = CachedField()
+        return cf
 
     def emit_operation(self, op):
         self.emitting_operation(op)
@@ -219,7 +169,8 @@
         if op.is_ovf():
             return
         if op.is_guard():
-            self.optimizer.pendingfields = self.force_lazy_setfields_for_guard()
+            self.optimizer.pendingfields = (
+                self.force_lazy_setfields_and_arrayitems_for_guard())
             return
         opnum = op.getopnum()
         if (opnum == rop.SETFIELD_GC or        # handled specially
@@ -248,6 +199,8 @@
                 # XXX stored on effectinfo are large
                 for fielddescr in effectinfo.readonly_descrs_fields:
                     self.force_lazy_setfield(fielddescr)
+                for arraydescr in effectinfo.readonly_descrs_arrays:
+                    self.force_lazy_setarrayitem(arraydescr)
                 for fielddescr in effectinfo.write_descrs_fields:
                     self.force_lazy_setfield(fielddescr)
                     try:
@@ -256,8 +209,11 @@
                     except KeyError:
                         pass
                 for arraydescr in effectinfo.write_descrs_arrays:
+                    self.force_lazy_setarrayitem(arraydescr)
                     try:
-                        del self.cached_arrayitems[arraydescr]
+                        submap = self.cached_arrayitems[arraydescr]
+                        for cf in submap.itervalues():
+                            cf._cached_fields.clear()
                     except KeyError:
                         pass
                 if effectinfo.check_forces_virtual_or_virtualizable():
@@ -266,7 +222,7 @@
                     # ^^^ we only need to force this field; the other fields
                     # of virtualref_info and virtualizable_info are not gcptrs.
                 return
-        self.force_all_lazy_setfields()
+        self.force_all_lazy_setfields_and_arrayitems()
         self.clean_caches()
 
 
@@ -277,6 +233,10 @@
             for cf in self.cached_fields.itervalues():
                 if value in cf._cached_fields:
                     cf._cached_fields[newvalue] = cf._cached_fields[value]
+            for submap in self.cached_arrayitems.itervalues():
+                for cf in submap.itervalues():
+                    if value in cf._cached_fields:
+                        cf._cached_fields[newvalue] = cf._cached_fields[value]
 
     def force_lazy_setfield(self, descr):
         try:
@@ -285,6 +245,14 @@
             return
         cf.force_lazy_setfield(self)
 
+    def force_lazy_setarrayitem(self, arraydescr):
+        try:
+            submap = self.cached_arrayitems[arraydescr]
+        except KeyError:
+            return
+        for cf in submap.values():
+            cf.force_lazy_setfield(self)
+
     def fixup_guard_situation(self):
         # hackish: reverse the order of the last two operations if it makes
         # sense to avoid a situation like "int_eq/setfield_gc/guard_true",
@@ -309,30 +277,49 @@
         newoperations[-2] = lastop
         newoperations[-1] = prevop
 
-    def force_all_lazy_setfields(self):
-        for cf in self._lazy_setfields:
-            if not we_are_translated():
-                assert cf in self.cached_fields.values()
+    def _assert_valid_cf(self, cf):
+        # check that 'cf' is in cached_fields or cached_arrayitems
+        if not we_are_translated():
+            if cf not in self.cached_fields.values():
+                for submap in self.cached_arrayitems.values():
+                    if cf in submap.values():
+                        break
+                else:
+                    assert 0, "'cf' not in cached_fields/cached_arrayitems"
+
+    def force_all_lazy_setfields_and_arrayitems(self):
+        for cf in self._lazy_setfields_and_arrayitems:
+            self._assert_valid_cf(cf)
             cf.force_lazy_setfield(self)
 
-    def force_lazy_setfields_for_guard(self):
+    def force_lazy_setfields_and_arrayitems_for_guard(self):
         pendingfields = []
-        for cf in self._lazy_setfields:
-            if not we_are_translated():
-                assert cf in self.cached_fields.values()
+        for cf in self._lazy_setfields_and_arrayitems:
+            self._assert_valid_cf(cf)
             op = cf._lazy_setfield
             if op is None:
                 continue
             # the only really interesting case that we need to handle in the
             # guards' resume data is that of a virtual object that is stored
-            # into a field of a non-virtual object.
+            # into a field of a non-virtual object.  Here, 'op' is either
+            # SETFIELD_GC or SETARRAYITEM_GC.
             value = self.getvalue(op.getarg(0))
             assert not value.is_virtual()      # it must be a non-virtual
-            fieldvalue = self.getvalue(op.getarg(1))
+            fieldvalue = self.getvalue(op.getarglist()[-1])
             if fieldvalue.is_virtual():
                 # this is the case that we leave to resume.py
+                opnum = op.getopnum()
+                if opnum == rop.SETFIELD_GC:
+                    itemindex = -1
+                elif opnum == rop.SETARRAYITEM_GC:
+                    indexvalue = self.getvalue(op.getarg(1))
+                    assert indexvalue.is_constant()
+                    itemindex = indexvalue.box.getint()
+                    assert itemindex >= 0
+                else:
+                    assert 0
                 pendingfields.append((op.getdescr(), value.box,
-                                      fieldvalue.get_key_box()))
+                                      fieldvalue.get_key_box(), itemindex))
             else:
                 cf.force_lazy_setfield(self)
                 self.fixup_guard_situation()
@@ -364,24 +351,45 @@
         cf.do_setfield(self, op)
 
     def optimize_GETARRAYITEM_GC(self, op):
-        value = self.getvalue(op.getarg(0))
+        arrayvalue = self.getvalue(op.getarg(0))
         indexvalue = self.getvalue(op.getarg(1))
-        fieldvalue = self.read_cached_arrayitem(op.getdescr(), value, indexvalue)
-        if fieldvalue is not None:
-            self.make_equal_to(op.result, fieldvalue)
-            return
-        ###self.optimizer.optimize_default(op)
+        cf = None
+        if indexvalue.is_constant():
+            # use the cache on (arraydescr, index), which is a constant
+            cf = self.arrayitem_cache(op.getdescr(), indexvalue.box.getint())
+            fieldvalue = cf.getfield_from_cache(self, arrayvalue)
+            if fieldvalue is not None:
+                self.make_equal_to(op.result, fieldvalue)
+                return
+        else:
+            # variable index, so make sure the lazy setarrayitems are done
+            self.force_lazy_setarrayitem(op.getdescr())
+        # default case: produce the operation
+        arrayvalue.ensure_nonnull()
         self.emit_operation(op)
-        fieldvalue = self.getvalue(op.result)
-        self.cache_arrayitem_value(op.getdescr(), value, indexvalue, fieldvalue)
+        # then remember the result of reading the array item
+        if cf is not None:
+            fieldvalue = self.getvalue(op.result)
+            cf.remember_field_value(arrayvalue, fieldvalue)
 
     def optimize_SETARRAYITEM_GC(self, op):
-        self.emit_operation(op)
-        value = self.getvalue(op.getarg(0))
-        fieldvalue = self.getvalue(op.getarg(2))
+        if self.has_pure_result(rop.GETARRAYITEM_GC_PURE, [op.getarg(0),
+                                                           op.getarg(1)],
+                                op.getdescr()):
+            os.write(2, '[bogus immutable array declaration: %s]\n' %
+                     (op.getdescr().repr_of_descr()))
+            raise BogusPureField
+        #
         indexvalue = self.getvalue(op.getarg(1))
-        self.cache_arrayitem_value(op.getdescr(), value, indexvalue, fieldvalue,
-                                   write=True)
+        if indexvalue.is_constant():
+            # use the cache on (arraydescr, index), which is a constant
+            cf = self.arrayitem_cache(op.getdescr(), indexvalue.box.getint())
+            cf.do_setfield(self, op)
+        else:
+            # variable index, so make sure the lazy setarrayitems are done
+            self.force_lazy_setarrayitem(op.getdescr())
+            # and then emit the operation
+            self.emit_operation(op)
 
     def optimize_QUASIIMMUT_FIELD(self, op):
         # Pattern: QUASIIMMUT_FIELD(s, descr=QuasiImmutDescr)
diff --git a/pypy/jit/metainterp/optimizeopt/intbounds.py b/pypy/jit/metainterp/optimizeopt/intbounds.py
--- a/pypy/jit/metainterp/optimizeopt/intbounds.py
+++ b/pypy/jit/metainterp/optimizeopt/intbounds.py
@@ -1,7 +1,7 @@
 from pypy.jit.metainterp.optimizeopt.optimizer import Optimization, CONST_1, CONST_0
 from pypy.jit.metainterp.optimizeopt.util import _findall
-from pypy.jit.metainterp.optimizeopt.intutils import IntBound, IntUnbounded, \
-    IntLowerBound, IntUpperBound
+from pypy.jit.metainterp.optimizeopt.intutils import (IntBound, IntUnbounded,
+    IntLowerBound, IntUpperBound)
 from pypy.jit.metainterp.history import Const, ConstInt
 from pypy.jit.metainterp.resoperation import rop, ResOperation
 
@@ -23,7 +23,7 @@
 
     def reconstruct_for_next_iteration(self, optimizer, valuemap):
         assert self.posponedop is None
-        return self 
+        return self
 
     def propagate_forward(self, op):
         if op.is_ovf():
@@ -194,7 +194,7 @@
                 # Synthesize the reverse ops for optimize_default to reuse
                 self.pure(rop.INT_ADD, [op.result, op.getarg(1)], op.getarg(0))
                 self.pure(rop.INT_SUB, [op.getarg(0), op.result], op.getarg(1))
-                
+
 
     def optimize_INT_MUL_OVF(self, op):
         v1 = self.getvalue(op.getarg(0))
@@ -292,6 +292,11 @@
         v1.intbound.make_ge(IntLowerBound(0))
         v1.intbound.make_lt(IntUpperBound(256))
 
+    def optimize_UNICODEGETITEM(self, op):
+        self.emit_operation(op)
+        v1 = self.getvalue(op.result)
+        v1.intbound.make_ge(IntLowerBound(0))
+
     def make_int_lt(self, box1, box2):
         v1 = self.getvalue(box1)
         v2 = self.getvalue(box2)
@@ -368,6 +373,15 @@
                 if v2.intbound.intersect(v1.intbound):
                     self.propagate_bounds_backward(op.getarg(1))
 
+    def propagate_bounds_INT_IS_TRUE(self, op):
+        r = self.getvalue(op.result)
+        if r.is_constant():
+            if r.box.same_constant(CONST_1):
+                v1 = self.getvalue(op.getarg(0))
+                if v1.intbound.known_ge(IntBound(0, 0)):
+                    v1.intbound.make_gt(IntBound(0, 0))
+                    self.propagate_bounds_backward(op.getarg(0))
+
     def propagate_bounds_INT_ADD(self, op):
         v1 = self.getvalue(op.getarg(0))
         v2 = self.getvalue(op.getarg(1))
@@ -413,5 +427,6 @@
     propagate_bounds_INT_SUB_OVF  = propagate_bounds_INT_SUB
     propagate_bounds_INT_MUL_OVF  = propagate_bounds_INT_MUL
 
+
 optimize_ops = _findall(OptIntBounds, 'optimize_')
 propagate_bounds_ops = _findall(OptIntBounds, 'propagate_bounds_')
diff --git a/pypy/jit/metainterp/optimizeopt/string.py b/pypy/jit/metainterp/optimizeopt/string.py
--- a/pypy/jit/metainterp/optimizeopt/string.py
+++ b/pypy/jit/metainterp/optimizeopt/string.py
@@ -348,7 +348,7 @@
     optimizer.emit_operation(ResOperation(rop.INT_SUB, [box1, box2], resbox))
     return resbox
 
-def _strgetitem(optimizer, strbox, indexbox, mode):
+def _strgetitem(optimization, strbox, indexbox, mode):
     if isinstance(strbox, ConstPtr) and isinstance(indexbox, ConstInt):
         if mode is mode_string:
             s = strbox.getref(lltype.Ptr(rstr.STR))
@@ -357,7 +357,7 @@
             s = strbox.getref(lltype.Ptr(rstr.UNICODE))
             return ConstInt(ord(s.chars[indexbox.getint()]))
     resbox = BoxInt()
-    optimizer.emit_operation(ResOperation(mode.STRGETITEM, [strbox, indexbox],
+    optimization.emit_operation(ResOperation(mode.STRGETITEM, [strbox, indexbox],
                                       resbox))
     return resbox
 
@@ -440,8 +440,7 @@
             if vindex.is_constant():
                 return value.getitem(vindex.box.getint())
         #
-        resbox = _strgetitem(self.optimizer,
-                             value.force_box(),vindex.force_box(), mode)
+        resbox = _strgetitem(self, value.force_box(), vindex.force_box(), mode)
         return self.getvalue(resbox)
 
     def optimize_STRLEN(self, op):
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
@@ -121,6 +121,41 @@
         print '\n'.join([str(o) for o in loop.operations])
         self.assert_equal(loop, expected)
 
+    def setup_method(self, meth=None):
+        class FailDescr(compile.ResumeGuardDescr):
+            oparse = None
+            def _oparser_uses_descr_of_guard(self, oparse, fail_args):
+                # typically called 3 times: once when parsing 'ops',
+                # once when parsing 'preamble', once when parsing 'expected'.
+                self.oparse = oparse
+                self.rd_frame_info_list, self.rd_snapshot = snapshot(fail_args)
+            def _clone_if_mutable(self):
+                assert self is fdescr
+                return fdescr2
+            def __repr__(self):
+                if self is fdescr:
+                    return 'fdescr'
+                if self is fdescr2:
+                    return 'fdescr2'
+                return compile.ResumeGuardDescr.__repr__(self)
+        #
+        def snapshot(fail_args, got=[]):
+            if not got:    # only the first time, i.e. when parsing 'ops'
+                rd_frame_info_list = resume.FrameInfo(None, "code", 11)
+                rd_snapshot = resume.Snapshot(None, fail_args)
+                got.append(rd_frame_info_list)
+                got.append(rd_snapshot)
+            return got
+        #
+        fdescr = instantiate(FailDescr)
+        self.namespace['fdescr'] = fdescr
+        fdescr2 = instantiate(FailDescr)
+        self.namespace['fdescr2'] = fdescr2
+
+    def teardown_method(self, meth):
+        self.namespace.pop('fdescr', None)
+        self.namespace.pop('fdescr2', None)
+
 
 
 class BaseTestOptimizeBasic(BaseTestBasic):
@@ -1070,8 +1105,8 @@
         """
         expected = """
         [i1, p0]
+        p1 = new_array(i1, descr=arraydescr)
         setarrayitem_gc(p0, 0, i1, descr=arraydescr)
-        p1 = new_array(i1, descr=arraydescr)
         jump(i1, p1)
         """
         self.optimize_loop(ops, expected)
@@ -1436,9 +1471,9 @@
         i3 = getarrayitem_gc_pure(p3, 1, descr=arraydescr)
         i4 = getarrayitem_gc(p3, i3, descr=arraydescr)
         i5 = int_add(i3, i4)
-        setarrayitem_gc(p3, 0, i5, descr=arraydescr)
         #
         setfield_gc(p1, i2, descr=valuedescr)
+        setarrayitem_gc(p3, 0, i5, descr=arraydescr)
         setfield_gc(p1, i4, descr=nextdescr)
         jump(p1, i1, i2, p3)
         """
@@ -1612,6 +1647,7 @@
         self.optimize_loop(ops, expected)
 
     def test_duplicate_getarrayitem_after_setarrayitem_2(self):
+        py.test.skip("setarrayitem with variable index")
         ops = """
         [p1, p2, p3, i1]
         setarrayitem_gc(p1, 0, p2, descr=arraydescr2)
@@ -1874,7 +1910,6 @@
         self.optimize_loop(ops, expected)
 
     def test_merge_guard_nonnull_guard_class(self):
-        self.make_fail_descr()
         ops = """
         [p1, i0, i1, i2, p2]
         guard_nonnull(p1, descr=fdescr) [i0]
@@ -1892,7 +1927,6 @@
         self.check_expanded_fail_descr("i0", rop.GUARD_NONNULL_CLASS)
 
     def test_merge_guard_nonnull_guard_value(self):
-        self.make_fail_descr()
         ops = """
         [p1, i0, i1, i2, p2]
         guard_nonnull(p1, descr=fdescr) [i0]
@@ -1910,7 +1944,6 @@
         self.check_expanded_fail_descr("i0", rop.GUARD_VALUE)
 
     def test_merge_guard_nonnull_guard_class_guard_value(self):
-        self.make_fail_descr()
         ops = """
         [p1, i0, i1, i2, p2]
         guard_nonnull(p1, descr=fdescr) [i0]
@@ -2203,23 +2236,6 @@
 
     # ----------
 
-    def make_fail_descr(self):
-        class FailDescr(compile.ResumeGuardDescr):
-            oparse = None
-            def _oparser_uses_descr_of_guard(self, oparse, fail_args):
-                # typically called twice, before and after optimization
-                if self.oparse is None:
-                    fdescr.rd_frame_info_list = resume.FrameInfo(None,
-                                                                 "code", 11)
-                    fdescr.rd_snapshot = resume.Snapshot(None, fail_args)
-                self.oparse = oparse
-        #
-        fdescr = instantiate(FailDescr)
-        self.namespace['fdescr'] = fdescr
-
-    def teardown_method(self, meth):
-        self.namespace.pop('fdescr', None)
-
     def _verify_fail_args(self, boxes, oparse, text):
         import re
         r = re.compile(r"\bwhere\s+(\w+)\s+is a\s+(\w+)")
@@ -2328,7 +2344,6 @@
         self._verify_fail_args(boxes, fdescr.oparse, expectedtext)
 
     def test_expand_fail_1(self):
-        self.make_fail_descr()
         ops = """
         [i1, i3]
         # first rename i3 into i4
@@ -2349,7 +2364,6 @@
         self.check_expanded_fail_descr('15, i3', rop.GUARD_TRUE)
 
     def test_expand_fail_2(self):
-        self.make_fail_descr()
         ops = """
         [i1, i2]
         p1 = new_with_vtable(ConstClass(node_vtable))
@@ -2369,7 +2383,6 @@
             ''', rop.GUARD_TRUE)
 
     def test_expand_fail_3(self):
-        self.make_fail_descr()
         ops = """
         [i1, i2, i3, p3]
         p1 = new_with_vtable(ConstClass(node_vtable))
@@ -2395,7 +2408,7 @@
     def test_expand_fail_4(self):
         for arg in ['p1', 'i2,p1', 'p1,p2', 'p2,p1',
                     'i2,p1,p2', 'i2,p2,p1']:
-            self.make_fail_descr()
+            self.setup_method() # humpf
             ops = """
             [i1, i2, i3]
             p1 = new_with_vtable(ConstClass(node_vtable))
@@ -2420,7 +2433,6 @@
                                            rop.GUARD_TRUE)
 
     def test_expand_fail_5(self):
-        self.make_fail_descr()
         ops = """
         [i1, i2, i3, i4]
         p1 = new_with_vtable(ConstClass(node_vtable))
@@ -2444,7 +2456,6 @@
             ''', rop.GUARD_TRUE)
 
     def test_expand_fail_6(self):
-        self.make_fail_descr()
         ops = """
         [p0, i0, i1]
         guard_true(i0, descr=fdescr) [p0]
@@ -2465,7 +2476,6 @@
             ''', rop.GUARD_TRUE)
 
     def test_expand_fail_varray(self):
-        self.make_fail_descr()
         ops = """
         [i1]
         p1 = new_array(3, descr=arraydescr)
@@ -2486,7 +2496,6 @@
             ''', rop.GUARD_TRUE)
 
     def test_expand_fail_vstruct(self):
-        self.make_fail_descr()
         ops = """
         [i1, p1]
         p2 = new(descr=ssize)
@@ -2508,7 +2517,6 @@
             ''', rop.GUARD_TRUE)
 
     def test_expand_fail_v_all_1(self):
-        self.make_fail_descr()
         ops = """
         [i1, p1a, i2]
         p6s = getarrayitem_gc(p1a, 0, descr=arraydescr2)
@@ -2550,7 +2558,6 @@
             ''', rop.GUARD_TRUE)
 
     def test_expand_fail_lazy_setfield_1(self):
-        self.make_fail_descr()
         ops = """
         [p1, i2, i3]
         p2 = new_with_vtable(ConstClass(node_vtable))
@@ -2576,7 +2583,6 @@
             ''', rop.GUARD_TRUE)
 
     def test_expand_fail_lazy_setfield_2(self):
-        self.make_fail_descr()
         ops = """
         [i2, i3]
         p2 = new_with_vtable(ConstClass(node_vtable))
@@ -2600,9 +2606,6 @@
             where p2 is a node_vtable, valuedescr=i2
             ''', rop.GUARD_TRUE)
 
-
-class TestLLtype(BaseTestOptimizeBasic, LLtypeMixin):
-
     def test_residual_call_does_not_invalidate_caches(self):
         ops = """
         [p1, p2]
@@ -2894,7 +2897,6 @@
         self.optimize_loop(ops, expected)
 
     def test_vref_virtual_2(self):
-        self.make_fail_descr()
         ops = """
         [p0, i1]
         #
@@ -2940,7 +2942,6 @@
             ''', rop.GUARD_NOT_FORCED)
 
     def test_vref_virtual_and_lazy_setfield(self):
-        self.make_fail_descr()
         ops = """
         [p0, i1]
         #
@@ -2979,7 +2980,6 @@
             ''', rop.GUARD_NO_EXCEPTION)
 
     def test_vref_virtual_after_finish(self):
-        self.make_fail_descr()
         ops = """
         [i1]
         p1 = new_with_vtable(ConstClass(node_vtable))
@@ -3006,7 +3006,6 @@
         self.optimize_loop(ops, expected)
 
     def test_vref_nonvirtual_and_lazy_setfield(self):
-        self.make_fail_descr()
         ops = """
         [i1, p1]
         p2 = virtual_ref(p1, 23)
@@ -4480,6 +4479,47 @@
         # not obvious, because of the exception UnicodeDecodeError that
         # can be raised by ll_str2unicode()
 
+    def test_strgetitem_repeated(self):
+        ops = """
+        [p0, i0]
+        i1 = strgetitem(p0, i0)
+        i2 = strgetitem(p0, i0)
+        i3 = int_eq(i1, i2)
+        guard_true(i3) []
+        escape(i2)
+        jump(p0, i0)
+        """
+        expected = """
+        [p0, i0]
+        i1 = strgetitem(p0, i0)
+        escape(i1)
+        jump(p0, i0)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_int_is_true_bounds(self):
+        ops = """
+        [p0]
+        i0 = strlen(p0)
+        i1 = int_is_true(i0)
+        guard_true(i1) []
+        i2 = int_ge(0, i0)
+        guard_false(i2) []
+        jump(p0)
+        """
+        expected = """
+        [p0]
+        i0 = strlen(p0)
+        i1 = int_is_true(i0)
+        guard_true(i1) []
+        jump(p0)
+        """
+        self.optimize_loop(ops, expected)
+
+
+class TestLLtype(BaseTestOptimizeBasic, LLtypeMixin):
+    pass
+
 
 ##class TestOOtype(BaseTestOptimizeBasic, OOtypeMixin):
 
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_optimizefficall.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizefficall.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_optimizefficall.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_optimizefficall.py
@@ -51,7 +51,7 @@
                              restype=types.sint)
         #
         def calldescr(cpu, FUNC, oopspecindex, extraeffect=None):
-            einfo = EffectInfo([], [], [], oopspecindex=oopspecindex,
+            einfo = EffectInfo([], [], [], [], oopspecindex=oopspecindex,
                                extraeffect=extraeffect)
             return cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT, einfo)
         #
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
@@ -1381,8 +1381,8 @@
         """
         expected = """
         [i1, p0]
+        p1 = new_array(i1, descr=arraydescr)
         setarrayitem_gc(p0, 0, i1, descr=arraydescr)
-        p1 = new_array(i1, descr=arraydescr)
         jump(i1, p1)
         """
         self.optimize_loop(ops, expected)
@@ -1806,9 +1806,9 @@
         i3 = getarrayitem_gc_pure(p3, 1, descr=arraydescr)
         i4 = getarrayitem_gc(p3, i3, descr=arraydescr)
         i5 = int_add(i3, i4)
-        setarrayitem_gc(p3, 0, i5, descr=arraydescr)
         #
         setfield_gc(p1, i2, descr=valuedescr)
+        setarrayitem_gc(p3, 0, i5, descr=arraydescr)
         setfield_gc(p1, i4, descr=nextdescr)
         escape()
         jump(p1, i1, i2, p3, i3)
@@ -1818,9 +1818,9 @@
         #
         i4 = getarrayitem_gc(p3, i3, descr=arraydescr)
         i5 = int_add(i3, i4)
-        setarrayitem_gc(p3, 0, i5, descr=arraydescr)
         #
         setfield_gc(p1, i2, descr=valuedescr)
+        setarrayitem_gc(p3, 0, i5, descr=arraydescr)
         setfield_gc(p1, i4, descr=nextdescr)
         escape()
         jump(p1, i1, i2, p3, i3)
@@ -2055,6 +2055,7 @@
         self.optimize_loop(ops, expected)
 
     def test_duplicate_getarrayitem_after_setarrayitem_2(self):
+        py.test.skip("setarrayitem with variable index")
         ops = """
         [p1, p2, p3, i1]
         setarrayitem_gc(p1, 0, p2, descr=arraydescr2)
@@ -2741,8 +2742,6 @@
 
     # ----------
 
-class TestLLtype(OptimizeOptTest, LLtypeMixin):
-
     def test_residual_call_does_not_invalidate_caches(self):
         ops = """
         [p1, p2]
@@ -5311,7 +5310,7 @@
         """
         self.optimize_strunicode_loop(ops, expected)
 
-    def test_strgetitem_small(self):
+    def test_strgetitem_bounds(self):
         ops = """
         [p0, i0]
         i1 = strgetitem(p0, i0)
@@ -5323,7 +5322,20 @@
         """
         expected = """
         [p0, i0]
-        i1 = strgetitem(p0, i0)
+        jump(p0, i0)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_unicodegetitem_bounds(self):
+        ops = """
+        [p0, i0]
+        i1 = unicodegetitem(p0, i0)
+        i2 = int_lt(i1, 0)
+        guard_false(i2) []
+        jump(p0, i0)
+        """
+        expected = """
+        [p0, i0]
         jump(p0, i0)
         """
         self.optimize_loop(ops, expected)
@@ -5863,4 +5875,28 @@
         escape(p0)
         jump(p0)
         """
-        self.optimize_loop(ops, expected)
\ No newline at end of file
+        self.optimize_loop(ops, expected)
+
+    def test_setarrayitem_lazy(self):
+        ops = """
+        [i0, i1]
+        p0 = escape()
+        i2 = escape()
+        p1 = new_with_vtable(ConstClass(node_vtable))
+        setarrayitem_gc(p0, 2, p1, descr=arraydescr)
+        guard_true(i2) []
+        setarrayitem_gc(p0, 2, p0, descr=arraydescr)
+        jump(i0, i1)
+        """
+        expected = """
+        [i0, i1]
+        p0 = escape()
+        i2 = escape()
+        guard_true(i2) [p0]
+        setarrayitem_gc(p0, 2, p0, descr=arraydescr)
+        jump(i0, i1)
+        """
+        self.optimize_loop(ops, expected)
+
+class TestLLtype(OptimizeOptTest, LLtypeMixin):
+    pass
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_util.py b/pypy/jit/metainterp/optimizeopt/test/test_util.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_util.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_util.py
@@ -166,19 +166,19 @@
     FUNC = lltype.FuncType([lltype.Signed], lltype.Signed)
     plaincalldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
     nonwritedescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT,
-                                    EffectInfo([], [], []))
+                                    EffectInfo([], [], [], []))
     writeadescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT,
-                                  EffectInfo([], [adescr], []))
+                                  EffectInfo([], [], [adescr], []))
     writearraydescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT,
-                                      EffectInfo([], [adescr], [arraydescr]))
+                                  EffectInfo([], [], [adescr], [arraydescr]))
     readadescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT,
-                                 EffectInfo([adescr], [], []))
+                                 EffectInfo([adescr], [], [], []))
     mayforcevirtdescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT,
-                 EffectInfo([nextdescr], [], [],
+                 EffectInfo([nextdescr], [], [], [],
                             EffectInfo.EF_FORCES_VIRTUAL_OR_VIRTUALIZABLE,
                             can_invalidate=True))
     arraycopydescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT,
-                 EffectInfo([], [], [], oopspecindex=EffectInfo.OS_ARRAYCOPY))
+             EffectInfo([], [], [], [], oopspecindex=EffectInfo.OS_ARRAYCOPY))
 
     for _name, _os in [
         ('strconcatdescr',               'OS_STR_CONCAT'),
@@ -195,15 +195,15 @@
         _oopspecindex = getattr(EffectInfo, _os)
         locals()[_name] = \
             cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT,
-                EffectInfo([], [], [], oopspecindex=_oopspecindex))
+                EffectInfo([], [], [], [], oopspecindex=_oopspecindex))
         #
         _oopspecindex = getattr(EffectInfo, _os.replace('STR', 'UNI'))
         locals()[_name.replace('str', 'unicode')] = \
             cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT,
-                EffectInfo([], [], [], oopspecindex=_oopspecindex))
+                EffectInfo([], [], [], [], oopspecindex=_oopspecindex))
 
     s2u_descr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT,
-                EffectInfo([], [], [], oopspecindex=EffectInfo.OS_STR2UNICODE))
+            EffectInfo([], [], [], [], oopspecindex=EffectInfo.OS_STR2UNICODE))
     #
 
     class LoopToken(AbstractDescr):
diff --git a/pypy/jit/metainterp/pyjitpl.py b/pypy/jit/metainterp/pyjitpl.py
--- a/pypy/jit/metainterp/pyjitpl.py
+++ b/pypy/jit/metainterp/pyjitpl.py
@@ -1,5 +1,5 @@
-import py, os, sys
-from pypy.rpython.lltypesystem import lltype, llmemory, rclass
+import py, sys
+from pypy.rpython.lltypesystem import lltype, rclass
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.rlib.unroll import unrolling_iterable
 from pypy.rlib.debug import debug_start, debug_stop, debug_print
@@ -15,13 +15,12 @@
 from pypy.jit.metainterp.jitprof import EmptyProfiler
 from pypy.jit.metainterp.jitprof import GUARDS, RECORDED_OPS, ABORT_ESCAPE
 from pypy.jit.metainterp.jitprof import ABORT_TOO_LONG, ABORT_BRIDGE, \
-                                        ABORT_BAD_LOOP, ABORT_FORCE_QUASIIMMUT
+                                        ABORT_FORCE_QUASIIMMUT
 from pypy.jit.metainterp.jitexc import JitException, get_llexception
-from pypy.rlib.rarithmetic import intmask
 from pypy.rlib.objectmodel import specialize
-from pypy.jit.codewriter.jitcode import JitCode, SwitchDictDescr, MissingLiveness
-from pypy.jit.codewriter import heaptracker, longlong
-from pypy.jit.metainterp.optimizeopt.util import args_dict_box, args_dict
+from pypy.jit.codewriter.jitcode import JitCode, SwitchDictDescr
+from pypy.jit.codewriter import heaptracker
+from pypy.jit.metainterp.optimizeopt.util import args_dict_box
 from pypy.jit.metainterp.optimize import RetraceLoop
 
 # ____________________________________________________________
@@ -885,7 +884,7 @@
         any_operation = len(self.metainterp.history.operations) > 0
         jitdriver_sd = self.metainterp.staticdata.jitdrivers_sd[jdindex]
         self.verify_green_args(jitdriver_sd, greenboxes)
-        self.debug_merge_point(jdindex, self.metainterp.in_recursion,
+        self.debug_merge_point(jitdriver_sd, jdindex, self.metainterp.in_recursion,
                                greenboxes)
 
         if self.metainterp.seen_loop_header_for_jdindex < 0:
@@ -932,8 +931,10 @@
                                     assembler_call=True)
             raise ChangeFrame
 
-    def debug_merge_point(self, jd_index, in_recursion, greenkey):
+    def debug_merge_point(self, jitdriver_sd, jd_index, in_recursion, greenkey):
         # debugging: produce a DEBUG_MERGE_POINT operation
+        loc = jitdriver_sd.warmstate.get_location_str(greenkey)
+        debug_print(loc)
         args = [ConstInt(jd_index), ConstInt(in_recursion)] + greenkey
         self.metainterp.history.record(rop.DEBUG_MERGE_POINT, args, None)
 
@@ -2136,7 +2137,6 @@
     def vrefs_after_residual_call(self):
         vrefinfo = self.staticdata.virtualref_info
         for i in range(0, len(self.virtualref_boxes), 2):
-            virtualbox = self.virtualref_boxes[i]
             vrefbox = self.virtualref_boxes[i+1]
             vref = vrefbox.getref_base()
             if vrefinfo.tracing_after_residual_call(vref):
diff --git a/pypy/jit/metainterp/resume.py b/pypy/jit/metainterp/resume.py
--- a/pypy/jit/metainterp/resume.py
+++ b/pypy/jit/metainterp/resume.py
@@ -2,10 +2,12 @@
 from pypy.jit.metainterp.history import Box, Const, ConstInt, getkind
 from pypy.jit.metainterp.history import BoxInt, BoxPtr, BoxFloat
 from pypy.jit.metainterp.history import INT, REF, FLOAT, HOLE
+from pypy.jit.metainterp.history import AbstractDescr
 from pypy.jit.metainterp.resoperation import rop
 from pypy.jit.metainterp import jitprof
 from pypy.jit.codewriter.effectinfo import EffectInfo
 from pypy.rpython.lltypesystem import lltype, llmemory, rffi, rstr
+from pypy.rpython import annlowlevel
 from pypy.rlib import rarithmetic, rstack
 from pypy.rlib.objectmodel import we_are_translated, specialize
 from pypy.rlib.debug import have_debug_prints, ll_assert
@@ -82,6 +84,13 @@
                             ('nums', lltype.Array(rffi.SHORT)))
 NUMBERINGP.TO.become(NUMBERING)
 
+PENDINGFIELDSTRUCT = lltype.Struct('PendingField',
+                                   ('lldescr', annlowlevel.base_ptr_lltype()),
+                                   ('num', rffi.SHORT),
+                                   ('fieldnum', rffi.SHORT),
+                                   ('itemindex', rffi.INT))
+PENDINGFIELDSP = lltype.Ptr(lltype.GcArray(PENDINGFIELDSTRUCT))
+
 TAGMASK = 3
 
 def tag(value, tagbits):
@@ -329,7 +338,7 @@
                 value = values[box]
                 value.get_args_for_fail(self)
 
-        for _, box, fieldbox in pending_setfields:
+        for _, box, fieldbox, _ in pending_setfields:
             self.register_box(box)
             self.register_box(fieldbox)
             value = values[fieldbox]
@@ -405,13 +414,25 @@
         return False
 
     def _add_pending_fields(self, pending_setfields):
-        rd_pendingfields = None
+        rd_pendingfields = lltype.nullptr(PENDINGFIELDSP.TO)
         if pending_setfields:
-            rd_pendingfields = []
-            for descr, box, fieldbox in pending_setfields:
+            n = len(pending_setfields)
+            rd_pendingfields = lltype.malloc(PENDINGFIELDSP.TO, n)
+            for i in range(n):
+                descr, box, fieldbox, itemindex = pending_setfields[i]
+                lldescr = annlowlevel.cast_instance_to_base_ptr(descr)
                 num = self._gettagged(box)
                 fieldnum = self._gettagged(fieldbox)
-                rd_pendingfields.append((descr, num, fieldnum))
+                # the index is limited to 2147483647 (64-bit machines only)
+                if itemindex > 2147483647:
+                    from pypy.jit.metainterp import compile
+                    compile.giveup()
+                itemindex = rffi.cast(rffi.INT, itemindex)
+                #
+                rd_pendingfields[i].lldescr  = lldescr
+                rd_pendingfields[i].num      = num
+                rd_pendingfields[i].fieldnum = fieldnum
+                rd_pendingfields[i].itemindex= itemindex
         self.storage.rd_pendingfields = rd_pendingfields
 
     def _gettagged(self, box):
@@ -727,10 +748,28 @@
             self.virtuals_cache = [self.virtual_default] * len(virtuals)
 
     def _prepare_pendingfields(self, pendingfields):
-        if pendingfields is not None:
-            for descr, num, fieldnum in pendingfields:
+        if pendingfields:
+            for i in range(len(pendingfields)):
+                lldescr  = pendingfields[i].lldescr
+                num      = pendingfields[i].num
+                fieldnum = pendingfields[i].fieldnum
+                itemindex= pendingfields[i].itemindex
+                descr = annlowlevel.cast_base_ptr_to_instance(AbstractDescr,
+                                                              lldescr)
                 struct = self.decode_ref(num)
-                self.setfield(descr, struct, fieldnum)
+                itemindex = rffi.cast(lltype.Signed, itemindex)
+                if itemindex < 0:
+                    self.setfield(descr, struct, fieldnum)
+                else:
+                    self.setarrayitem(descr, struct, itemindex, fieldnum)
+
+    def setarrayitem(self, arraydescr, array, index, fieldnum):
+        if arraydescr.is_array_of_pointers():
+            self.setarrayitem_ref(arraydescr, array, index, fieldnum)
+        elif arraydescr.is_array_of_floats():
+            self.setarrayitem_float(arraydescr, array, index, fieldnum)
+        else:
+            self.setarrayitem_int(arraydescr, array, index, fieldnum)
 
     def _prepare_next_section(self, info):
         # Use info.enumerate_vars(), normally dispatching to
@@ -903,15 +942,15 @@
                                            structbox, fieldbox)
 
     def setarrayitem_int(self, arraydescr, arraybox, index, fieldnum):
-        self.setarrayitem(arraydescr, arraybox, index, fieldnum, INT)
+        self._setarrayitem(arraydescr, arraybox, index, fieldnum, INT)
 
     def setarrayitem_ref(self, arraydescr, arraybox, index, fieldnum):
-        self.setarrayitem(arraydescr, arraybox, index, fieldnum, REF)
+        self._setarrayitem(arraydescr, arraybox, index, fieldnum, REF)
 
     def setarrayitem_float(self, arraydescr, arraybox, index, fieldnum):
-        self.setarrayitem(arraydescr, arraybox, index, fieldnum, FLOAT)
+        self._setarrayitem(arraydescr, arraybox, index, fieldnum, FLOAT)
 
-    def setarrayitem(self, arraydescr, arraybox, index, fieldnum, kind):
+    def _setarrayitem(self, arraydescr, arraybox, index, fieldnum, kind):
         itembox = self.decode_box(fieldnum, kind)
         self.metainterp.execute_and_record(rop.SETARRAYITEM_GC,
                                            arraydescr, arraybox,
diff --git a/pypy/jit/metainterp/test/test_ajit.py b/pypy/jit/metainterp/test/test_ajit.py
--- a/pypy/jit/metainterp/test/test_ajit.py
+++ b/pypy/jit/metainterp/test/test_ajit.py
@@ -1677,6 +1677,8 @@
         res = self.meta_interp(g, [6, 14])
         assert res == g(6, 14)
         self.check_loop_count(9)
+        self.check_loops(getarrayitem_gc=8, everywhere=True)
+        py.test.skip("for the following, we need setarrayitem(varindex)")
         self.check_loops(getarrayitem_gc=6, everywhere=True)
 
     def test_multiple_specialied_versions_bridge(self):
@@ -2296,6 +2298,21 @@
         res = self.meta_interp(f, [1])
         assert res == f(1)
 
+    def test_remove_array_operations(self):
+        myjitdriver = JitDriver(greens = [], reds = ['a'])
+        class W_Int:
+            def __init__(self, intvalue):
+                self.intvalue = intvalue
+        def f(x):
+            a = [W_Int(x)]
+            while a[0].intvalue > 0:
+                myjitdriver.jit_merge_point(a=a)
+                a[0] = W_Int(a[0].intvalue - 3)
+            return a[0].intvalue
+        res = self.meta_interp(f, [100])
+        assert res == -2
+        #self.check_loops(getarrayitem_gc=0, setarrayitem_gc=0) -- xxx?
+
 class TestOOtype(BasicTests, OOJitMixin):
 
     def test_oohash(self):
diff --git a/pypy/jit/metainterp/test/test_list.py b/pypy/jit/metainterp/test/test_list.py
--- a/pypy/jit/metainterp/test/test_list.py
+++ b/pypy/jit/metainterp/test/test_list.py
@@ -49,7 +49,7 @@
                 x = l[n]
                 l = [3] * 100
                 l[3] = x
-                l[3] = x + 1
+                l[4] = x + 1
                 n -= 1
             return l[0]
 
diff --git a/pypy/jit/metainterp/test/test_resume.py b/pypy/jit/metainterp/test/test_resume.py
--- a/pypy/jit/metainterp/test/test_resume.py
+++ b/pypy/jit/metainterp/test/test_resume.py
@@ -1238,7 +1238,7 @@
     liveboxes = []
     modifier._number_virtuals(liveboxes, values, 0)
     assert liveboxes == [b2s, b4s] or liveboxes == [b4s, b2s]
-    modifier._add_pending_fields([(LLtypeMixin.nextdescr, b2s, b4s)])
+    modifier._add_pending_fields([(LLtypeMixin.nextdescr, b2s, b4s, -1)])
     storage.rd_consts = memo.consts[:]
     storage.rd_numb = None
     # resume
@@ -1259,6 +1259,106 @@
     assert len(expected) == len(trace)
     assert demo55.next == demo66
 
+def test_virtual_adder_pending_fields_and_arrayitems():
+    class Storage(object):
+        pass
+    storage = Storage()
+    modifier = ResumeDataVirtualAdder(storage, None)
+    modifier._add_pending_fields([])
+    assert not storage.rd_pendingfields
+    #
+    class FieldDescr(object):
+        pass
+    field_a = FieldDescr()
+    storage = Storage()
+    modifier = ResumeDataVirtualAdder(storage, None)
+    modifier.liveboxes_from_env = {42: rffi.cast(rffi.SHORT, 1042),
+                                   61: rffi.cast(rffi.SHORT, 1061)}
+    modifier._add_pending_fields([(field_a, 42, 61, -1)])
+    pf = storage.rd_pendingfields
+    assert len(pf) == 1
+    assert (annlowlevel.cast_base_ptr_to_instance(FieldDescr, pf[0].lldescr)
+            is field_a)
+    assert rffi.cast(lltype.Signed, pf[0].num) == 1042
+    assert rffi.cast(lltype.Signed, pf[0].fieldnum) == 1061
+    assert rffi.cast(lltype.Signed, pf[0].itemindex) == -1
+    #
+    array_a = FieldDescr()
+    storage = Storage()
+    modifier = ResumeDataVirtualAdder(storage, None)
+    modifier.liveboxes_from_env = {42: rffi.cast(rffi.SHORT, 1042),
+                                   61: rffi.cast(rffi.SHORT, 1061),
+                                   62: rffi.cast(rffi.SHORT, 1062),
+                                   63: rffi.cast(rffi.SHORT, 1063)}
+    modifier._add_pending_fields([(array_a, 42, 61, 0),
+                                  (array_a, 42, 62, 2147483647)])
+    pf = storage.rd_pendingfields
+    assert len(pf) == 2
+    assert (annlowlevel.cast_base_ptr_to_instance(FieldDescr, pf[0].lldescr)
+            is array_a)
+    assert rffi.cast(lltype.Signed, pf[0].num) == 1042
+    assert rffi.cast(lltype.Signed, pf[0].fieldnum) == 1061
+    assert rffi.cast(lltype.Signed, pf[0].itemindex) == 0
+    assert (annlowlevel.cast_base_ptr_to_instance(FieldDescr, pf[1].lldescr)
+            is array_a)
+    assert rffi.cast(lltype.Signed, pf[1].num) == 1042
+    assert rffi.cast(lltype.Signed, pf[1].fieldnum) == 1062
+    assert rffi.cast(lltype.Signed, pf[1].itemindex) == 2147483647
+    #
+    from pypy.jit.metainterp.pyjitpl import SwitchToBlackhole
+    py.test.raises(SwitchToBlackhole, modifier._add_pending_fields,
+                   [(array_a, 42, 63, 2147483648)])
+
+def test_resume_reader_fields_and_arrayitems():
+    class ResumeReader(AbstractResumeDataReader):
+        def __init__(self, got=None, got_array=None):
+            self.got = got
+            self.got_array = got_array
+        def setfield(self, descr, struct, fieldnum):
+            assert lltype.typeOf(struct) is lltype.Signed
+            assert lltype.typeOf(fieldnum) is rffi.SHORT
+            fieldnum = rffi.cast(lltype.Signed, fieldnum)
+            self.got.append((descr, struct, fieldnum))
+        def setarrayitem(self, arraydescr, array, index, fieldnum):
+            assert lltype.typeOf(array) is lltype.Signed
+            assert lltype.typeOf(index) is lltype.Signed
+            assert lltype.typeOf(fieldnum) is rffi.SHORT
+            fieldnum = rffi.cast(lltype.Signed, fieldnum)
+            self.got_array.append((arraydescr, array, index, fieldnum))
+        def decode_ref(self, num):
+            return rffi.cast(lltype.Signed, num) * 100
+    got = []
+    pf = lltype.nullptr(PENDINGFIELDSP.TO)
+    ResumeReader(got)._prepare_pendingfields(pf)
+    assert got == []
+    #
+    class FieldDescr(AbstractDescr):
+        pass
+    field_a = FieldDescr()
+    field_b = FieldDescr()
+    pf = lltype.malloc(PENDINGFIELDSP.TO, 2)
+    pf[0].lldescr = annlowlevel.cast_instance_to_base_ptr(field_a)
+    pf[0].num = rffi.cast(rffi.SHORT, 1042)
+    pf[0].fieldnum = rffi.cast(rffi.SHORT, 1061)
+    pf[0].itemindex = rffi.cast(rffi.INT, -1)
+    pf[1].lldescr = annlowlevel.cast_instance_to_base_ptr(field_b)
+    pf[1].num = rffi.cast(rffi.SHORT, 2042)
+    pf[1].fieldnum = rffi.cast(rffi.SHORT, 2061)
+    pf[1].itemindex = rffi.cast(rffi.INT, -1)
+    got = []
+    ResumeReader(got)._prepare_pendingfields(pf)
+    assert got == [(field_a, 104200, 1061), (field_b, 204200, 2061)]
+    #
+    array_a = FieldDescr()
+    pf = lltype.malloc(PENDINGFIELDSP.TO, 1)
+    pf[0].lldescr = annlowlevel.cast_instance_to_base_ptr(array_a)
+    pf[0].num = rffi.cast(rffi.SHORT, 1042)
+    pf[0].fieldnum = rffi.cast(rffi.SHORT, 1063)
+    pf[0].itemindex = rffi.cast(rffi.INT, 123)
+    got_array = []
+    ResumeReader(got_array=got_array)._prepare_pendingfields(pf)
+    assert got_array == [(array_a, 104200, 123, 1063)]
+
 
 def test_invalidation_needed():
     class options:
diff --git a/pypy/jit/metainterp/virtualref.py b/pypy/jit/metainterp/virtualref.py
--- a/pypy/jit/metainterp/virtualref.py
+++ b/pypy/jit/metainterp/virtualref.py
@@ -1,5 +1,5 @@
 from pypy.rpython.rmodel import inputconst, log
-from pypy.rpython.lltypesystem import lltype, llmemory, rffi, rclass
+from pypy.rpython.lltypesystem import lltype, llmemory, rclass
 from pypy.jit.metainterp import history
 from pypy.jit.codewriter import heaptracker
 from pypy.rlib.jit import InvalidVirtualRef
diff --git a/pypy/jit/metainterp/warmspot.py b/pypy/jit/metainterp/warmspot.py
--- a/pypy/jit/metainterp/warmspot.py
+++ b/pypy/jit/metainterp/warmspot.py
@@ -1,6 +1,5 @@
 import sys, py
-from pypy.rpython.lltypesystem import lltype, llmemory, rclass, rstr
-from pypy.rpython.ootypesystem import ootype
+from pypy.rpython.lltypesystem import lltype, llmemory
 from pypy.rpython.annlowlevel import llhelper, MixLevelHelperAnnotator,\
      cast_base_ptr_to_instance, hlstr
 from pypy.annotation import model as annmodel
@@ -10,16 +9,12 @@
 from pypy.objspace.flow.model import checkgraph, Link, copygraph
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.rlib.unroll import unrolling_iterable
-from pypy.rlib.rarithmetic import r_uint, intmask
-from pypy.rlib.debug import debug_print, fatalerror
-from pypy.rlib.debug import debug_start, debug_stop
-from pypy.rpython.lltypesystem.lloperation import llop
-from pypy.translator.simplify import get_funcobj, get_functype
+from pypy.rlib.debug import fatalerror
+from pypy.translator.simplify import get_functype
 from pypy.translator.unsimplify import call_final_function
 
 from pypy.jit.metainterp import history, pyjitpl, gc, memmgr
-from pypy.jit.metainterp.pyjitpl import MetaInterpStaticData, MetaInterp
-from pypy.jit.metainterp.typesystem import LLTypeHelper, OOTypeHelper
+from pypy.jit.metainterp.pyjitpl import MetaInterpStaticData
 from pypy.jit.metainterp.jitprof import Profiler, EmptyProfiler
 from pypy.jit.metainterp.jitexc import JitException
 from pypy.jit.metainterp.jitdriver import JitDriverStaticData
@@ -297,9 +292,6 @@
         self.stats = stats
         if translate_support_code:
             self.annhelper = MixLevelHelperAnnotator(self.translator.rtyper)
-            annhelper = self.annhelper
-        else:
-            annhelper = None
         cpu = CPUClass(self.translator.rtyper, self.stats, self.opt,
                        translate_support_code, gcdescr=self.gcdescr)
         self.cpu = cpu
@@ -440,7 +432,6 @@
             maybe_enter_jit._always_inline_ = True
         jd._maybe_enter_jit_fn = maybe_enter_jit
 
-        num_green_args = jd.num_green_args
         def maybe_enter_from_start(*args):
             maybe_compile_and_run(state.increment_function_threshold, *args)
         maybe_enter_from_start._always_inline_ = True
@@ -553,7 +544,6 @@
             self.rewrite_can_enter_jit(jd, sublist)
 
     def rewrite_can_enter_jit(self, jd, can_enter_jits):
-        FUNC = jd._JIT_ENTER_FUNCTYPE
         FUNCPTR = jd._PTR_JIT_ENTER_FUNCTYPE
         jit_enter_fnptr = self.helper_func(FUNCPTR, jd._maybe_enter_jit_fn)
 
diff --git a/pypy/jit/metainterp/warmstate.py b/pypy/jit/metainterp/warmstate.py
--- a/pypy/jit/metainterp/warmstate.py
+++ b/pypy/jit/metainterp/warmstate.py
@@ -1,7 +1,7 @@
 import sys, weakref
 from pypy.rpython.lltypesystem import lltype, llmemory, rstr, rffi
 from pypy.rpython.ootypesystem import ootype
-from pypy.rpython.annlowlevel import hlstr, llstr, cast_base_ptr_to_instance
+from pypy.rpython.annlowlevel import hlstr, cast_base_ptr_to_instance
 from pypy.rpython.annlowlevel import cast_object_to_ptr
 from pypy.rlib.objectmodel import specialize, we_are_translated, r_dict
 from pypy.rlib.rarithmetic import intmask
@@ -502,7 +502,6 @@
         if hasattr(self, 'set_future_values'):
             return self.set_future_values
 
-        warmrunnerdesc = self.warmrunnerdesc
         jitdriver_sd   = self.jitdriver_sd
         cpu = self.cpu
         vinfo = jitdriver_sd.virtualizable_info
@@ -518,7 +517,6 @@
         #
         if vinfo is not None:
             i0 = len(jitdriver_sd._red_args_types)
-            num_green_args = jitdriver_sd.num_green_args
             index_of_virtualizable = jitdriver_sd.index_of_virtualizable
             vable_static_fields = unrolling_iterable(
                 zip(vinfo.static_extra_types, vinfo.static_fields))
diff --git a/pypy/jit/tool/pypytrace-mode.el b/pypy/jit/tool/pypytrace-mode.el
--- a/pypy/jit/tool/pypytrace-mode.el
+++ b/pypy/jit/tool/pypytrace-mode.el
@@ -32,7 +32,7 @@
     ("<.*FieldDescr \\([^ ]*\\)" (1 'font-lock-variable-name-face))
     ;; comment out debug_merge_point, but then highlight specific part of it
     ("^debug_merge_point.*" . font-lock-comment-face)
-    ("^\\(debug_merge_point\\).*code object\\(.*\\), file \\('.*'\\), \\(line .*\\)> \\(.*\\)"
+    ("^\\(debug_merge_point\\).*code object\\(.*\\). file \\('.*'\\). \\(line .*\\)> \\(.*\\)"
      (1 'compilation-warning t)
      (2 'escape-glyph t)
      (3 'font-lock-string-face t)
diff --git a/pypy/module/__builtin__/functional.py b/pypy/module/__builtin__/functional.py
--- a/pypy/module/__builtin__/functional.py
+++ b/pypy/module/__builtin__/functional.py
@@ -294,7 +294,7 @@
             break
         new_frame = space.createframe(code, w_func.w_func_globals,
                                       w_func.closure)
-        new_frame.fastlocals_w[0] = w_item
+        new_frame.locals_stack_w[0] = w_item
         w_res = new_frame.run()
         result_w.append(w_res)
     return result_w
diff --git a/pypy/module/__pypy__/__init__.py b/pypy/module/__pypy__/__init__.py
--- a/pypy/module/__pypy__/__init__.py
+++ b/pypy/module/__pypy__/__init__.py
@@ -3,6 +3,14 @@
 from pypy.interpreter.mixedmodule import MixedModule
 from pypy.module.imp.importing import get_pyc_magic
 
+
+class BuildersModule(MixedModule):
+    appleveldefs = {}
+
+    interpleveldefs = {
+        "UnicodeBuilder": "interp_builders.W_UnicodeBuilder",
+    }
+
 class Module(MixedModule):
     appleveldefs = {
     }
@@ -19,6 +27,10 @@
         'lookup_special'            : 'interp_magic.lookup_special',
     }
 
+    submodules = {
+        "builders": BuildersModule,
+    }
+
     def setup_after_space_initialization(self):
         """NOT_RPYTHON"""
         if not self.space.config.translating:
diff --git a/pypy/module/__pypy__/interp_builders.py b/pypy/module/__pypy__/interp_builders.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/__pypy__/interp_builders.py
@@ -0,0 +1,50 @@
+from pypy.interpreter.baseobjspace import Wrappable
+from pypy.interpreter.error import OperationError
+from pypy.interpreter.gateway import interp2app, unwrap_spec
+from pypy.interpreter.typedef import TypeDef
+from pypy.rlib.rstring import UnicodeBuilder
+
+
+class W_UnicodeBuilder(Wrappable):
+    def __init__(self, space, size):
+        if size == -1:
+            self.builder = UnicodeBuilder()
+        else:
+            self.builder = UnicodeBuilder(size)
+        self.done = False
+
+    def _check_done(self, space):
+        if self.done:
+            raise OperationError(space.w_ValueError, space.wrap("Can't operate on a done builder"))
+
+    @unwrap_spec(size=int)
+    def descr__new__(space, w_subtype, size=-1):
+        return W_UnicodeBuilder(space, size)
+
+    @unwrap_spec(s=unicode)
+    def descr_append(self, space, s):
+        self._check_done(space)
+        self.builder.append(s)
+
+    @unwrap_spec(s=unicode, start=int, end=int)
+    def descr_append_slice(self, space, s, start, end):
+        self._check_done(space)
+        if not 0 <= start <= end <= len(s):
+            raise OperationError(space.w_ValueError, space.wrap("bad start/stop"))
+        self.builder.append_slice(s, start, end)
+
+    def descr_build(self, space):
+        self._check_done(space)
+        w_s = space.wrap(self.builder.build())
+        self.done = True
+        return w_s
+
+
+W_UnicodeBuilder.typedef = TypeDef("UnicodeBuilder",
+    __new__ = interp2app(W_UnicodeBuilder.descr__new__.im_func),
+
+    append = interp2app(W_UnicodeBuilder.descr_append),
+    append_slice = interp2app(W_UnicodeBuilder.descr_append_slice),
+    build = interp2app(W_UnicodeBuilder.descr_build),
+)
+W_UnicodeBuilder.typedef.acceptable_as_base_class = False
\ No newline at end of file
diff --git a/pypy/module/__pypy__/interp_debug.py b/pypy/module/__pypy__/interp_debug.py
--- a/pypy/module/__pypy__/interp_debug.py
+++ b/pypy/module/__pypy__/interp_debug.py
@@ -1,15 +1,19 @@
 from pypy.interpreter.gateway import interp2app, NoneNotWrapped, unwrap_spec
 from pypy.interpreter.error import OperationError
-from pypy.rlib import debug
+from pypy.rlib import debug, jit
 
+
+@jit.dont_look_inside
 @unwrap_spec(category=str)
 def debug_start(space, category):
     debug.debug_start(category)
 
+@jit.dont_look_inside
 def debug_print(space, args_w):
     parts = [space.str_w(space.str(w_item)) for w_item in args_w]
     debug.debug_print(' '.join(parts))
 
+@jit.dont_look_inside
 @unwrap_spec(category=str)
 def debug_stop(space, category):
     debug.debug_stop(category)
diff --git a/pypy/module/__pypy__/test/test_builders.py b/pypy/module/__pypy__/test/test_builders.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/__pypy__/test/test_builders.py
@@ -0,0 +1,34 @@
+from pypy.conftest import gettestobjspace
+
+
+class AppTestBuilders(object):
+    def setup_class(cls):
+        cls.space = gettestobjspace(usemodules=['__pypy__'])
+
+    def test_simple(self):
+        from __pypy__.builders import UnicodeBuilder
+        b = UnicodeBuilder()
+        b.append(u"abc")
+        b.append(u"123")
+        b.append(u"1")
+        s = b.build()
+        assert s == u"abc1231"
+        raises(ValueError, b.build)
+        raises(ValueError, b.append, u"123")
+
+    def test_preallocate(self):
+        from __pypy__.builders import UnicodeBuilder
+        b = UnicodeBuilder(10)
+        b.append(u"abc")
+        b.append(u"123")
+        s = b.build()
+        assert s == u"abc123"
+
+    def test_append_slice(self):
+        from __pypy__.builders import UnicodeBuilder
+        b = UnicodeBuilder()
+        b.append_slice(u"abcdefgh", 2, 5)
+        raises(ValueError, b.append_slice, u"1", 2, 1)
+        s = b.build()
+        assert s == "cde"
+        raises(ValueError, b.append_slice, u"abc", 1, 2)
\ No newline at end of file
diff --git a/pypy/module/_ssl/interp_ssl.py b/pypy/module/_ssl/interp_ssl.py
--- a/pypy/module/_ssl/interp_ssl.py
+++ b/pypy/module/_ssl/interp_ssl.py
@@ -900,7 +900,7 @@
 
 def _ssl_thread_id_function():
     from pypy.module.thread import ll_thread
-    return rffi.cast(rffi.INT, ll_thread.get_ident())
+    return rffi.cast(rffi.LONG, ll_thread.get_ident())
 
 def setup_ssl_threads():
     from pypy.module.thread import ll_thread
diff --git a/pypy/module/_stackless/test/test_greenlet.py b/pypy/module/_stackless/test/test_greenlet.py
--- a/pypy/module/_stackless/test/test_greenlet.py
+++ b/pypy/module/_stackless/test/test_greenlet.py
@@ -72,6 +72,23 @@
         g1 = greenlet(f)
         raises(ValueError, g2.switch)
 
+
+    def test_exc_info_save_restore(self):
+        from _stackless import greenlet
+        import sys
+        def f():
+            try:
+                raise ValueError('fun')
+            except:
+                exc_info = sys.exc_info()
+                greenlet(h).switch()
+                assert exc_info == sys.exc_info()
+
+        def h():
+            assert sys.exc_info() == (None, None, None)
+
+        greenlet(f).switch()
+
     def test_exception(self):
         from _stackless import greenlet
         import sys
diff --git a/pypy/module/cpyext/__init__.py b/pypy/module/cpyext/__init__.py
--- a/pypy/module/cpyext/__init__.py
+++ b/pypy/module/cpyext/__init__.py
@@ -39,6 +39,7 @@
 import pypy.module.cpyext.object
 import pypy.module.cpyext.stringobject
 import pypy.module.cpyext.tupleobject
+import pypy.module.cpyext.setobject
 import pypy.module.cpyext.dictobject
 import pypy.module.cpyext.intobject
 import pypy.module.cpyext.longobject
@@ -64,6 +65,7 @@
 import pypy.module.cpyext.memoryobject
 import pypy.module.cpyext.codecs
 import pypy.module.cpyext.pyfile
+import pypy.module.cpyext.pystrtod
 
 # now that all rffi_platform.Struct types are registered, configure them
 api.configure_types()
diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py
--- a/pypy/module/cpyext/api.py
+++ b/pypy/module/cpyext/api.py
@@ -562,7 +562,8 @@
             elif callable.api_func.restype is not lltype.Void:
                 retval = rffi.cast(callable.api_func.restype, result)
         except Exception, e:
-            print 'Fatal error in cpyext, calling', callable.__name__
+            print 'Fatal error in cpyext, CPython compatibility layer, calling', callable.__name__
+            print 'Either report a bug or consider not using this particular extension'
             if not we_are_translated():
                 import traceback
                 traceback.print_exc()
diff --git a/pypy/module/cpyext/pystrtod.py b/pypy/module/cpyext/pystrtod.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/cpyext/pystrtod.py
@@ -0,0 +1,68 @@
+import errno
+from pypy.interpreter.error import OperationError
+from pypy.module.cpyext.api import cpython_api
+from pypy.module.cpyext.pyobject import PyObject
+from pypy.rlib import rdtoa
+from pypy.rlib import rfloat
+from pypy.rlib import rposix
+from pypy.rpython.lltypesystem import lltype
+from pypy.rpython.lltypesystem import rffi
+
+
+@cpython_api([rffi.CCHARP, rffi.CCHARPP, PyObject], rffi.DOUBLE, error=-1.0)
+def PyOS_string_to_double(space, s, endptr, w_overflow_exception):
+    """Convert a string s to a double, raising a Python
+    exception on failure.  The set of accepted strings corresponds to
+    the set of strings accepted by Python's float() constructor,
+    except that s must not have leading or trailing whitespace.
+    The conversion is independent of the current locale.
+
+    If endptr is NULL, convert the whole string.  Raise
+    ValueError and return -1.0 if the string is not a valid
+    representation of a floating-point number.
+
+    If endptr is not NULL, convert as much of the string as
+    possible and set *endptr to point to the first unconverted
+    character.  If no initial segment of the string is the valid
+    representation of a floating-point number, set *endptr to point
+    to the beginning of the string, raise ValueError, and return
+    -1.0.
+
+    If s represents a value that is too large to store in a float
+    (for example, "1e500" is such a string on many platforms) then
+    if overflow_exception is NULL return Py_HUGE_VAL (with
+    an appropriate sign) and don't set any exception.  Otherwise,
+    overflow_exception must point to a Python exception object;
+    raise that exception and return -1.0.  In both cases, set
+    *endptr to point to the first character after the converted value.
+
+    If any other error occurs during the conversion (for example an
+    out-of-memory error), set the appropriate Python exception and
+    return -1.0.
+    """
+    user_endptr = True
+    try:
+        if not endptr:
+            endptr = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw')
+            user_endptr = False
+        result = rdtoa.dg_strtod(s, endptr)
+        endpos = (rffi.cast(rffi.LONG, endptr[0]) -
+                  rffi.cast(rffi.LONG, s))
+        if endpos == 0 or (not user_endptr and not endptr[0][0] == '\0'):
+            raise OperationError(
+                space.w_ValueError,
+                space.wrap('invalid input at position %s' % endpos))
+        if rposix.get_errno() == errno.ERANGE:
+            rposix.set_errno(0)
+            if w_overflow_exception is None:
+                if result > 0:
+                    return rfloat.INFINITY
+                else:
+                    return -rfloat.INFINITY
+            else:
+                raise OperationError(w_overflow_exception,
+                                     space.wrap('value too large'))
+        return result
+    finally:
+        if not user_endptr:
+            lltype.free(endptr, flavor='raw')
diff --git a/pypy/module/cpyext/setobject.py b/pypy/module/cpyext/setobject.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/cpyext/setobject.py
@@ -0,0 +1,46 @@
+from pypy.interpreter.error import OperationError
+from pypy.rpython.lltypesystem import rffi, lltype
+from pypy.module.cpyext.api import (cpython_api, Py_ssize_t, CANNOT_FAIL,
+                                    build_type_checkers)
+from pypy.module.cpyext.pyobject import (PyObject, PyObjectP, Py_DecRef,
+    borrow_from, make_ref, from_ref)
+from pypy.module.cpyext.pyerrors import PyErr_BadInternalCall
+from pypy.objspace.std.setobject import W_SetObject, newset
+from pypy.objspace.std.smalltupleobject import W_SmallTupleObject
+
+
+PySet_Check, PySet_CheckExact = build_type_checkers("Set")
+
+
+@cpython_api([PyObject], PyObject)
+def PySet_New(space, w_iterable):
+    if w_iterable is None:
+        return space.call_function(space.w_set)
+    else:
+        return space.call_function(space.w_set, w_iterable)
+
+@cpython_api([PyObject, PyObject], rffi.INT_real, error=-1)
+def PySet_Add(space, w_s, w_obj):
+    if not PySet_Check(space, w_s):
+        PyErr_BadInternalCall(space)
+    space.call_method(w_s, 'add', w_obj)
+    return 0
+
+@cpython_api([PyObject, PyObject], rffi.INT_real, error=-1)
+def PySet_Discard(space, w_s, w_obj):
+    if not PySet_Check(space, w_s):
+        PyErr_BadInternalCall(space)
+    space.call_method(w_s, 'discard', w_obj)
+    return 0
+
+
+@cpython_api([PyObject], Py_ssize_t, error=CANNOT_FAIL)
+def PySet_GET_SIZE(space, w_s):
+    return space.int_w(space.len(w_s))
+
+@cpython_api([PyObject], Py_ssize_t, error=-1)
+def PySet_Size(space, ref):
+    if not PySet_Check(space, ref):
+        raise OperationError(space.w_TypeError,
+                             space.wrap("expected set object"))
+    return PySet_GET_SIZE(space, ref)
diff --git a/pypy/module/cpyext/stubs.py b/pypy/module/cpyext/stubs.py
--- a/pypy/module/cpyext/stubs.py
+++ b/pypy/module/cpyext/stubs.py
@@ -480,39 +480,6 @@
     """Create a new Python complex number object from a C Py_complex value."""
     raise NotImplementedError
 
-@cpython_api([rffi.CCHARP, rffi.CCHARPP, PyObject], rffi.DOUBLE, error=-1.0)
-def PyOS_string_to_double(space, s, endptr, overflow_exception):
-    """Convert a string s to a double, raising a Python
-    exception on failure.  The set of accepted strings corresponds to
-    the set of strings accepted by Python's float() constructor,
-    except that s must not have leading or trailing whitespace.
-    The conversion is independent of the current locale.
-
-    If endptr is NULL, convert the whole string.  Raise
-    ValueError and return -1.0 if the string is not a valid
-    representation of a floating-point number.
-
-    If endptr is not NULL, convert as much of the string as
-    possible and set *endptr to point to the first unconverted
-    character.  If no initial segment of the string is the valid
-    representation of a floating-point number, set *endptr to point
-    to the beginning of the string, raise ValueError, and return
-    -1.0.
-
-    If s represents a value that is too large to store in a float
-    (for example, "1e500" is such a string on many platforms) then
-    if overflow_exception is NULL return Py_HUGE_VAL (with
-    an appropriate sign) and don't set any exception.  Otherwise,
-    overflow_exception must point to a Python exception object;
-    raise that exception and return -1.0.  In both cases, set
-    *endptr to point to the first character after the converted value.
-
-    If any other error occurs during the conversion (for example an
-    out-of-memory error), set the appropriate Python exception and
-    return -1.0.
-    """
-    raise NotImplementedError
-
 @cpython_api([rffi.CCHARP, rffi.CCHARPP], rffi.DOUBLE, error=CANNOT_FAIL)
 def PyOS_ascii_strtod(space, nptr, endptr):
     """Convert a string to a double. This function behaves like the Standard C
diff --git a/pypy/module/cpyext/test/test_pystrtod.py b/pypy/module/cpyext/test/test_pystrtod.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/cpyext/test/test_pystrtod.py
@@ -0,0 +1,93 @@
+import math
+
+from pypy.module.cpyext.test.test_api import BaseApiTest
+from pypy.rpython.lltypesystem import rffi
+from pypy.rpython.lltypesystem import lltype
+
+
+class TestPyOS_string_to_double(BaseApiTest):
+
+    def test_simple_float(self, api):
+        s = rffi.str2charp('0.4')
+        null = lltype.nullptr(rffi.CCHARPP.TO)
+        r = api.PyOS_string_to_double(s, null, None)
+        assert r == 0.4
+        rffi.free_charp(s)
+
+    def test_empty_string(self, api):
+        s = rffi.str2charp('')
+        null = lltype.nullptr(rffi.CCHARPP.TO)
+        r = api.PyOS_string_to_double(s, null, None)
+        assert r == -1.0
+        raises(ValueError)
+        api.PyErr_Clear()
+        rffi.free_charp(s)
+
+    def test_bad_string(self, api):
+        s = rffi.str2charp(' 0.4')
+        null = lltype.nullptr(rffi.CCHARPP.TO)
+        r = api.PyOS_string_to_double(s, null, None)
+        assert r == -1.0
+        raises(ValueError)
+        api.PyErr_Clear()
+        rffi.free_charp(s)
+
+    def test_overflow_pos(self, api):
+        s = rffi.str2charp('1e500')
+        null = lltype.nullptr(rffi.CCHARPP.TO)
+        r = api.PyOS_string_to_double(s, null, None)
+        assert math.isinf(r)
+        assert r > 0
+        rffi.free_charp(s)
+
+    def test_overflow_neg(self, api):
+        s = rffi.str2charp('-1e500')
+        null = lltype.nullptr(rffi.CCHARPP.TO)
+        r = api.PyOS_string_to_double(s, null, None)
+        assert math.isinf(r)
+        assert r < 0
+        rffi.free_charp(s)
+
+    def test_overflow_exc(self, space, api):
+        s = rffi.str2charp('1e500')
+        null = lltype.nullptr(rffi.CCHARPP.TO)
+        r = api.PyOS_string_to_double(s, null, space.w_ValueError)
+        assert r == -1.0
+        raises(ValueError)
+        api.PyErr_Clear()
+        rffi.free_charp(s)
+
+    def test_endptr_number(self, api):
+        s = rffi.str2charp('0.4')
+        endp = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw')
+        r = api.PyOS_string_to_double(s, endp, None)
+        assert r == 0.4
+        endp_addr = rffi.cast(rffi.LONG, endp[0])
+        s_addr = rffi.cast(rffi.LONG, s)
+        assert endp_addr == s_addr + 3
+        rffi.free_charp(s)
+        lltype.free(endp, flavor='raw')
+
+    def test_endptr_tail(self, api):
+        s = rffi.str2charp('0.4 foo')
+        endp = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw')
+        r = api.PyOS_string_to_double(s, endp, None)
+        assert r == 0.4
+        endp_addr = rffi.cast(rffi.LONG, endp[0])
+        s_addr = rffi.cast(rffi.LONG, s)
+        assert endp_addr == s_addr + 3
+        rffi.free_charp(s)
+        lltype.free(endp, flavor='raw')
+
+    def test_endptr_no_conversion(self, api):
+        s = rffi.str2charp('foo')
+        endp = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw')
+        r = api.PyOS_string_to_double(s, endp, None)
+        assert r == -1.0
+        raises(ValueError)
+        endp_addr = rffi.cast(rffi.LONG, endp[0])
+        s_addr = rffi.cast(rffi.LONG, s)
+        assert endp_addr == s_addr
+        api.PyErr_Clear()
+        rffi.free_charp(s)
+        lltype.free(endp, flavor='raw')
diff --git a/pypy/module/cpyext/test/test_setobject.py b/pypy/module/cpyext/test/test_setobject.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/cpyext/test/test_setobject.py
@@ -0,0 +1,29 @@
+import py
+
+from pypy.module.cpyext.pyobject import PyObject, PyObjectP, make_ref, from_ref
+from pypy.module.cpyext.test.test_api import BaseApiTest
+from pypy.rpython.lltypesystem import rffi, lltype
+from pypy.conftest import gettestobjspace
+
+
+class TestTupleObject(BaseApiTest):
+    def test_setobj(self, space, api):
+        assert not api.PySet_Check(space.w_None)
+        assert api.PySet_Add(space.w_None, space.w_None) == -1
+        api.PyErr_Clear()
+        w_set = space.call_function(space.w_set)
+        space.call_method(w_set, 'update', space.wrap([1,2,3,4]))
+        assert api.PySet_Size(w_set) == 4
+        assert api.PySet_GET_SIZE(w_set) == 4
+        raises(TypeError, api.PySet_Size(space.newlist([])))
+        api.PyErr_Clear()
+
+    def test_set_add_discard(self, space, api):
+        w_set = api.PySet_New(None)
+        assert api.PySet_Size(w_set) == 0
+        w_set = api.PySet_New(space.wrap([1,2,3,4]))
+        assert api.PySet_Size(w_set) == 4
+        api.PySet_Add(w_set, space.wrap(6))
+        assert api.PySet_Size(w_set) == 5
+        api.PySet_Discard(w_set, space.wrap(6))
+        assert api.PySet_Size(w_set) == 4
diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py
--- a/pypy/module/cpyext/test/test_unicodeobject.py
+++ b/pypy/module/cpyext/test/test_unicodeobject.py
@@ -367,3 +367,14 @@
                     data, len(u), lltype.nullptr(rffi.CCHARP.TO))
         rffi.free_wcharp(data)
 
+    def test_format(self, space, api):
+        w_format = space.wrap(u'hi %s')
+        w_args = space.wrap((u'test',))
+        w_formated = api.PyUnicode_Format(w_format, w_args)
+        assert space.unwrap(w_formated) == space.unwrap(space.mod(w_format, w_args))
+
+    def test_join(self, space, api):
+        w_sep = space.wrap(u'<sep>')
+        w_seq = space.wrap([u'a', u'b'])
+        w_joined = api.PyUnicode_Join(w_sep, w_seq)
+        assert space.unwrap(w_joined) == u'a<sep>b'
diff --git a/pypy/module/cpyext/test/test_weakref.py b/pypy/module/cpyext/test/test_weakref.py
--- a/pypy/module/cpyext/test/test_weakref.py
+++ b/pypy/module/cpyext/test/test_weakref.py
@@ -7,6 +7,7 @@
         w_ref = api.PyWeakref_NewRef(w_obj, space.w_None)
         assert w_ref is not None
         assert space.is_w(api.PyWeakref_GetObject(w_ref), w_obj)
+        assert space.is_w(api.PyWeakref_GET_OBJECT(w_ref), w_obj)
         assert space.is_w(api.PyWeakref_LockObject(w_ref), w_obj)
 
         w_obj = space.newtuple([])
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -523,3 +523,11 @@
     copies sizeof(Py_UNICODE) * length bytes from source to target"""
     for i in range(0, length):
         target[i] = source[i]
+
+@cpython_api([PyObject, PyObject], PyObject)
+def PyUnicode_Format(space, w_format, w_args):
+    return space.mod(w_format, w_args)
+
+@cpython_api([PyObject, PyObject], PyObject)
+def PyUnicode_Join(space, w_sep, w_seq):
+    return space.call_method(w_sep, 'join', w_seq)
diff --git a/pypy/module/cpyext/weakrefobject.py b/pypy/module/cpyext/weakrefobject.py
--- a/pypy/module/cpyext/weakrefobject.py
+++ b/pypy/module/cpyext/weakrefobject.py
@@ -21,6 +21,10 @@
     """Return the referenced object from a weak reference.  If the referent is
     no longer live, returns None. This function returns a borrowed reference.
     """
+    return PyWeakref_GET_OBJECT(space, w_ref)
+
+@cpython_api([PyObject], PyObject)
+def PyWeakref_GET_OBJECT(space, w_ref):
     return borrow_from(w_ref, space.call_function(w_ref))
 
 @cpython_api([PyObject], PyObject)
diff --git a/pypy/module/math/__init__.py b/pypy/module/math/__init__.py
--- a/pypy/module/math/__init__.py
+++ b/pypy/module/math/__init__.py
@@ -4,6 +4,7 @@
 
 class Module(MixedModule):
     appleveldefs = {
+       'factorial' : 'app_math.factorial'
     }
 
     interpleveldefs = {
@@ -40,7 +41,6 @@
        'isnan'          : 'interp_math.isnan',
        'trunc'          : 'interp_math.trunc',
        'fsum'           : 'interp_math.fsum',
-       'factorial'      : 'interp_math.factorial',
        'asinh'          : 'interp_math.asinh',
        'acosh'          : 'interp_math.acosh',
        'atanh'          : 'interp_math.atanh',
diff --git a/pypy/module/math/app_math.py b/pypy/module/math/app_math.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/math/app_math.py
@@ -0,0 +1,13 @@
+def factorial(x):
+    """Find x!."""
+    if isinstance(x, float):
+        fl = int(x)
+        if fl != x:
+            raise ValueError("float arguments must be integral")
+        x = fl
+    if x < 0:
+        raise ValueError("x must be >= 0")
+    res = 1
+    for i in range(1, x + 1):
+        res *= i
+    return res
diff --git a/pypy/module/math/interp_math.py b/pypy/module/math/interp_math.py
--- a/pypy/module/math/interp_math.py
+++ b/pypy/module/math/interp_math.py
@@ -373,22 +373,6 @@
                 hi = v
     return space.wrap(hi)
 
-def factorial(space, w_x):
-    """Find x!."""
-    if space.isinstance_w(w_x, space.w_float):
-        fl = space.float_w(w_x)
-        if math.floor(fl) != fl:
-            raise OperationError(space.w_ValueError,
-                                 space.wrap("float arguments must be integral"))
-        w_x = space.long(w_x)
-    x = space.int_w(w_x)
-    if x < 0:
-        raise OperationError(space.w_ValueError, space.wrap("x must be >= 0"))
-    w_res = space.wrap(1)
-    for i in range(1, x + 1):
-        w_res = space.mul(w_res, space.wrap(i))
-    return w_res
-
 def log1p(space, w_x):
     """Find log(x + 1)."""
     return math1(space, rfloat.log1p, w_x)
diff --git a/pypy/module/micronumpy/__init__.py b/pypy/module/micronumpy/__init__.py
--- a/pypy/module/micronumpy/__init__.py
+++ b/pypy/module/micronumpy/__init__.py
@@ -16,6 +16,7 @@
         'absolute': 'interp_ufuncs.absolute',
         'copysign': 'interp_ufuncs.copysign',
         'exp': 'interp_ufuncs.exp',
+        'floor': 'interp_ufuncs.floor',
         'maximum': 'interp_ufuncs.maximum',
         'minimum': 'interp_ufuncs.minimum',
         'negative': 'interp_ufuncs.negative',
diff --git a/pypy/module/micronumpy/interp_ufuncs.py b/pypy/module/micronumpy/interp_ufuncs.py
--- a/pypy/module/micronumpy/interp_ufuncs.py
+++ b/pypy/module/micronumpy/interp_ufuncs.py
@@ -62,6 +62,10 @@
     return 1.0 / value
 
 @ufunc
+def floor(value):
+    return math.floor(value)
+
+@ufunc
 def sign(value):
     if value == 0.0:
         return 0.0
diff --git a/pypy/module/micronumpy/test/test_ufuncs.py b/pypy/module/micronumpy/test/test_ufuncs.py
--- a/pypy/module/micronumpy/test/test_ufuncs.py
+++ b/pypy/module/micronumpy/test/test_ufuncs.py
@@ -67,6 +67,15 @@
         for i in range(4):
             assert b[i] == reference[i]
 
+    def test_floor(self):
+        from numpy import array, floor
+
+        reference = [-2.0, -1.0, 0.0, 1.0, 1.0]
+        a = array([-1.4, -1.0, 0.0, 1.0, 1.4])
+        b = floor(a)
+        for i in range(5):
+            assert b[i] == reference[i]
+
     def test_copysign(self):
         from numpy import array, copysign
 
diff --git a/pypy/module/pypyjit/interp_jit.py b/pypy/module/pypyjit/interp_jit.py
--- a/pypy/module/pypyjit/interp_jit.py
+++ b/pypy/module/pypyjit/interp_jit.py
@@ -21,8 +21,7 @@
 from pypy.module.pypyjit.interp_resop import debug_merge_point_from_boxes
 
 PyFrame._virtualizable2_ = ['last_instr', 'pycode',
-                            'valuestackdepth', 'valuestack_w[*]',
-                            'fastlocals_w[*]',
+                            'valuestackdepth', 'locals_stack_w[*]',
                             'last_exception',
                             'lastblock',
                             'is_being_profiled',
diff --git a/pypy/module/pypyjit/policy.py b/pypy/module/pypyjit/policy.py
--- a/pypy/module/pypyjit/policy.py
+++ b/pypy/module/pypyjit/policy.py
@@ -14,7 +14,8 @@
             modname, _ = modname.split('.', 1)
         if modname in ['pypyjit', 'signal', 'micronumpy', 'math', 'exceptions',
                        'imp', 'sys', 'array', '_ffi', 'itertools', 'operator',
-                       'posix', '_socket', '_sre', '_lsprof', '_weakref']:
+                       'posix', '_socket', '_sre', '_lsprof', '_weakref',
+                       '__pypy__']:
             return True
         return False
 
diff --git a/pypy/module/pypyjit/test_pypy_c/test_00_model.py b/pypy/module/pypyjit/test_pypy_c/test_00_model.py
--- a/pypy/module/pypyjit/test_pypy_c/test_00_model.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_00_model.py
@@ -58,6 +58,8 @@
         stdout, stderr = pipe.communicate()
         if stderr.startswith('SKIP:'):
             py.test.skip(stderr)
+        if stderr.startswith('debug_alloc.h:'):   # lldebug builds
+            stderr = ''
         assert not stderr
         #
         # parse the JIT log
diff --git a/pypy/module/pypyjit/test_pypy_c/test_array.py b/pypy/module/pypyjit/test_pypy_c/test_array.py
--- a/pypy/module/pypyjit/test_pypy_c/test_array.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_array.py
@@ -46,7 +46,7 @@
             guard_no_overflow(descr=<Guard4>)
             i18 = int_add(i7, 1)
             --TICK--
-            jump(p0, p1, p2, p3, p4, p5, p6, i18, i16, i9, i10, descr=<Loop0>)
+            jump(p0, p1, p2, p3, p4, p5, i18, i16, p8, i9, i10, descr=<Loop0>)
         """)
 
     def test_array_intimg(self):
@@ -83,7 +83,7 @@
             setarrayitem_raw(i11, i8, _, descr=<.*ArrayNoLengthDescr>)
             i28 = int_add(i8, 1)
             --TICK--
-            jump(p0, p1, p2, p3, p4, p5, p6, p7, i28, i15, i10, i11, descr=<Loop0>)
+            jump(p0, p1, p2, p3, p4, p5, p6, i28, i15, p9, i10, i11, descr=<Loop0>)
         """)
 
 
diff --git a/pypy/module/pypyjit/test_pypy_c/test_call.py b/pypy/module/pypyjit/test_pypy_c/test_call.py
--- a/pypy/module/pypyjit/test_pypy_c/test_call.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_call.py
@@ -187,7 +187,7 @@
             guard_no_overflow(descr=<Guard5>)
             i18 = force_token()
             --TICK--
-            jump(p0, p1, p2, p3, p4, p5, i8, p7, i17, i9, p10, p11, p12, descr=<Loop0>)
+            jump(p0, p1, p2, p3, p4, i8, p7, i17, p8, i9, p10, p11, p12, descr=<Loop0>)
         """)
 
     def test_default_and_kw(self):
diff --git a/pypy/module/pypyjit/test_pypy_c/test_instance.py b/pypy/module/pypyjit/test_pypy_c/test_instance.py
--- a/pypy/module/pypyjit/test_pypy_c/test_instance.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_instance.py
@@ -115,7 +115,6 @@
         # ----------------------
         loop, = log.loops_by_filename(self.filepath)
         assert loop.match("""
-            i8 = getfield_gc_pure(p5, descr=<SignedFieldDescr .*W_IntObject.inst_intval.*>)
             i9 = int_lt(i8, i7)
             guard_true(i9, descr=.*)
             guard_not_invalidated(descr=.*)
@@ -125,7 +124,7 @@
             p20 = new_with_vtable(ConstClass(W_IntObject))
             setfield_gc(p20, i11, descr=<SignedFieldDescr.*W_IntObject.inst_intval .*>)
             setfield_gc(ConstPtr(ptr21), p20, descr=<GcPtrFieldDescr .*TypeCell.inst_w_value .*>)
-            jump(p0, p1, p2, p3, p4, p20, p6, i7, descr=<Loop.>)
+            jump(p0, p1, p2, p3, p4, p20, p6, i11, i7, descr=<Loop.>)
         """)
 
     def test_oldstyle_newstyle_mix(self):
diff --git a/pypy/module/pypyjit/test_pypy_c/test_intbound.py b/pypy/module/pypyjit/test_pypy_c/test_intbound.py
--- a/pypy/module/pypyjit/test_pypy_c/test_intbound.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_intbound.py
@@ -97,7 +97,7 @@
             guard_no_overflow(descr=...)
             i17 = int_add(i8, 1)
             --TICK--
-            jump(p0, p1, p2, p3, p4, p5, i14, i12, i17, i9, descr=<Loop0>)
+            jump(p0, p1, p2, p3, p4, i14, i12, i17, p8, i9, descr=<Loop0>)
         """)
 
     def test_intbound_sub_lt(self):
@@ -149,7 +149,7 @@
             guard_no_overflow(descr=...)
             i19 = int_add(i8, 1)
             --TICK--
-            jump(p0, p1, p2, p3, p4, p5, i16, i14, i19, i9, descr=<Loop0>)
+            jump(p0, p1, p2, p3, p4, i16, i14, i19, p8, i9, descr=<Loop0>)
         """)
 
     def test_intbound_addmul_ge(self):
@@ -177,7 +177,7 @@
             guard_no_overflow(descr=...)
             i21 = int_add(i8, 1)
             --TICK--
-            jump(p0, p1, p2, p3, p4, p5, i18, i14, i21, descr=<Loop0>)
+            jump(p0, p1, p2, p3, p4, i18, i14, i21, p8, descr=<Loop0>)
         """)
 
     def test_intbound_eq(self):
@@ -209,7 +209,7 @@
             guard_no_overflow(descr=...)
             i16 = int_add(i8, 1)
             --TICK--
-            jump(p0, p1, p2, p3, p4, p5, p6, i14, i16, descr=<Loop0>)
+            jump(p0, p1, p2, p3, p4, p6, i14, i16, p8, descr=<Loop0>)
         """)
 
     def test_intbound_mul(self):
diff --git a/pypy/module/pypyjit/test_pypy_c/test_misc.py b/pypy/module/pypyjit/test_pypy_c/test_misc.py
--- a/pypy/module/pypyjit/test_pypy_c/test_misc.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_misc.py
@@ -167,7 +167,7 @@
             guard_false(i16, descr=<Guard3>)
             p17 = getarrayitem_gc(p15, i12, descr=<GcPtrArrayDescr>)
             i19 = int_add(i12, 1)
-            setfield_gc(p4, i19, descr=<SignedFieldDescr .*W_AbstractSeqIterObject.inst_index .*>)
+            setfield_gc(p9, i19, descr=<SignedFieldDescr .*W_AbstractSeqIterObject.inst_index .*>)
             guard_nonnull_class(p17, 146982464, descr=<Guard4>)
             i21 = getfield_gc(p17, descr=<SignedFieldDescr .*W_ArrayTypei.inst_len .*>)
             i23 = int_lt(0, i21)
@@ -179,7 +179,7 @@
             i28 = int_add_ovf(i10, i25)
             guard_no_overflow(descr=<Guard7>)
             --TICK--
-            jump(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, i28, i25, i19, i13, p14, p15, descr=<Loop0>)
+            jump(p0, p1, p2, p3, p4, p5, p6, i28, i25, p9, p10, p11, i19, i13, p14, p15, descr=<Loop0>)
         """)
 
 
diff --git a/pypy/objspace/flow/flowcontext.py b/pypy/objspace/flow/flowcontext.py
--- a/pypy/objspace/flow/flowcontext.py
+++ b/pypy/objspace/flow/flowcontext.py
@@ -384,8 +384,9 @@
     # hack for unrolling iterables, don't use this
     def replace_in_stack(self, oldvalue, newvalue):
         w_new = Constant(newvalue)
-        stack_items_w = self.crnt_frame.valuestack_w
-        for i in range(self.crnt_frame.valuestackdepth-1, -1, -1):
+        f = self.crnt_frame
+        stack_items_w = f.locals_stack_w
+        for i in range(f.valuestackdepth-1, f.nlocals-1, -1):
             w_v = stack_items_w[i]
             if isinstance(w_v, Constant):
                 if w_v.value is oldvalue:
diff --git a/pypy/objspace/flow/framestate.py b/pypy/objspace/flow/framestate.py
--- a/pypy/objspace/flow/framestate.py
+++ b/pypy/objspace/flow/framestate.py
@@ -10,7 +10,7 @@
     def __init__(self, state):
         if isinstance(state, PyFrame):
             # getfastscope() can return real None, for undefined locals
-            data = state.getfastscope() + state.savevaluestack()
+            data = state.save_locals_stack()
             if state.last_exception is None:
                 data.append(Constant(None))
                 data.append(Constant(None))
@@ -36,11 +36,9 @@
 
     def restoreframe(self, frame):
         if isinstance(frame, PyFrame):
-            fastlocals = len(frame.fastlocals_w)
             data = self.mergeable[:]
             recursively_unflatten(frame.space, data)
-            frame.setfastscope(data[:fastlocals])  # Nones == undefined locals
-            frame.restorevaluestack(data[fastlocals:-2])
+            frame.restore_locals_stack(data[:-2])  # Nones == undefined locals
             if data[-2] == Constant(None):
                 assert data[-1] == Constant(None)
                 frame.last_exception = None
diff --git a/pypy/objspace/flow/test/test_framestate.py b/pypy/objspace/flow/test/test_framestate.py
--- a/pypy/objspace/flow/test/test_framestate.py
+++ b/pypy/objspace/flow/test/test_framestate.py
@@ -25,7 +25,7 @@
         dummy = Constant(None)
         #dummy.dummy = True
         arg_list = ([Variable() for i in range(formalargcount)] +
-                    [dummy] * (len(frame.fastlocals_w) - formalargcount))
+                    [dummy] * (frame.nlocals - formalargcount))
         frame.setfastscope(arg_list)
         return frame
 
@@ -42,7 +42,7 @@
     def test_neq_hacked_framestate(self):
         frame = self.getframe(self.func_simple)
         fs1 = FrameState(frame)
-        frame.fastlocals_w[-1] = Variable()
+        frame.locals_stack_w[frame.nlocals-1] = Variable()
         fs2 = FrameState(frame)
         assert fs1 != fs2
 
@@ -55,7 +55,7 @@
     def test_union_on_hacked_framestates(self):
         frame = self.getframe(self.func_simple)
         fs1 = FrameState(frame)
-        frame.fastlocals_w[-1] = Variable()
+        frame.locals_stack_w[frame.nlocals-1] = Variable()
         fs2 = FrameState(frame)
         assert fs1.union(fs2) == fs2  # fs2 is more general
         assert fs2.union(fs1) == fs2  # fs2 is more general
@@ -63,7 +63,7 @@
     def test_restore_frame(self):
         frame = self.getframe(self.func_simple)
         fs1 = FrameState(frame)
-        frame.fastlocals_w[-1] = Variable()
+        frame.locals_stack_w[frame.nlocals-1] = Variable()
         fs1.restoreframe(frame)
         assert fs1 == FrameState(frame)
 
@@ -82,25 +82,26 @@
     def test_getoutputargs(self):
         frame = self.getframe(self.func_simple)
         fs1 = FrameState(frame)
-        frame.fastlocals_w[-1] = Variable()
+        frame.locals_stack_w[frame.nlocals-1] = Variable()
         fs2 = FrameState(frame)
         outputargs = fs1.getoutputargs(fs2)
         # 'x' -> 'x' is a Variable
-        # fastlocals_w[-1] -> fastlocals_w[-1] is Constant(None)
-        assert outputargs == [frame.fastlocals_w[0], Constant(None)]
+        # locals_w[n-1] -> locals_w[n-1] is Constant(None)
+        assert outputargs == [frame.locals_stack_w[0], Constant(None)]
 
     def test_union_different_constants(self):
         frame = self.getframe(self.func_simple)
         fs1 = FrameState(frame)
-        frame.fastlocals_w[-1] = Constant(42)
+        frame.locals_stack_w[frame.nlocals-1] = Constant(42)
         fs2 = FrameState(frame)
         fs3 = fs1.union(fs2)
         fs3.restoreframe(frame)
-        assert isinstance(frame.fastlocals_w[-1], Variable) # generalized
+        assert isinstance(frame.locals_stack_w[frame.nlocals-1], Variable)
+                                 # ^^^ generalized
 
     def test_union_spectag(self):
         frame = self.getframe(self.func_simple)
         fs1 = FrameState(frame)
-        frame.fastlocals_w[-1] = Constant(SpecTag())
+        frame.locals_stack_w[frame.nlocals-1] = Constant(SpecTag())
         fs2 = FrameState(frame)
         assert fs1.union(fs2) is None   # UnionError
diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py
--- a/pypy/objspace/std/dictmultiobject.py
+++ b/pypy/objspace/std/dictmultiobject.py
@@ -1,13 +1,14 @@
 import py, sys
 from pypy.objspace.std.model import registerimplementation, W_Object
 from pypy.objspace.std.register_all import register_all
+from pypy.objspace.std.settype import set_typedef as settypedef
 from pypy.interpreter import gateway
 from pypy.interpreter.argument import Signature
 from pypy.interpreter.error import OperationError, operationerrfmt
 from pypy.module.__builtin__.__init__ import BUILTIN_TO_INDEX, OPTIMIZED_BUILTINS
 
 from pypy.rlib.objectmodel import r_dict, we_are_translated
-from pypy.objspace.std.settype import set_typedef as settypedef
+from pypy.rlib.debug import mark_dict_non_null
 
 def _is_str(space, w_key):
     return space.is_w(space.type(w_key), space.w_str)
@@ -59,7 +60,8 @@
 
     def initialize_as_rdict(self):
         assert self.r_dict_content is None
-        self.r_dict_content = r_dict(self.space.eq_w, self.space.hash_w)
+        self.r_dict_content = r_dict(self.space.eq_w, self.space.hash_w,
+                                     force_non_null=True)
         return self.r_dict_content
 
 
@@ -308,6 +310,7 @@
     def __init__(self, space):
         self.space = space
         self.content = {}
+        mark_dict_non_null(self.content)
 
     def impl_setitem(self, w_key, w_value):
         space = self.space
@@ -317,6 +320,7 @@
             self._as_rdict().impl_fallback_setitem(w_key, w_value)
 
     def impl_setitem_str(self, key, w_value):
+        assert key is not None
         self.content[key] = w_value
 
     def impl_setdefault(self, w_key, w_default):
@@ -342,6 +346,7 @@
         return len(self.content)
 
     def impl_getitem_str(self, key):
+        assert key is not None
         return self.content.get(key, None)
 
     def impl_getitem(self, w_key):
diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py
--- a/pypy/objspace/std/objspace.py
+++ b/pypy/objspace/std/objspace.py
@@ -311,6 +311,10 @@
                 classofinstance=classofinstance,
                 strdict=strdict)
 
+    def newset(self):
+        from pypy.objspace.std.setobject import newset
+        return W_SetObject(self, newset(self))
+
     def newslice(self, w_start, w_end, w_step):
         return W_SliceObject(w_start, w_end, w_step)
 
diff --git a/pypy/objspace/std/setobject.py b/pypy/objspace/std/setobject.py
--- a/pypy/objspace/std/setobject.py
+++ b/pypy/objspace/std/setobject.py
@@ -112,7 +112,7 @@
 # some helper functions
 
 def newset(space):
-    return r_dict(space.eq_w, space.hash_w)
+    return r_dict(space.eq_w, space.hash_w, force_non_null=True)
 
 def make_setdata_from_w_iterable(space, w_iterable=None):
     """Return a new r_dict with the content of w_iterable."""
diff --git a/pypy/objspace/std/test/test_setobject.py b/pypy/objspace/std/test/test_setobject.py
--- a/pypy/objspace/std/test/test_setobject.py
+++ b/pypy/objspace/std/test/test_setobject.py
@@ -50,6 +50,10 @@
         u = self.space.wrap(set('simsalabim'))
         assert self.space.eq_w(s,u)
 
+    def test_space_newset(self):
+        s = self.space.newset()
+        assert self.space.str_w(self.space.repr(s)) == 'set([])'
+
 class AppTestAppSetTest:
     def test_subtype(self):
         class subset(set):pass
diff --git a/pypy/rlib/debug.py b/pypy/rlib/debug.py
--- a/pypy/rlib/debug.py
+++ b/pypy/rlib/debug.py
@@ -262,6 +262,28 @@
         return hop.inputarg(hop.args_r[0], arg=0)
 
 
+def mark_dict_non_null(d):
+    """ Mark dictionary as having non-null keys and values. A warning would
+    be emitted (not an error!) in case annotation disagrees.
+    """
+    assert isinstance(d, dict)
+    return d
+
+
+class DictMarkEntry(ExtRegistryEntry):
+    _about_ = mark_dict_non_null
+    
+    def compute_result_annotation(self, s_dict):
+        from pypy.annotation.model import SomeDict, s_None
+
+        assert isinstance(s_dict, SomeDict)
+        s_dict.dictdef.force_non_null = True
+        return s_dict
+
+    def specialize_call(self, hop):
+        hop.exception_cannot_occur()
+        return hop.inputarg(hop.args_r[0], arg=0)
+
 class IntegerCanBeNegative(Exception):
     pass
 
diff --git a/pypy/rlib/objectmodel.py b/pypy/rlib/objectmodel.py
--- a/pypy/rlib/objectmodel.py
+++ b/pypy/rlib/objectmodel.py
@@ -448,10 +448,11 @@
     The functions key_eq() and key_hash() are used by the key comparison
     algorithm."""
 
-    def __init__(self, key_eq, key_hash):
+    def __init__(self, key_eq, key_hash, force_non_null=False):
         self._dict = {}
         self.key_eq = key_eq
         self.key_hash = key_hash
+        self.force_non_null = force_non_null
 
     def __getitem__(self, key):
         return self._dict[_r_dictkey(self, key)]
diff --git a/pypy/rlib/rgc.py b/pypy/rlib/rgc.py
--- a/pypy/rlib/rgc.py
+++ b/pypy/rlib/rgc.py
@@ -272,7 +272,9 @@
     if isinstance(TP.OF, lltype.Ptr) and TP.OF.TO._gckind == 'gc':
         # perform a write barrier that copies necessary flags from
         # source to dest
-        if not llop.gc_writebarrier_before_copy(lltype.Bool, source, dest):
+        if not llop.gc_writebarrier_before_copy(lltype.Bool, source, dest,
+                                                source_start, dest_start,
+                                                length):
             # if the write barrier is not supported, copy by hand
             for i in range(length):
                 dest[i + dest_start] = source[i + source_start]
diff --git a/pypy/rlib/ropenssl.py b/pypy/rlib/ropenssl.py
--- a/pypy/rlib/ropenssl.py
+++ b/pypy/rlib/ropenssl.py
@@ -151,7 +151,7 @@
                 [rffi.INT, rffi.INT, rffi.CCHARP, rffi.INT], lltype.Void))],
              lltype.Void)
 ssl_external('CRYPTO_set_id_callback',
-             [lltype.Ptr(lltype.FuncType([], rffi.INT))],
+             [lltype.Ptr(lltype.FuncType([], rffi.LONG))],
              lltype.Void)
              
 if HAVE_OPENSSL_RAND:
diff --git a/pypy/rlib/rsdl/RMix.py b/pypy/rlib/rsdl/RMix.py
--- a/pypy/rlib/rsdl/RMix.py
+++ b/pypy/rlib/rsdl/RMix.py
@@ -52,7 +52,8 @@
                                ChunkPtr)
 
 def LoadWAV(filename_ccharp):
-    return LoadWAV_RW(RSDL.RWFromFile(filename_ccharp, rffi.str2charp('rb')), 1)
+    with rffi.scoped_str2charp('rb') as mode:
+        return LoadWAV_RW(RSDL.RWFromFile(filename_ccharp, mode), 1)
 
 
 PlayChannelTimed    = external('Mix_PlayChannelTimed',
@@ -64,4 +65,4 @@
 
 """Returns zero if the channel is not playing. 
 Otherwise if you passed in -1, the number of channels playing is returned"""
-ChannelPlaying  = external('Mix_Playing', [ rffi.INT])
\ No newline at end of file
+ChannelPlaying  = external('Mix_Playing', [rffi.INT], rffi.INT)
diff --git a/pypy/rlib/test/test_debug.py b/pypy/rlib/test/test_debug.py
--- a/pypy/rlib/test/test_debug.py
+++ b/pypy/rlib/test/test_debug.py
@@ -1,11 +1,12 @@
 
 import py
-from pypy.rlib.debug import check_annotation, make_sure_not_resized
-from pypy.rlib.debug import debug_print, debug_start, debug_stop
-from pypy.rlib.debug import have_debug_prints, debug_offset, debug_flush
-from pypy.rlib.debug import check_nonneg, IntegerCanBeNegative
+from pypy.rlib.debug import (check_annotation, make_sure_not_resized,
+                             debug_print, debug_start, debug_stop,
+                             have_debug_prints, debug_offset, debug_flush,
+                             check_nonneg, IntegerCanBeNegative,
+                             mark_dict_non_null)
 from pypy.rlib import debug
-from pypy.rpython.test.test_llinterp import interpret
+from pypy.rpython.test.test_llinterp import interpret, gengraph
 
 def test_check_annotation():
     class Error(Exception):
@@ -52,8 +53,17 @@
     py.test.raises(ListChangeUnallowed, interpret, f, [], 
                    list_comprehension_operations=True)
 
+def test_mark_dict_non_null():
+    def f():
+        d = {"ac": "bx"}
+        mark_dict_non_null(d)
+        return d
 
-class DebugTests:
+    t, typer, graph = gengraph(f, [])
+    assert sorted(graph.returnblock.inputargs[0].concretetype.TO.entries.TO.OF._flds.keys()) == ['key', 'value']
+
+
+class DebugTests(object):
 
     def test_debug_print_start_stop(self):
         def f(x):
diff --git a/pypy/rpython/llinterp.py b/pypy/rpython/llinterp.py
--- a/pypy/rpython/llinterp.py
+++ b/pypy/rpython/llinterp.py
@@ -737,9 +737,12 @@
     def op_zero_gc_pointers_inside(self, obj):
         raise NotImplementedError("zero_gc_pointers_inside")
 
-    def op_gc_writebarrier_before_copy(self, source, dest):
+    def op_gc_writebarrier_before_copy(self, source, dest,
+                                       source_start, dest_start, length):
         if hasattr(self.heap, 'writebarrier_before_copy'):
-            return self.heap.writebarrier_before_copy(source, dest)
+            return self.heap.writebarrier_before_copy(source, dest,
+                                                      source_start, dest_start,
+                                                      length)
         else:
             return True
 
diff --git a/pypy/rpython/lltypesystem/ll2ctypes.py b/pypy/rpython/lltypesystem/ll2ctypes.py
--- a/pypy/rpython/lltypesystem/ll2ctypes.py
+++ b/pypy/rpython/lltypesystem/ll2ctypes.py
@@ -37,7 +37,9 @@
     if far_regions:
         import random
         pieces = far_regions._ll2ctypes_pieces
-        num = random.randrange(len(pieces))
+        num = random.randrange(len(pieces)+1)
+        if num == len(pieces):
+            return ctype()
         i1, stop = pieces[num]
         i2 = i1 + ((ctypes.sizeof(ctype) or 1) + 7) & ~7
         if i2 > stop:
diff --git a/pypy/rpython/lltypesystem/opimpl.py b/pypy/rpython/lltypesystem/opimpl.py
--- a/pypy/rpython/lltypesystem/opimpl.py
+++ b/pypy/rpython/lltypesystem/opimpl.py
@@ -473,12 +473,16 @@
     checkadr(addr2)
     return addr1 - addr2
 
-def op_gc_writebarrier_before_copy(source, dest):
+def op_gc_writebarrier_before_copy(source, dest,
+                                   source_start, dest_start, length):
     A = lltype.typeOf(source)
     assert A == lltype.typeOf(dest)
     assert isinstance(A.TO, lltype.GcArray)
     assert isinstance(A.TO.OF, lltype.Ptr)
     assert A.TO.OF.TO._gckind == 'gc'
+    assert type(source_start) is int
+    assert type(dest_start) is int
+    assert type(length) is int
     return True
 
 def op_getfield(p, name):
diff --git a/pypy/rpython/lltypesystem/rdict.py b/pypy/rpython/lltypesystem/rdict.py
--- a/pypy/rpython/lltypesystem/rdict.py
+++ b/pypy/rpython/lltypesystem/rdict.py
@@ -7,6 +7,7 @@
 from pypy.rlib.objectmodel import hlinvoke
 from pypy.rlib import objectmodel
 from pypy.rpython import rmodel
+from pypy.rpython.error import TyperError
 
 HIGHEST_BIT = intmask(1 << (LONG_BIT - 1))
 MASK = intmask(HIGHEST_BIT - 1)
@@ -40,7 +41,7 @@
 class DictRepr(AbstractDictRepr):
 
     def __init__(self, rtyper, key_repr, value_repr, dictkey, dictvalue,
-                 custom_eq_hash=None):
+                 custom_eq_hash=None, force_non_null=False):
         self.rtyper = rtyper
         self.DICT = lltype.GcForwardReference()
         self.lowleveltype = lltype.Ptr(self.DICT)
@@ -59,6 +60,7 @@
         self.dictvalue = dictvalue
         self.dict_cache = {}
         self._custom_eq_hash_repr = custom_eq_hash
+        self.force_non_null = force_non_null
         # setup() needs to be called to finish this initialization
 
     def _externalvsinternal(self, rtyper, item_repr):
@@ -95,6 +97,13 @@
             s_value = self.dictvalue.s_value
             nullkeymarker = not self.key_repr.can_ll_be_null(s_key)
             nullvaluemarker = not self.value_repr.can_ll_be_null(s_value)
+            if self.force_non_null:
+                if not nullkeymarker:
+                    rmodel.warning("%s can be null, but forcing non-null in dict key" % s_key)
+                    nullkeymarker = True
+                if not nullvaluemarker:
+                    rmodel.warning("%s can be null, but forcing non-null in dict value" % s_value)
+                    nullvaluemarker = True
             dummykeyobj = self.key_repr.get_ll_dummyval_obj(self.rtyper,
                                                             s_key)
             dummyvalueobj = self.value_repr.get_ll_dummyval_obj(self.rtyper,
@@ -204,7 +213,7 @@
         if dictobj is None:
             return lltype.nullptr(self.DICT)
         if not isinstance(dictobj, (dict, objectmodel.r_dict)):
-            raise TyperError("expected a dict: %r" % (dictobj,))
+            raise TypeError("expected a dict: %r" % (dictobj,))
         try:
             key = Constant(dictobj)
             return self.dict_cache[key]
@@ -645,12 +654,15 @@
     pass
 
 
-def rtype_r_dict(hop):
+def rtype_r_dict(hop, i_force_non_null=None):
     r_dict = hop.r_result
     if not r_dict.custom_eq_hash:
         raise TyperError("r_dict() call does not return an r_dict instance")
-    v_eqfn, v_hashfn = hop.inputargs(r_dict.r_rdict_eqfn,
-                                     r_dict.r_rdict_hashfn)
+    v_eqfn = hop.inputarg(r_dict.r_rdict_eqfn, arg=0)
+    v_hashfn = hop.inputarg(r_dict.r_rdict_hashfn, arg=1)
+    if i_force_non_null is not None:
+        assert i_force_non_null == 2
+        hop.inputarg(lltype.Void, arg=2)
     cDICT = hop.inputconst(lltype.Void, r_dict.DICT)
     hop.exception_cannot_occur()
     v_result = hop.gendirectcall(ll_newdict, cDICT)
diff --git a/pypy/rpython/lltypesystem/rstr.py b/pypy/rpython/lltypesystem/rstr.py
--- a/pypy/rpython/lltypesystem/rstr.py
+++ b/pypy/rpython/lltypesystem/rstr.py
@@ -4,7 +4,7 @@
 from pypy.rlib.objectmodel import malloc_zero_filled, we_are_translated
 from pypy.rlib.objectmodel import _hash_string, enforceargs
 from pypy.rlib.debug import ll_assert
-from pypy.rlib.jit import purefunction, we_are_jitted
+from pypy.rlib.jit import purefunction, we_are_jitted, dont_look_inside
 from pypy.rlib.rarithmetic import ovfcheck
 from pypy.rpython.robject import PyObjRepr, pyobj_repr
 from pypy.rpython.rmodel import inputconst, IntegerRepr
@@ -57,6 +57,8 @@
                 llmemory.itemoffsetof(TP.chars, 0) +
                 llmemory.sizeof(CHAR_TP) * item)
 
+    # It'd be nice to be able to look inside this function.
+    @dont_look_inside
     @enforceargs(None, None, int, int, int)
     def copy_string_contents(src, dst, srcstart, dststart, length):
         assert srcstart >= 0
diff --git a/pypy/rpython/memory/gc/generation.py b/pypy/rpython/memory/gc/generation.py
--- a/pypy/rpython/memory/gc/generation.py
+++ b/pypy/rpython/memory/gc/generation.py
@@ -517,7 +517,8 @@
             objhdr.tid &= ~GCFLAG_NO_HEAP_PTRS
             self.last_generation_root_objects.append(addr_struct)
 
-    def writebarrier_before_copy(self, source_addr, dest_addr):
+    def writebarrier_before_copy(self, source_addr, dest_addr,
+                                 source_start, dest_start, length):
         """ This has the same effect as calling writebarrier over
         each element in dest copied from source, except it might reset
         one of the following flags a bit too eagerly, which means we'll have
diff --git a/pypy/rpython/memory/gc/minimark.py b/pypy/rpython/memory/gc/minimark.py
--- a/pypy/rpython/memory/gc/minimark.py
+++ b/pypy/rpython/memory/gc/minimark.py
@@ -75,10 +75,16 @@
 
 first_gcflag = 1 << (LONG_BIT//2)
 
-# The following flag is never set on young objects.  It is initially set
-# on all prebuilt and old objects, and gets cleared by the write_barrier()
-# when we write in them a pointer to a young object.
-GCFLAG_NO_YOUNG_PTRS = first_gcflag << 0
+# The following flag is set on objects if we need to do something to
+# track the young pointers that it might contain.  The flag is not set
+# on young objects (unless they are large arrays, see below), and we
+# simply assume that any young object can point to any other young object.
+# For old and prebuilt objects, the flag is usually set, and is cleared
+# when we write a young pointer to it.  For large arrays with
+# GCFLAG_HAS_CARDS, we rely on card marking to track where the
+# young pointers are; the flag GCFLAG_TRACK_YOUNG_PTRS is set in this
+# case too, to speed up the write barrier.
+GCFLAG_TRACK_YOUNG_PTRS = first_gcflag << 0
 
 # The following flag is set on some prebuilt objects.  The flag is set
 # unless the object is already listed in 'prebuilt_root_objects'.
@@ -246,17 +252,23 @@
         self.ac = ArenaCollectionClass(arena_size, page_size,
                                        small_request_threshold)
         #
-        # Used by minor collection: a list of non-young objects that
+        # Used by minor collection: a list of (mostly non-young) objects that
         # (may) contain a pointer to a young object.  Populated by
-        # the write barrier.
-        self.old_objects_pointing_to_young = self.AddressStack()
+        # the write barrier: when we clear GCFLAG_TRACK_YOUNG_PTRS, we
+        # add it to this list.
+        class Cls(self.AddressStack):
+            def append(self2, addr):
+                assert addr not in self2.tolist()
+                self.AddressStack.append(self2, addr)
+        self.objects_pointing_to_young = self.AddressStack()
         #
-        # Similar to 'old_objects_pointing_to_young', but lists objects
+        # Similar to 'objects_pointing_to_young', but lists objects
         # that have the GCFLAG_CARDS_SET bit.  For large arrays.  Note
         # that it is possible for an object to be listed both in here
-        # and in 'old_objects_pointing_to_young', in which case we
+        # and in 'objects_pointing_to_young', in which case we
         # should just clear the cards and trace it fully, as usual.
-        self.old_objects_with_cards_set = self.AddressStack()
+        # Note also that young array objects may be added to this list.
+        self.objects_with_cards_set = self.AddressStack()
         #
         # A list of all prebuilt GC objects that contain pointers to the heap
         self.prebuilt_root_objects = self.AddressStack()
@@ -625,7 +637,7 @@
             # if 'can_make_young'.  The interesting case of 'can_make_young'
             # is for large objects, bigger than the 'large_objects' threshold,
             # which are raw-malloced but still young.
-            extra_flags = GCFLAG_NO_YOUNG_PTRS
+            extra_flags = GCFLAG_TRACK_YOUNG_PTRS
             #
         else:
             # No, so proceed to allocate it externally with raw_malloc().
@@ -643,7 +655,7 @@
                 # Reserve N extra words containing card bits before the object.
                 extra_words = self.card_marking_words_for_length(length)
                 cardheadersize = WORD * extra_words
-                extra_flags = GCFLAG_HAS_CARDS
+                extra_flags = GCFLAG_HAS_CARDS | GCFLAG_TRACK_YOUNG_PTRS
                 # note that if 'can_make_young', then card marking will only
                 # be used later, after (and if) the object becomes old
             #
@@ -686,7 +698,7 @@
                 self.young_rawmalloced_objects.add(result + size_gc_header)
             else:
                 self.old_rawmalloced_objects.append(result + size_gc_header)
-                extra_flags |= GCFLAG_NO_YOUNG_PTRS
+                extra_flags |= GCFLAG_TRACK_YOUNG_PTRS
         #
         # Common code to fill the header and length of the object.
         self.init_gc_object(result, typeid, extra_flags)
@@ -777,7 +789,7 @@
     def init_gc_object_immortal(self, addr, typeid16, flags=0):
         # For prebuilt GC objects, the flags must contain
         # GCFLAG_NO_xxx_PTRS, at least initially.
-        flags |= GCFLAG_NO_HEAP_PTRS | GCFLAG_NO_YOUNG_PTRS
+        flags |= GCFLAG_NO_HEAP_PTRS | GCFLAG_TRACK_YOUNG_PTRS
         self.init_gc_object(addr, typeid16, flags)
 
     def is_in_nursery(self, addr):
@@ -870,8 +882,8 @@
         ll_assert(not self.is_in_nursery(obj),
                   "object in nursery after collection")
         # similarily, all objects should have this flag:
-        ll_assert(self.header(obj).tid & GCFLAG_NO_YOUNG_PTRS,
-                  "missing GCFLAG_NO_YOUNG_PTRS")
+        ll_assert(self.header(obj).tid & GCFLAG_TRACK_YOUNG_PTRS,
+                  "missing GCFLAG_TRACK_YOUNG_PTRS")
         # the GCFLAG_VISITED should not be set between collections
         ll_assert(self.header(obj).tid & GCFLAG_VISITED == 0,
                   "unexpected GCFLAG_VISITED")
@@ -910,7 +922,7 @@
     # for the JIT: a minimal description of the write_barrier() method
     # (the JIT assumes it is of the shape
     #  "if addr_struct.int0 & JIT_WB_IF_FLAG: remember_young_pointer()")
-    JIT_WB_IF_FLAG = GCFLAG_NO_YOUNG_PTRS
+    JIT_WB_IF_FLAG = GCFLAG_TRACK_YOUNG_PTRS
 
     @classmethod
     def JIT_max_size_of_young_obj(cls):
@@ -921,11 +933,11 @@
         return cls.minimal_size_in_nursery
 
     def write_barrier(self, newvalue, addr_struct):
-        if self.header(addr_struct).tid & GCFLAG_NO_YOUNG_PTRS:
+        if self.header(addr_struct).tid & GCFLAG_TRACK_YOUNG_PTRS:
             self.remember_young_pointer(addr_struct, newvalue)
 
     def write_barrier_from_array(self, newvalue, addr_array, index):
-        if self.header(addr_array).tid & GCFLAG_NO_YOUNG_PTRS:
+        if self.header(addr_array).tid & GCFLAG_TRACK_YOUNG_PTRS:
             if self.card_page_indices > 0:     # <- constant-folded
                 self.remember_young_pointer_from_array2(addr_array, index)
             else:
@@ -943,20 +955,23 @@
         def remember_young_pointer(addr_struct, newvalue):
             # 'addr_struct' is the address of the object in which we write.
             # 'newvalue' is the address that we are going to write in there.
+            # We know that 'addr_struct' has GCFLAG_TRACK_YOUNG_PTRS so far.
+            #
             if DEBUG:   # note: PYPY_GC_DEBUG=1 does not enable this
-                ll_assert(self.debug_is_old_object(addr_struct),
-                          "young object with GCFLAG_NO_YOUNG_PTRS")
+                ll_assert(self.debug_is_old_object(addr_struct) or
+                          self.header(addr_struct).tid & GCFLAG_HAS_CARDS != 0,
+                      "young object with GCFLAG_TRACK_YOUNG_PTRS and no cards")
             #
-            # If it seems that what we are writing is a pointer to the nursery
+            # If it seems that what we are writing is a pointer to a young obj
             # (as checked with appears_to_be_young()), then we need
-            # to remove the flag GCFLAG_NO_YOUNG_PTRS and add the old object
-            # to the list 'old_objects_pointing_to_young'.  We know that
+            # to remove the flag GCFLAG_TRACK_YOUNG_PTRS and add the object
+            # to the list 'objects_pointing_to_young'.  We know that
             # 'addr_struct' cannot be in the nursery, because nursery objects
-            # never have the flag GCFLAG_NO_YOUNG_PTRS to start with.
+            # never have the flag GCFLAG_TRACK_YOUNG_PTRS to start with.
             objhdr = self.header(addr_struct)
             if self.appears_to_be_young(newvalue):
-                self.old_objects_pointing_to_young.append(addr_struct)
-                objhdr.tid &= ~GCFLAG_NO_YOUNG_PTRS
+                self.objects_pointing_to_young.append(addr_struct)
+                objhdr.tid &= ~GCFLAG_TRACK_YOUNG_PTRS
             #
             # Second part: if 'addr_struct' is actually a prebuilt GC
             # object and it's the first time we see a write to it, we
@@ -980,16 +995,18 @@
             # 'addr_array' is the address of the object in which we write,
             # which must have an array part;  'index' is the index of the
             # item that is (or contains) the pointer that we write.
-            if DEBUG:   # note: PYPY_GC_DEBUG=1 does not enable this
-                ll_assert(self.debug_is_old_object(addr_array),
-                          "young array with GCFLAG_NO_YOUNG_PTRS")
+            # We know that 'addr_array' has GCFLAG_TRACK_YOUNG_PTRS so far.
+            #
             objhdr = self.header(addr_array)
             if objhdr.tid & GCFLAG_HAS_CARDS == 0:
                 #
+                if DEBUG:   # note: PYPY_GC_DEBUG=1 does not enable this
+                    ll_assert(self.debug_is_old_object(addr_array),
+                        "young array with no card but GCFLAG_TRACK_YOUNG_PTRS")
+                #
                 # no cards, use default logic.  Mostly copied from above.
-                self.old_objects_pointing_to_young.append(addr_array)
-                objhdr = self.header(addr_array)
-                objhdr.tid &= ~GCFLAG_NO_YOUNG_PTRS
+                self.objects_pointing_to_young.append(addr_array)
+                objhdr.tid &= ~GCFLAG_TRACK_YOUNG_PTRS
                 if objhdr.tid & GCFLAG_NO_HEAP_PTRS:
                     objhdr.tid &= ~GCFLAG_NO_HEAP_PTRS
                     self.prebuilt_root_objects.append(addr_array)
@@ -1002,9 +1019,7 @@
             bitmask = 1 << (bitindex & 7)
             #
             # If the bit is already set, leave now.
-            size_gc_header = self.gcheaderbuilder.size_gc_header
-            addr_byte = addr_array - size_gc_header
-            addr_byte = llarena.getfakearenaaddress(addr_byte) + (~byteindex)
+            addr_byte = self.get_card(addr_array, byteindex)
             byte = ord(addr_byte.char[0])
             if byte & bitmask:
                 return
@@ -1016,7 +1031,7 @@
             addr_byte.char[0] = chr(byte | bitmask)
             #
             if objhdr.tid & GCFLAG_CARDS_SET == 0:
-                self.old_objects_with_cards_set.append(addr_array)
+                self.objects_with_cards_set.append(addr_array)
                 objhdr.tid |= GCFLAG_CARDS_SET
 
         remember_young_pointer_from_array2._dont_inline_ = True
@@ -1026,9 +1041,6 @@
 
         # xxx trying it out for the JIT: a 3-arguments version of the above
         def remember_young_pointer_from_array3(addr_array, index, newvalue):
-            if DEBUG:   # note: PYPY_GC_DEBUG=1 does not enable this
-                ll_assert(self.debug_is_old_object(addr_array),
-                          "young array with GCFLAG_NO_YOUNG_PTRS")
             objhdr = self.header(addr_array)
             #
             # a single check for the common case of neither GCFLAG_HAS_CARDS
@@ -1044,8 +1056,8 @@
             else:
                 # case with cards.
                 #
-                # If the newly written address does not actually point to the
-                # nursery, leave now.
+                # If the newly written address does not actually point to a
+                # young object, leave now.
                 if not self.appears_to_be_young(newvalue):
                     return
                 #
@@ -1056,46 +1068,53 @@
                 bitmask = 1 << (bitindex & 7)
                 #
                 # If the bit is already set, leave now.
-                size_gc_header = self.gcheaderbuilder.size_gc_header
-                addr_byte = addr_array - size_gc_header
-                addr_byte = llarena.getfakearenaaddress(addr_byte) + \
-                            (~byteindex)
+                addr_byte = self.get_card(addr_array, byteindex)
                 byte = ord(addr_byte.char[0])
                 if byte & bitmask:
                     return
                 addr_byte.char[0] = chr(byte | bitmask)
                 #
                 if objhdr.tid & GCFLAG_CARDS_SET == 0:
-                    self.old_objects_with_cards_set.append(addr_array)
+                    self.objects_with_cards_set.append(addr_array)
                     objhdr.tid |= GCFLAG_CARDS_SET
                 return
             #
             # Logic for the no-cards case, put here to minimize the number
             # of checks done at the start of the function
+            if DEBUG:   # note: PYPY_GC_DEBUG=1 does not enable this
+                ll_assert(self.debug_is_old_object(addr_array),
+                        "young array with no card but GCFLAG_TRACK_YOUNG_PTRS")
+            #
             if self.appears_to_be_young(newvalue):
-                self.old_objects_pointing_to_young.append(addr_array)
-                objhdr.tid &= ~GCFLAG_NO_YOUNG_PTRS
+                self.objects_pointing_to_young.append(addr_array)
+                objhdr.tid &= ~GCFLAG_TRACK_YOUNG_PTRS
 
         remember_young_pointer_from_array3._dont_inline_ = True
         assert self.card_page_indices > 0
         self.remember_young_pointer_from_array3 = (
             remember_young_pointer_from_array3)
 
+    def get_card(self, obj, byteindex):
+        size_gc_header = self.gcheaderbuilder.size_gc_header
+        addr_byte = obj - size_gc_header
+        return llarena.getfakearenaaddress(addr_byte) + (~byteindex)
+
 
     def assume_young_pointers(self, addr_struct):
         """Called occasionally by the JIT to mean ``assume that 'addr_struct'
         may now contain young pointers.''
         """
         objhdr = self.header(addr_struct)
-        if objhdr.tid & GCFLAG_NO_YOUNG_PTRS:
-            self.old_objects_pointing_to_young.append(addr_struct)
-            objhdr.tid &= ~GCFLAG_NO_YOUNG_PTRS
+        if objhdr.tid & GCFLAG_TRACK_YOUNG_PTRS:
+            self.objects_pointing_to_young.append(addr_struct)
+            objhdr.tid &= ~GCFLAG_TRACK_YOUNG_PTRS
             #
             if objhdr.tid & GCFLAG_NO_HEAP_PTRS:
                 objhdr.tid &= ~GCFLAG_NO_HEAP_PTRS
                 self.prebuilt_root_objects.append(addr_struct)
 
-    def writebarrier_before_copy(self, source_addr, dest_addr):
+    def writebarrier_before_copy(self, source_addr, dest_addr,
+                                 source_start, dest_start, length):
         """ This has the same effect as calling writebarrier over
         each element in dest copied from source, except it might reset
         one of the following flags a bit too eagerly, which means we'll have
@@ -1103,15 +1122,36 @@
         """
         source_hdr = self.header(source_addr)
         dest_hdr = self.header(dest_addr)
-        if dest_hdr.tid & GCFLAG_NO_YOUNG_PTRS == 0:
+        if dest_hdr.tid & GCFLAG_TRACK_YOUNG_PTRS == 0:
             return True
         # ^^^ a fast path of write-barrier
         #
-        if (source_hdr.tid & GCFLAG_NO_YOUNG_PTRS == 0 or
-            source_hdr.tid & GCFLAG_CARDS_SET != 0):
+        if source_hdr.tid & GCFLAG_HAS_CARDS != 0:
+            #
+            if source_hdr.tid & GCFLAG_TRACK_YOUNG_PTRS == 0:
+                # The source object may have random young pointers.
+                # Return False to mean "do it manually in ll_arraycopy".
+                return False
+            #
+            if source_hdr.tid & GCFLAG_CARDS_SET == 0:
+                # The source object has no young pointers at all.  Done.
+                return True
+            #
+            if dest_hdr.tid & GCFLAG_HAS_CARDS == 0:
+                # The dest object doesn't have cards.  Do it manually.
+                return False
+            #
+            if source_start != 0 or dest_start != 0:
+                # Misaligned.  Do it manually.
+                return False
+            #
+            self.manually_copy_card_bits(source_addr, dest_addr, length)
+            return True
+        #
+        if source_hdr.tid & GCFLAG_TRACK_YOUNG_PTRS == 0:
             # there might be in source a pointer to a young object
-            self.old_objects_pointing_to_young.append(dest_addr)
-            dest_hdr.tid &= ~GCFLAG_NO_YOUNG_PTRS
+            self.objects_pointing_to_young.append(dest_addr)
+            dest_hdr.tid &= ~GCFLAG_TRACK_YOUNG_PTRS
         #
         if dest_hdr.tid & GCFLAG_NO_HEAP_PTRS:
             if source_hdr.tid & GCFLAG_NO_HEAP_PTRS == 0:
@@ -1119,6 +1159,22 @@
                 self.prebuilt_root_objects.append(dest_addr)
         return True
 
+    def manually_copy_card_bits(self, source_addr, dest_addr, length):
+        # manually copy the individual card marks from source to dest
+        bytes = self.card_marking_bytes_for_length(length)
+        #
+        i = 0
+        while i < bytes:
+            addr_srcbyte = self.get_card(source_addr, i)
+            addr_dstbyte = self.get_card(dest_addr, i)
+            byte = ord(addr_srcbyte.char[0])
+            addr_dstbyte.char[0] = chr(ord(addr_dstbyte.char[0]) | byte)
+            i += 1
+        #
+        dest_hdr = self.header(dest_addr)
+        if dest_hdr.tid & GCFLAG_CARDS_SET == 0:
+            self.objects_with_cards_set.append(dest_addr)
+            dest_hdr.tid |= GCFLAG_CARDS_SET
 
     # ----------
     # Nursery collection
@@ -1135,20 +1191,28 @@
         # Note that during this step, we ignore references to further
         # young objects; only objects directly referenced by roots
         # are copied out or flagged.  They are also added to the list
-        # 'old_objects_pointing_to_young'.
+        # 'objects_pointing_to_young'.
         self.collect_roots_in_nursery()
         #
-        # If we are using card marking, do a partial trace of the arrays
-        # that are flagged with GCFLAG_CARDS_SET.
-        if self.card_page_indices > 0:
-            self.collect_cardrefs_to_nursery()
-        #
-        # Now trace objects from 'old_objects_pointing_to_young'.
-        # All nursery objects they reference are copied out of the
-        # nursery, and again added to 'old_objects_pointing_to_young'.
-        # All young raw-malloced object found is flagged GCFLAG_VISITED.
-        # We proceed until 'old_objects_pointing_to_young' is empty.
-        self.collect_oldrefs_to_nursery()
+        while True:
+            # If we are using card marking, do a partial trace of the arrays
+            # that are flagged with GCFLAG_CARDS_SET.
+            if self.card_page_indices > 0:
+                self.collect_cardrefs_to_nursery()
+            #
+            # Now trace objects from 'objects_pointing_to_young'.
+            # All nursery objects they reference are copied out of the
+            # nursery, and again added to 'objects_pointing_to_young'.
+            # Each young raw-malloced object found is flagged GCFLAG_VISITED.
+            # We proceed until 'objects_pointing_to_young' is empty.
+            self.collect_oldrefs_to_nursery()
+            #
+            # We have to loop back if collect_oldrefs_to_nursery caused
+            # new objects to show up in objects_with_cards_set
+            if self.card_page_indices > 0:
+                if self.objects_with_cards_set.non_empty():
+                    continue
+            break
         #
         # Now all live nursery objects should be out.  Update the young
         # weakrefs' targets.
@@ -1181,7 +1245,7 @@
         # we don't need to trace prebuilt GcStructs during a minor collect:
         # if a prebuilt GcStruct contains a pointer to a young object,
         # then the write_barrier must have ensured that the prebuilt
-        # GcStruct is in the list self.old_objects_pointing_to_young.
+        # GcStruct is in the list self.objects_pointing_to_young.
         self.root_walker.walk_roots(
             MiniMarkGC._trace_drag_out1,  # stack roots
             MiniMarkGC._trace_drag_out1,  # static in prebuilt non-gc
@@ -1189,7 +1253,7 @@
 
     def collect_cardrefs_to_nursery(self):
         size_gc_header = self.gcheaderbuilder.size_gc_header
-        oldlist = self.old_objects_with_cards_set
+        oldlist = self.objects_with_cards_set
         while oldlist.non_empty():
             obj = oldlist.pop()
             #
@@ -1205,11 +1269,11 @@
             bytes = self.card_marking_bytes_for_length(length)
             p = llarena.getfakearenaaddress(obj - size_gc_header)
             #
-            # If the object doesn't have GCFLAG_NO_YOUNG_PTRS, then it
-            # means that it is in 'old_objects_pointing_to_young' and
+            # If the object doesn't have GCFLAG_TRACK_YOUNG_PTRS, then it
+            # means that it is in 'objects_pointing_to_young' and
             # will be fully traced by collect_oldrefs_to_nursery() just
             # afterwards.
-            if self.header(obj).tid & GCFLAG_NO_YOUNG_PTRS == 0:
+            if self.header(obj).tid & GCFLAG_TRACK_YOUNG_PTRS == 0:
                 #
                 # In that case, we just have to reset all card bits.
                 while bytes > 0:
@@ -1245,19 +1309,30 @@
 
 
     def collect_oldrefs_to_nursery(self):
-        # Follow the old_objects_pointing_to_young list and move the
+        # Follow the objects_pointing_to_young list and move the
         # young objects they point to out of the nursery.
-        oldlist = self.old_objects_pointing_to_young
+        oldlist = self.objects_pointing_to_young
         while oldlist.non_empty():
             obj = oldlist.pop()
             #
-            # Add the flag GCFLAG_NO_YOUNG_PTRS.  All live objects should have
-            # this flag set after a nursery collection.
-            self.header(obj).tid |= GCFLAG_NO_YOUNG_PTRS
+            # Check (somehow) that the flags are correct: we must not have
+            # GCFLAG_TRACK_YOUNG_PTRS so far.  But in a rare case, it's
+            # possible that the same obj is appended twice to the list
+            # (see _trace_drag_out, GCFLAG_VISITED case).  Filter it out
+            # here.
+            if self.header(obj).tid & GCFLAG_TRACK_YOUNG_PTRS != 0:
+                ll_assert(self.header(obj).tid & GCFLAG_VISITED != 0,
+                          "objects_pointing_to_young contains obj with "
+                          "GCFLAG_TRACK_YOUNG_PTRS and not GCFLAG_VISITED")
+                continue
+            #
+            # Add the flag GCFLAG_TRACK_YOUNG_PTRS.  All live objects should
+            # have this flag set after a nursery collection.
+            self.header(obj).tid |= GCFLAG_TRACK_YOUNG_PTRS
             #
             # Trace the 'obj' to replace pointers to nursery with pointers
             # outside the nursery, possibly forcing nursery objects out
-            # and adding them to 'old_objects_pointing_to_young' as well.
+            # and adding them to 'objects_pointing_to_young' as well.
             self.trace_and_drag_out_of_nursery(obj)
 
     def trace_and_drag_out_of_nursery(self, obj):
@@ -1296,7 +1371,19 @@
                 # 'obj' points to a young, raw-malloced object
                 if (self.header(obj).tid & GCFLAG_VISITED) == 0:
                     self.header(obj).tid |= GCFLAG_VISITED
-                    self.old_objects_pointing_to_young.append(obj)
+                    #
+                    # we just made 'obj' old, so we may need to add it
+                    # to the correct list:
+                    if self.header(obj).tid & GCFLAG_TRACK_YOUNG_PTRS == 0:
+                        # common case: GCFLAG_TRACK_YOUNG_PTRS is not set, so
+                        # the object may contain young pointers anywhere
+                        self.objects_pointing_to_young.append(obj)
+                    else:
+                        # large array case: the object contains card marks
+                        # that tell us where young pointers are, and it
+                        # is already in objects_with_cards_set.
+                        ll_assert(self.header(obj).tid & GCFLAG_HAS_CARDS != 0,
+                                  "neither YOUNG_PTRS nor HAS_CARDS??")
             return
         #
         # If 'obj' was already forwarded, change it to its forwarding address.
@@ -1343,11 +1430,11 @@
         # Change the original pointer to this object.
         root.address[0] = newobj
         #
-        # Add the newobj to the list 'old_objects_pointing_to_young',
+        # Add the newobj to the list 'objects_pointing_to_young',
         # because it can contain further pointers to other young objects.
         # We will fix such references to point to the copy of the young
-        # objects when we walk 'old_objects_pointing_to_young'.
-        self.old_objects_pointing_to_young.append(newobj)
+        # objects when we walk 'objects_pointing_to_young'.
+        self.objects_pointing_to_young.append(newobj)
 
 
     def _malloc_out_of_nursery(self, totalsize):
diff --git a/pypy/rpython/memory/gc/test/test_direct.py b/pypy/rpython/memory/gc/test/test_direct.py
--- a/pypy/rpython/memory/gc/test/test_direct.py
+++ b/pypy/rpython/memory/gc/test/test_direct.py
@@ -522,5 +522,78 @@
             self.stackroots.pop()
     test_card_marker.GC_PARAMS = {"card_page_indices": 4}
 
+    def test_writebarrier_before_copy(self):
+        from pypy.rpython.memory.gc import minimark
+        largeobj_size =  self.gc.nonlarge_max + 1
+        p_src = self.malloc(VAR, largeobj_size)
+        p_dst = self.malloc(VAR, largeobj_size)
+        # make them old
+        self.stackroots.append(p_src)
+        self.stackroots.append(p_dst)
+        self.gc.collect()
+        p_dst = self.stackroots.pop()
+        p_src = self.stackroots.pop()
+        #
+        addr_src = llmemory.cast_ptr_to_adr(p_src)
+        addr_dst = llmemory.cast_ptr_to_adr(p_dst)
+        hdr_src = self.gc.header(addr_src)
+        hdr_dst = self.gc.header(addr_dst)
+        #
+        assert hdr_src.tid & minimark.GCFLAG_TRACK_YOUNG_PTRS
+        assert hdr_dst.tid & minimark.GCFLAG_TRACK_YOUNG_PTRS
+        #
+        res = self.gc.writebarrier_before_copy(addr_src, addr_dst, 0, 0, 10)
+        assert res
+        assert hdr_dst.tid & minimark.GCFLAG_TRACK_YOUNG_PTRS
+        #
+        hdr_src.tid &= ~minimark.GCFLAG_TRACK_YOUNG_PTRS  # pretend we have young ptrs
+        res = self.gc.writebarrier_before_copy(addr_src, addr_dst, 0, 0, 10)
+        assert res # we optimized it
+        assert hdr_dst.tid & minimark.GCFLAG_TRACK_YOUNG_PTRS == 0 # and we copied the flag
+        #
+        hdr_src.tid |= minimark.GCFLAG_TRACK_YOUNG_PTRS
+        hdr_dst.tid |= minimark.GCFLAG_TRACK_YOUNG_PTRS
+        hdr_src.tid |= minimark.GCFLAG_HAS_CARDS
+        hdr_src.tid |= minimark.GCFLAG_CARDS_SET
+        # hdr_dst.tid does not have minimark.GCFLAG_HAS_CARDS
+        res = self.gc.writebarrier_before_copy(addr_src, addr_dst, 0, 0, 10)
+        assert not res # there might be young ptrs, let ll_arraycopy find them
+
+    def test_writebarrier_before_copy_preserving_cards(self):
+        from pypy.rpython.lltypesystem import llarena
+        from pypy.rpython.memory.gc import minimark
+        tid = self.get_type_id(VAR)
+        largeobj_size =  self.gc.nonlarge_max + 1
+        addr_src = self.gc.external_malloc(tid, largeobj_size)
+        addr_dst = self.gc.external_malloc(tid, largeobj_size)
+        hdr_src = self.gc.header(addr_src)
+        hdr_dst = self.gc.header(addr_dst)
+        #
+        assert hdr_src.tid & minimark.GCFLAG_HAS_CARDS
+        assert hdr_dst.tid & minimark.GCFLAG_HAS_CARDS
+        #
+        young_p = self.malloc(S)
+        self.gc.write_barrier_from_array(young_p, addr_src, 0)
+        index_in_third_page = int(2.5 * self.gc.card_page_indices)
+        assert index_in_third_page < largeobj_size
+        self.gc.write_barrier_from_array(young_p, addr_src,
+                                         index_in_third_page)
+        #
+        assert hdr_src.tid & minimark.GCFLAG_CARDS_SET
+        addr_byte = self.gc.get_card(addr_src, 0)
+        assert ord(addr_byte.char[0]) == 0x01 | 0x04  # bits 0 and 2
+        #
+        res = self.gc.writebarrier_before_copy(addr_src, addr_dst,
+                                             0, 0, 2*self.gc.card_page_indices)
+        assert res
+        #
+        assert hdr_dst.tid & minimark.GCFLAG_CARDS_SET
+        addr_byte = self.gc.get_card(addr_dst, 0)
+        assert ord(addr_byte.char[0]) == 0x01 | 0x04  # bits 0 and 2
+
+    test_writebarrier_before_copy_preserving_cards.GC_PARAMS = {
+        "card_page_indices": 4}
+
+
 class TestMiniMarkGCFull(DirectGCTest):
     from pypy.rpython.memory.gc.minimark import MiniMarkGC as GCClass
diff --git a/pypy/rpython/memory/gctransform/framework.py b/pypy/rpython/memory/gctransform/framework.py
--- a/pypy/rpython/memory/gctransform/framework.py
+++ b/pypy/rpython/memory/gctransform/framework.py
@@ -322,7 +322,8 @@
         if hasattr(GCClass, 'writebarrier_before_copy'):
             self.wb_before_copy_ptr = \
                     getfn(GCClass.writebarrier_before_copy.im_func,
-                    [s_gc] + [annmodel.SomeAddress()] * 2, annmodel.SomeBool())
+                    [s_gc] + [annmodel.SomeAddress()] * 2 +
+                    [annmodel.SomeInteger()] * 3, annmodel.SomeBool())
         elif GCClass.needs_write_barrier:
             raise NotImplementedError("GC needs write barrier, but does not provide writebarrier_before_copy functionality")
 
@@ -884,7 +885,7 @@
         dest_addr = hop.genop('cast_ptr_to_adr', [op.args[1]],
                                 resulttype=llmemory.Address)
         hop.genop('direct_call', [self.wb_before_copy_ptr, self.c_const_gc,
-                                  source_addr, dest_addr],
+                                  source_addr, dest_addr] + op.args[2:],
                   resultvar=op.result)
 
     def gct_weakref_create(self, hop):
diff --git a/pypy/rpython/memory/gctransform/test/test_framework.py b/pypy/rpython/memory/gctransform/test/test_framework.py
--- a/pypy/rpython/memory/gctransform/test/test_framework.py
+++ b/pypy/rpython/memory/gctransform/test/test_framework.py
@@ -163,7 +163,8 @@
     GC_PARAMS = {}
     class GCClass(MarkSweepGC):
         needs_write_barrier = True
-        def writebarrier_before_copy(self, source, dest):
+        def writebarrier_before_copy(self, source, dest,
+                                     source_start, dest_start, length):
             return True
 
 def write_barrier_check(spaceop, needs_write_barrier=True):
diff --git a/pypy/rpython/memory/gcwrapper.py b/pypy/rpython/memory/gcwrapper.py
--- a/pypy/rpython/memory/gcwrapper.py
+++ b/pypy/rpython/memory/gcwrapper.py
@@ -136,11 +136,14 @@
         ptr = lltype.cast_opaque_ptr(llmemory.GCREF, ptr)
         return self.gc.id(ptr)
 
-    def writebarrier_before_copy(self, source, dest):
+    def writebarrier_before_copy(self, source, dest,
+                                 source_start, dest_start, length):
         if self.gc.needs_write_barrier:
             source_addr = llmemory.cast_ptr_to_adr(source)
             dest_addr   = llmemory.cast_ptr_to_adr(dest)
-            return self.gc.writebarrier_before_copy(source_addr, dest_addr)
+            return self.gc.writebarrier_before_copy(source_addr, dest_addr,
+                                                    source_start, dest_start,
+                                                    length)
         else:
             return True
 
diff --git a/pypy/rpython/memory/support.py b/pypy/rpython/memory/support.py
--- a/pypy/rpython/memory/support.py
+++ b/pypy/rpython/memory/support.py
@@ -140,6 +140,14 @@
             self.foreach(_add_in_dict, result)
             return result
 
+        def tolist(self):
+            """NOT_RPYTHON.  Returns the content as a list."""
+            lst = []
+            def _add(obj, lst):
+                lst.append(obj)
+            self.foreach(_add, lst)
+            return lst
+
         def remove(self, addr):
             """Remove 'addr' from the stack.  The addr *must* be in the list,
             and preferrably near the top.
diff --git a/pypy/rpython/ootypesystem/rdict.py b/pypy/rpython/ootypesystem/rdict.py
--- a/pypy/rpython/ootypesystem/rdict.py
+++ b/pypy/rpython/ootypesystem/rdict.py
@@ -18,7 +18,7 @@
 
 class DictRepr(AbstractDictRepr):
     def __init__(self, rtyper, key_repr, value_repr, dictkey, dictvalue,
-                 custom_eq_hash=None):
+                 custom_eq_hash=None, force_non_null=False):
         self.rtyper = rtyper
         self.custom_eq_hash = custom_eq_hash is not None
 
diff --git a/pypy/rpython/rdict.py b/pypy/rpython/rdict.py
--- a/pypy/rpython/rdict.py
+++ b/pypy/rpython/rdict.py
@@ -15,6 +15,7 @@
         dictvalue = self.dictdef.dictvalue
         s_key     = dictkey  .s_value
         s_value   = dictvalue.s_value
+        force_non_null = self.dictdef.force_non_null
         if (s_key.__class__ is annmodel.SomeObject and s_key.knowntype == object and
             s_value.__class__ is annmodel.SomeObject and s_value.knowntype == object):
             return robject.pyobj_repr
@@ -29,7 +30,8 @@
                                                      lambda: rtyper.getrepr(s_value),
                                                      dictkey,
                                                      dictvalue,
-                                                     custom_eq_hash)
+                                                     custom_eq_hash,
+                                                     force_non_null)
 
     def rtyper_makekey(self):
         self.dictdef.dictkey  .dont_change_any_more = True
diff --git a/pypy/rpython/test/test_rdict.py b/pypy/rpython/test/test_rdict.py
--- a/pypy/rpython/test/test_rdict.py
+++ b/pypy/rpython/test/test_rdict.py
@@ -598,7 +598,6 @@
         res = self.interpret(func, [])
         assert res in [5263, 6352]
 
-
 class TestLLtype(BaseTestRdict, LLRtypeMixin):
     def test_dict_but_not_with_char_keys(self):
         def func(i):
@@ -860,6 +859,25 @@
         res = f()
         assert res == 1
 
+    def test_nonnull_hint(self):
+        def eq(a, b):
+            return a == b
+        def rhash(a):
+            return 3
+        
+        def func(i):
+            d = r_dict(eq, rhash, force_non_null=True)
+            if not i:
+                d[None] = i
+            else:
+                d[str(i)] = i
+            return "12" in d, d
+
+        llres = self.interpret(func, [12])
+        assert llres.item0 == 1
+        DICT = lltype.typeOf(llres.item1)
+        assert sorted(DICT.TO.entries.TO.OF._flds) == ['f_hash', 'key', 'value']
+
     # ____________________________________________________________
 
 
diff --git a/pypy/tool/jitlogparser/parser.py b/pypy/tool/jitlogparser/parser.py
--- a/pypy/tool/jitlogparser/parser.py
+++ b/pypy/tool/jitlogparser/parser.py
@@ -121,6 +121,9 @@
     def getcode(self):
         return self.code
 
+    def has_valid_code(self):
+        return self.code is not None
+
     def getopcode(self):
         return self.code.map[self.bytecode_no]
 
@@ -220,6 +223,12 @@
         return self._lineset
     lineset = property(getlineset)
 
+    def has_valid_code(self):
+        for chunk in self.chunks:
+            if not chunk.has_valid_code():
+                return False
+        return True
+
     def _compute_linerange(self):
         self._lineset = set()
         minline = sys.maxint
diff --git a/pypy/tool/jitlogparser/test/test_parser.py b/pypy/tool/jitlogparser/test/test_parser.py
--- a/pypy/tool/jitlogparser/test/test_parser.py
+++ b/pypy/tool/jitlogparser/test/test_parser.py
@@ -168,7 +168,7 @@
     []
     int_add(0, 1)
     ''')
-    loops = LoopStorage().reconnect_loops([main, bridge])
+    LoopStorage().reconnect_loops([main, bridge])
     assert adjust_bridges(main, {})[1].name == 'guard_true'
     assert adjust_bridges(main, {'loop-13': True})[1].name == 'int_add'