[pypy-commit] pypy numpypy-out: merge with default, still waiting for review

Sun Mar 4 18:13:10 CET 2012

Author: mattip
Branch: numpypy-out
Changeset: r53185:5c09a846eaaa
Date: 2012-03-04 17:45 +0200
http://bitbucket.org/pypy/pypy/changeset/5c09a846eaaa/

Log:	merge with default, still waiting for review

diff --git a/lib-python/modified-2.7/ctypes/test/test_arrays.py b/lib-python/modified-2.7/ctypes/test/test_arrays.py
--- a/lib-python/modified-2.7/ctypes/test/test_arrays.py
+++ b/lib-python/modified-2.7/ctypes/test/test_arrays.py
@@ -1,12 +1,23 @@
 import unittest
 from ctypes import *
+from test.test_support import impl_detail
 
 formats = "bBhHiIlLqQfd"
 
+# c_longdouble commented out for PyPy, look at the commend in test_longdouble
 formats = c_byte, c_ubyte, c_short, c_ushort, c_int, c_uint, \
-          c_long, c_ulonglong, c_float, c_double, c_longdouble
+          c_long, c_ulonglong, c_float, c_double #, c_longdouble
 
 class ArrayTestCase(unittest.TestCase):
+
+    @impl_detail('long double not supported by PyPy', pypy=False)
+    def test_longdouble(self):
+        """
+        This test is empty. It's just here to remind that we commented out
+        c_longdouble in "formats". If pypy will ever supports c_longdouble, we
+        should kill this test and uncomment c_longdouble inside formats.
+        """
+
     def test_simple(self):
         # create classes holding simple numeric types, and check
         # various properties.
diff --git a/lib_pypy/cPickle.py b/lib_pypy/cPickle.py
--- a/lib_pypy/cPickle.py
+++ b/lib_pypy/cPickle.py
@@ -2,16 +2,95 @@
 # One-liner implementation of cPickle
 #
 
-from pickle import *
+from pickle import Pickler, dump, dumps, PickleError, PicklingError, UnpicklingError, _EmptyClass
 from pickle import __doc__, __version__, format_version, compatible_formats
+from types import *
+from copy_reg import dispatch_table
+from copy_reg import _extension_registry, _inverted_registry, _extension_cache
+import marshal, struct, sys
 
 try: from __pypy__ import builtinify
 except ImportError: builtinify = lambda f: f
 
+# These are purely informational; no code uses these.
+format_version = "2.0"                  # File format version we write
+compatible_formats = ["1.0",            # Original protocol 0
+                      "1.1",            # Protocol 0 with INST added
+                      "1.2",            # Original protocol 1
+                      "1.3",            # Protocol 1 with BINFLOAT added
+                      "2.0",            # Protocol 2
+                      ]                 # Old format versions we can read
+
+# Keep in synch with cPickle.  This is the highest protocol number we
+# know how to read.
+HIGHEST_PROTOCOL = 2
 
 BadPickleGet = KeyError
 UnpickleableError = PicklingError
 
+MARK            = ord('(')   # push special markobject on stack
+STOP            = ord('.')   # every pickle ends with STOP
+POP             = ord('0')   # discard topmost stack item
+POP_MARK        = ord('1')   # discard stack top through topmost markobject
+DUP             = ord('2')   # duplicate top stack item
+FLOAT           = ord('F')   # push float object; decimal string argument
+INT             = ord('I')   # push integer or bool; decimal string argument
+BININT          = ord('J')   # push four-byte signed int
+BININT1         = ord('K')   # push 1-byte unsigned int
+LONG            = ord('L')   # push long; decimal string argument
+BININT2         = ord('M')   # push 2-byte unsigned int
+NONE            = ord('N')   # push None
+PERSID          = ord('P')   # push persistent object; id is taken from string arg
+BINPERSID       = ord('Q')   #  "       "         "  ;  "  "   "     "  stack
+REDUCE          = ord('R')   # apply callable to argtuple, both on stack
+STRING          = ord('S')   # push string; NL-terminated string argument
+BINSTRING       = ord('T')   # push string; counted binary string argument
+SHORT_BINSTRING = ord('U')   #  "     "   ;    "      "       "      " < 256 bytes
+UNICODE         = ord('V')   # push Unicode string; raw-unicode-escaped'd argument
+BINUNICODE      = ord('X')   #   "     "       "  ; counted UTF-8 string argument
+APPEND          = ord('a')   # append stack top to list below it
+BUILD           = ord('b')   # call __setstate__ or __dict__.update()
+GLOBAL          = ord('c')   # push self.find_class(modname, name); 2 string args
+DICT            = ord('d')   # build a dict from stack items
+EMPTY_DICT      = ord('}')   # push empty dict
+APPENDS         = ord('e')   # extend list on stack by topmost stack slice
+GET             = ord('g')   # push item from memo on stack; index is string arg
+BINGET          = ord('h')   #   "    "    "    "   "   "  ;   "    " 1-byte arg
+INST            = ord('i')   # build & push class instance
+LONG_BINGET     = ord('j')   # push item from memo on stack; index is 4-byte arg
+LIST            = ord('l')   # build list from topmost stack items
+EMPTY_LIST      = ord(']')   # push empty list
+OBJ             = ord('o')   # build & push class instance
+PUT             = ord('p')   # store stack top in memo; index is string arg
+BINPUT          = ord('q')   #   "     "    "   "   " ;   "    " 1-byte arg
+LONG_BINPUT     = ord('r')   #   "     "    "   "   " ;   "    " 4-byte arg
+SETITEM         = ord('s')   # add key+value pair to dict
+TUPLE           = ord('t')   # build tuple from topmost stack items
+EMPTY_TUPLE     = ord(')')   # push empty tuple
+SETITEMS        = ord('u')   # modify dict by adding topmost key+value pairs
+BINFLOAT        = ord('G')   # push float; arg is 8-byte float encoding
+
+TRUE            = 'I01\n'  # not an opcode; see INT docs in pickletools.py
+FALSE           = 'I00\n'  # not an opcode; see INT docs in pickletools.py
+
+# Protocol 2
+
+PROTO           = ord('\x80')  # identify pickle protocol
+NEWOBJ          = ord('\x81')  # build object by applying cls.__new__ to argtuple
+EXT1            = ord('\x82')  # push object from extension registry; 1-byte index
+EXT2            = ord('\x83')  # ditto, but 2-byte index
+EXT4            = ord('\x84')  # ditto, but 4-byte index
+TUPLE1          = ord('\x85')  # build 1-tuple from stack top
+TUPLE2          = ord('\x86')  # build 2-tuple from two topmost stack items
+TUPLE3          = ord('\x87')  # build 3-tuple from three topmost stack items
+NEWTRUE         = ord('\x88')  # push True
+NEWFALSE        = ord('\x89')  # push False
+LONG1           = ord('\x8a')  # push long from < 256 bytes
+LONG4           = ord('\x8b')  # push really big long
+
+_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]
+
+
 # ____________________________________________________________
 # XXX some temporary dark magic to produce pickled dumps that are
 #     closer to the ones produced by cPickle in CPython
@@ -44,3 +123,474 @@
     file = StringIO()
     Pickler(file, protocol).dump(obj)
     return file.getvalue()
+
+# Why use struct.pack() for pickling but marshal.loads() for
+# unpickling?  struct.pack() is 40% faster than marshal.dumps(), but
+# marshal.loads() is twice as fast as struct.unpack()!
+mloads = marshal.loads
+
+# Unpickling machinery
+
+class Unpickler(object):
+
+    def __init__(self, file):
+        """This takes a file-like object for reading a pickle data stream.
+
+        The protocol version of the pickle is detected automatically, so no
+        proto argument is needed.
+
+        The file-like object must have two methods, a read() method that
+        takes an integer argument, and a readline() method that requires no
+        arguments.  Both methods should return a string.  Thus file-like
+        object can be a file object opened for reading, a StringIO object,
+        or any other custom object that meets this interface.
+        """
+        self.readline = file.readline
+        self.read = file.read
+        self.memo = {}
+
+    def load(self):
+        """Read a pickled object representation from the open file.
+
+        Return the reconstituted object hierarchy specified in the file.
+        """
+        self.mark = object() # any new unique object
+        self.stack = []
+        self.append = self.stack.append
+        try:
+            key = ord(self.read(1))
+            while key != STOP:
+                self.dispatch[key](self)
+                key = ord(self.read(1))
+        except TypeError:
+            if self.read(1) == '':
+                raise EOFError
+            raise
+        return self.stack.pop()
+
+    # Return largest index k such that self.stack[k] is self.mark.
+    # If the stack doesn't contain a mark, eventually raises IndexError.
+    # This could be sped by maintaining another stack, of indices at which
+    # the mark appears.  For that matter, the latter stack would suffice,
+    # and we wouldn't need to push mark objects on self.stack at all.
+    # Doing so is probably a good thing, though, since if the pickle is
+    # corrupt (or hostile) we may get a clue from finding self.mark embedded
+    # in unpickled objects.
+    def marker(self):
+        k = len(self.stack)-1
+        while self.stack[k] is not self.mark: k -= 1
+        return k
+
+    dispatch = {}
+
+    def load_proto(self):
+        proto = ord(self.read(1))
+        if not 0 <= proto <= 2:
+            raise ValueError, "unsupported pickle protocol: %d" % proto
+    dispatch[PROTO] = load_proto
+
+    def load_persid(self):
+        pid = self.readline()[:-1]
+        self.append(self.persistent_load(pid))
+    dispatch[PERSID] = load_persid
+
+    def load_binpersid(self):
+        pid = self.stack.pop()
+        self.append(self.persistent_load(pid))
+    dispatch[BINPERSID] = load_binpersid
+
+    def load_none(self):
+        self.append(None)
+    dispatch[NONE] = load_none
+
+    def load_false(self):
+        self.append(False)
+    dispatch[NEWFALSE] = load_false
+
+    def load_true(self):
+        self.append(True)
+    dispatch[NEWTRUE] = load_true
+
+    def load_int(self):
+        data = self.readline()
+        if data == FALSE[1:]:
+            val = False
+        elif data == TRUE[1:]:
+            val = True
+        else:
+            try:
+                val = int(data)
+            except ValueError:
+                val = long(data)
+        self.append(val)
+    dispatch[INT] = load_int
+
+    def load_binint(self):
+        self.append(mloads('i' + self.read(4)))
+    dispatch[BININT] = load_binint
+
+    def load_binint1(self):
+        self.append(ord(self.read(1)))
+    dispatch[BININT1] = load_binint1
+
+    def load_binint2(self):
+        self.append(mloads('i' + self.read(2) + '\000\000'))
+    dispatch[BININT2] = load_binint2
+
+    def load_long(self):
+        self.append(long(self.readline()[:-1], 0))
+    dispatch[LONG] = load_long
+
+    def load_long1(self):
+        n = ord(self.read(1))
+        bytes = self.read(n)
+        self.append(decode_long(bytes))
+    dispatch[LONG1] = load_long1
+
+    def load_long4(self):
+        n = mloads('i' + self.read(4))
+        bytes = self.read(n)
+        self.append(decode_long(bytes))
+    dispatch[LONG4] = load_long4
+
+    def load_float(self):
+        self.append(float(self.readline()[:-1]))
+    dispatch[FLOAT] = load_float
+
+    def load_binfloat(self, unpack=struct.unpack):
+        self.append(unpack('>d', self.read(8))[0])
+    dispatch[BINFLOAT] = load_binfloat
+
+    def load_string(self):
+        rep = self.readline()
+        if len(rep) < 3:
+            raise ValueError, "insecure string pickle"
+        if rep[0] == "'" == rep[-2]:
+            rep = rep[1:-2]
+        elif rep[0] == '"' == rep[-2]:
+            rep = rep[1:-2]
+        else:
+            raise ValueError, "insecure string pickle"
+        self.append(rep.decode("string-escape"))
+    dispatch[STRING] = load_string
+
+    def load_binstring(self):
+        L = mloads('i' + self.read(4))
+        self.append(self.read(L))
+    dispatch[BINSTRING] = load_binstring
+
+    def load_unicode(self):
+        self.append(unicode(self.readline()[:-1],'raw-unicode-escape'))
+    dispatch[UNICODE] = load_unicode
+
+    def load_binunicode(self):
+        L = mloads('i' + self.read(4))
+        self.append(unicode(self.read(L),'utf-8'))
+    dispatch[BINUNICODE] = load_binunicode
+
+    def load_short_binstring(self):
+        L = ord(self.read(1))
+        self.append(self.read(L))
+    dispatch[SHORT_BINSTRING] = load_short_binstring
+
+    def load_tuple(self):
+        k = self.marker()
+        self.stack[k:] = [tuple(self.stack[k+1:])]
+    dispatch[TUPLE] = load_tuple
+
+    def load_empty_tuple(self):
+        self.stack.append(())
+    dispatch[EMPTY_TUPLE] = load_empty_tuple
+
+    def load_tuple1(self):
+        self.stack[-1] = (self.stack[-1],)
+    dispatch[TUPLE1] = load_tuple1
+
+    def load_tuple2(self):
+        self.stack[-2:] = [(self.stack[-2], self.stack[-1])]
+    dispatch[TUPLE2] = load_tuple2
+
+    def load_tuple3(self):
+        self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])]
+    dispatch[TUPLE3] = load_tuple3
+
+    def load_empty_list(self):
+        self.stack.append([])
+    dispatch[EMPTY_LIST] = load_empty_list
+
+    def load_empty_dictionary(self):
+        self.stack.append({})
+    dispatch[EMPTY_DICT] = load_empty_dictionary
+
+    def load_list(self):
+        k = self.marker()
+        self.stack[k:] = [self.stack[k+1:]]
+    dispatch[LIST] = load_list
+
+    def load_dict(self):
+        k = self.marker()
+        d = {}
+        items = self.stack[k+1:]
+        for i in range(0, len(items), 2):
+            key = items[i]
+            value = items[i+1]
+            d[key] = value
+        self.stack[k:] = [d]
+    dispatch[DICT] = load_dict
+
+    # INST and OBJ differ only in how they get a class object.  It's not
+    # only sensible to do the rest in a common routine, the two routines
+    # previously diverged and grew different bugs.
+    # klass is the class to instantiate, and k points to the topmost mark
+    # object, following which are the arguments for klass.__init__.
+    def _instantiate(self, klass, k):
+        args = tuple(self.stack[k+1:])
+        del self.stack[k:]
+        instantiated = 0
+        if (not args and
+                type(klass) is ClassType and
+                not hasattr(klass, "__getinitargs__")):
+            try:
+                value = _EmptyClass()
+                value.__class__ = klass
+                instantiated = 1
+            except RuntimeError:
+                # In restricted execution, assignment to inst.__class__ is
+                # prohibited
+                pass
+        if not instantiated:
+            try:
+                value = klass(*args)
+            except TypeError, err:
+                raise TypeError, "in constructor for %s: %s" % (
+                    klass.__name__, str(err)), sys.exc_info()[2]
+        self.append(value)
+
+    def load_inst(self):
+        module = self.readline()[:-1]
+        name = self.readline()[:-1]
+        klass = self.find_class(module, name)
+        self._instantiate(klass, self.marker())
+    dispatch[INST] = load_inst
+
+    def load_obj(self):
+        # Stack is ... markobject classobject arg1 arg2 ...
+        k = self.marker()
+        klass = self.stack.pop(k+1)
+        self._instantiate(klass, k)
+    dispatch[OBJ] = load_obj
+
+    def load_newobj(self):
+        args = self.stack.pop()
+        cls = self.stack[-1]
+        obj = cls.__new__(cls, *args)
+        self.stack[-1] = obj
+    dispatch[NEWOBJ] = load_newobj
+
+    def load_global(self):
+        module = self.readline()[:-1]
+        name = self.readline()[:-1]
+        klass = self.find_class(module, name)
+        self.append(klass)
+    dispatch[GLOBAL] = load_global
+
+    def load_ext1(self):
+        code = ord(self.read(1))
+        self.get_extension(code)
+    dispatch[EXT1] = load_ext1
+
+    def load_ext2(self):
+        code = mloads('i' + self.read(2) + '\000\000')
+        self.get_extension(code)
+    dispatch[EXT2] = load_ext2
+
+    def load_ext4(self):
+        code = mloads('i' + self.read(4))
+        self.get_extension(code)
+    dispatch[EXT4] = load_ext4
+
+    def get_extension(self, code):
+        nil = []
+        obj = _extension_cache.get(code, nil)
+        if obj is not nil:
+            self.append(obj)
+            return
+        key = _inverted_registry.get(code)
+        if not key:
+            raise ValueError("unregistered extension code %d" % code)
+        obj = self.find_class(*key)
+        _extension_cache[code] = obj
+        self.append(obj)
+
+    def find_class(self, module, name):
+        # Subclasses may override this
+        __import__(module)
+        mod = sys.modules[module]
+        klass = getattr(mod, name)
+        return klass
+
+    def load_reduce(self):
+        args = self.stack.pop()
+        func = self.stack[-1]
+        value = self.stack[-1](*args)
+        self.stack[-1] = value
+    dispatch[REDUCE] = load_reduce
+
+    def load_pop(self):
+        del self.stack[-1]
+    dispatch[POP] = load_pop
+
+    def load_pop_mark(self):
+        k = self.marker()
+        del self.stack[k:]
+    dispatch[POP_MARK] = load_pop_mark
+
+    def load_dup(self):
+        self.append(self.stack[-1])
+    dispatch[DUP] = load_dup
+
+    def load_get(self):
+        self.append(self.memo[self.readline()[:-1]])
+    dispatch[GET] = load_get
+
+    def load_binget(self):
+        i = ord(self.read(1))
+        self.append(self.memo[repr(i)])
+    dispatch[BINGET] = load_binget
+
+    def load_long_binget(self):
+        i = mloads('i' + self.read(4))
+        self.append(self.memo[repr(i)])
+    dispatch[LONG_BINGET] = load_long_binget
+
+    def load_put(self):
+        self.memo[self.readline()[:-1]] = self.stack[-1]
+    dispatch[PUT] = load_put
+
+    def load_binput(self):
+        i = ord(self.read(1))
+        self.memo[repr(i)] = self.stack[-1]
+    dispatch[BINPUT] = load_binput
+
+    def load_long_binput(self):
+        i = mloads('i' + self.read(4))
+        self.memo[repr(i)] = self.stack[-1]
+    dispatch[LONG_BINPUT] = load_long_binput
+
+    def load_append(self):
+        value = self.stack.pop()
+        self.stack[-1].append(value)
+    dispatch[APPEND] = load_append
+
+    def load_appends(self):
+        stack = self.stack
+        mark = self.marker()
+        lst = stack[mark - 1]
+        lst.extend(stack[mark + 1:])
+        del stack[mark:]
+    dispatch[APPENDS] = load_appends
+
+    def load_setitem(self):
+        stack = self.stack
+        value = stack.pop()
+        key = stack.pop()
+        dict = stack[-1]
+        dict[key] = value
+    dispatch[SETITEM] = load_setitem
+
+    def load_setitems(self):
+        stack = self.stack
+        mark = self.marker()
+        dict = stack[mark - 1]
+        for i in range(mark + 1, len(stack), 2):
+            dict[stack[i]] = stack[i + 1]
+
+        del stack[mark:]
+    dispatch[SETITEMS] = load_setitems
+
+    def load_build(self):
+        stack = self.stack
+        state = stack.pop()
+        inst = stack[-1]
+        setstate = getattr(inst, "__setstate__", None)
+        if setstate:
+            setstate(state)
+            return
+        slotstate = None
+        if isinstance(state, tuple) and len(state) == 2:
+            state, slotstate = state
+        if state:
+            try:
+                d = inst.__dict__
+                try:
+                    for k, v in state.iteritems():
+                        d[intern(k)] = v
+                # keys in state don't have to be strings
+                # don't blow up, but don't go out of our way
+                except TypeError:
+                    d.update(state)
+
+            except RuntimeError:
+                # XXX In restricted execution, the instance's __dict__
+                # is not accessible.  Use the old way of unpickling
+                # the instance variables.  This is a semantic
+                # difference when unpickling in restricted
+                # vs. unrestricted modes.
+                # Note, however, that cPickle has never tried to do the
+                # .update() business, and always uses
+                #     PyObject_SetItem(inst.__dict__, key, value) in a
+                # loop over state.items().
+                for k, v in state.items():
+                    setattr(inst, k, v)
+        if slotstate:
+            for k, v in slotstate.items():
+                setattr(inst, k, v)
+    dispatch[BUILD] = load_build
+
+    def load_mark(self):
+        self.append(self.mark)
+    dispatch[MARK] = load_mark
+
+#from pickle import decode_long
+
+def decode_long(data):
+    r"""Decode a long from a two's complement little-endian binary string.
+
+    >>> decode_long('')
+    0L
+    >>> decode_long("\xff\x00")
+    255L
+    >>> decode_long("\xff\x7f")
+    32767L
+    >>> decode_long("\x00\xff")
+    -256L
+    >>> decode_long("\x00\x80")
+    -32768L
+    >>> decode_long("\x80")
+    -128L
+    >>> decode_long("\x7f")
+    127L
+    """
+
+    nbytes = len(data)
+    if nbytes == 0:
+        return 0L
+    ind = nbytes - 1
+    while ind and ord(data[ind]) == 0:
+        ind -= 1
+    n = ord(data[ind])
+    while ind:
+        n <<= 8
+        ind -= 1
+        if ord(data[ind]):
+            n += ord(data[ind])
+    if ord(data[nbytes - 1]) >= 128:
+        n -= 1L << (nbytes << 3)
+    return n
+
+def load(f):
+    return Unpickler(f).load()
+
+def loads(str):
+    f = StringIO(str)
+    return Unpickler(f).load()
diff --git a/lib_pypy/datetime.py b/lib_pypy/datetime.py
--- a/lib_pypy/datetime.py
+++ b/lib_pypy/datetime.py
@@ -1032,8 +1032,8 @@
     def __setstate(self, string):
         if len(string) != 4 or not (1 <= ord(string[2]) <= 12):
             raise TypeError("not enough arguments")
-        yhi, ylo, self._month, self._day = map(ord, string)
-        self._year = yhi * 256 + ylo
+        self._month, self._day = ord(string[2]), ord(string[3])
+        self._year = ord(string[0]) * 256 + ord(string[1])
 
     def __reduce__(self):
         return (self.__class__, self._getstate())
@@ -1421,9 +1421,10 @@
     def __setstate(self, string, tzinfo):
         if len(string) != 6 or ord(string[0]) >= 24:
             raise TypeError("an integer is required")
-        self._hour, self._minute, self._second, us1, us2, us3 = \
-                                                            map(ord, string)
-        self._microsecond = (((us1 << 8) | us2) << 8) | us3
+        self._hour, self._minute, self._second = ord(string[0]), \
+                                                 ord(string[1]), ord(string[2])
+        self._microsecond = (((ord(string[3]) << 8) | \
+                            ord(string[4])) << 8) | ord(string[5])
         self._tzinfo = tzinfo
 
     def __reduce__(self):
@@ -1903,10 +1904,11 @@
             return (basestate, self._tzinfo)
 
     def __setstate(self, string, tzinfo):
-        (yhi, ylo, self._month, self._day, self._hour,
-         self._minute, self._second, us1, us2, us3) = map(ord, string)
-        self._year = yhi * 256 + ylo
-        self._microsecond = (((us1 << 8) | us2) << 8) | us3
+        (self._month, self._day, self._hour, self._minute,
+            self._second) = (ord(string[2]), ord(string[3]), ord(string[4]),
+                             ord(string[5]), ord(string[6]))
+        self._year = ord(string[0]) * 256 + ord(string[1])
+        self._microsecond = (((ord(string[7]) << 8) | ord(string[8])) << 8) | ord(string[9])
         self._tzinfo = tzinfo
 
     def __reduce__(self):
diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py
--- a/pypy/config/pypyoption.py
+++ b/pypy/config/pypyoption.py
@@ -13,7 +13,7 @@
                and not p.basename.startswith('test')]
 
 essential_modules = dict.fromkeys(
-    ["exceptions", "_file", "sys", "__builtin__", "posix"]
+    ["exceptions", "_file", "sys", "__builtin__", "posix", "_warnings"]
 )
 
 default_modules = essential_modules.copy()
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -1471,8 +1471,8 @@
 
     def warn(self, msg, w_warningcls):
         self.appexec([self.wrap(msg), w_warningcls], """(msg, warningcls):
-            import warnings
-            warnings.warn(msg, warningcls, stacklevel=2)
+            import _warnings
+            _warnings.warn(msg, warningcls, stacklevel=2)
         """)
 
     def resolve_target(self, w_obj):
diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py
--- a/pypy/interpreter/pyparser/parsestring.py
+++ b/pypy/interpreter/pyparser/parsestring.py
@@ -1,5 +1,6 @@
 from pypy.interpreter.error import OperationError
 from pypy.interpreter import unicodehelper
+from pypy.rlib.rstring import StringBuilder
 
 def parsestr(space, encoding, s, unicode_literals=False):
     # compiler.transformer.Transformer.decode_literal depends on what 
@@ -115,21 +116,23 @@
     the string is UTF-8 encoded and should be re-encoded in the
     specified encoding.
     """
-    lis = []
+    builder = StringBuilder(len(s))
     ps = 0
     end = len(s)
-    while ps < end:
-        if s[ps] != '\\':
-            # note that the C code has a label here.
-            # the logic is the same.
+    while 1:
+        ps2 = ps
+        while ps < end and s[ps] != '\\':
             if recode_encoding and ord(s[ps]) & 0x80:
                 w, ps = decode_utf8(space, s, ps, end, recode_encoding)
-                # Append bytes to output buffer.
-                lis.append(w)
+                builder.append(w)
+                ps2 = ps
             else:
-                lis.append(s[ps])
                 ps += 1
-            continue
+        if ps > ps2:
+            builder.append_slice(s, ps2, ps)
+        if ps == end:
+            break
+
         ps += 1
         if ps == end:
             raise_app_valueerror(space, 'Trailing \\ in string')
@@ -140,25 +143,25 @@
         if ch == '\n':
             pass
         elif ch == '\\':
-            lis.append('\\')
+            builder.append('\\')
         elif ch == "'":
-            lis.append("'")
+            builder.append("'")
         elif ch == '"':
-            lis.append('"')
+            builder.append('"')
         elif ch == 'b':
-            lis.append("\010")
+            builder.append("\010")
         elif ch == 'f':
-            lis.append('\014') # FF
+            builder.append('\014') # FF
         elif ch == 't':
-            lis.append('\t')
+            builder.append('\t')
         elif ch == 'n':
-            lis.append('\n')
+            builder.append('\n')
         elif ch == 'r':
-            lis.append('\r')
+            builder.append('\r')
         elif ch == 'v':
-            lis.append('\013') # VT
+            builder.append('\013') # VT
         elif ch == 'a':
-            lis.append('\007') # BEL, not classic C
+            builder.append('\007') # BEL, not classic C
         elif ch in '01234567':
             # Look for up to two more octal digits
             span = ps
@@ -168,13 +171,13 @@
             # emulate a strange wrap-around behavior of CPython:
             # \400 is the same as \000 because 0400 == 256
             num = int(octal, 8) & 0xFF
-            lis.append(chr(num))
+            builder.append(chr(num))
             ps = span
         elif ch == 'x':
             if ps+2 <= end and isxdigit(s[ps]) and isxdigit(s[ps + 1]):
                 hexa = s[ps : ps + 2]
                 num = int(hexa, 16)
-                lis.append(chr(num))
+                builder.append(chr(num))
                 ps += 2
             else:
                 raise_app_valueerror(space, 'invalid \\x escape')
@@ -184,13 +187,13 @@
             # this was not an escape, so the backslash
             # has to be added, and we start over in
             # non-escape mode.
-            lis.append('\\')
+            builder.append('\\')
             ps -= 1
             assert ps >= 0
             continue
             # an arbitry number of unescaped UTF-8 bytes may follow.
 
-    buf = ''.join(lis)
+    buf = builder.build()
     return buf
 
 
diff --git a/pypy/interpreter/streamutil.py b/pypy/interpreter/streamutil.py
new file mode 100644
--- /dev/null
+++ b/pypy/interpreter/streamutil.py
@@ -0,0 +1,17 @@
+from pypy.rlib.streamio import StreamError
+from pypy.interpreter.error import OperationError, wrap_oserror2
+
+def wrap_streamerror(space, e, w_filename=None):
+    if isinstance(e, StreamError):
+        return OperationError(space.w_ValueError,
+                              space.wrap(e.message))
+    elif isinstance(e, OSError):
+        return wrap_oserror_as_ioerror(space, e, w_filename)
+    else:
+        # should not happen: wrap_streamerror() is only called when
+        # StreamErrors = (OSError, StreamError) are raised
+        return OperationError(space.w_IOError, space.w_None)
+
+def wrap_oserror_as_ioerror(space, e, w_filename=None):
+    return wrap_oserror2(space, e, w_filename,
+                         w_exception_class=space.w_IOError)
diff --git a/pypy/interpreter/test/test_typedef.py b/pypy/interpreter/test/test_typedef.py
--- a/pypy/interpreter/test/test_typedef.py
+++ b/pypy/interpreter/test/test_typedef.py
@@ -304,6 +304,42 @@
         assert_method(w_o1, "c", True)
         assert_method(w_o2, "c", False)
 
+    def test_total_ordering(self):
+        class W_SomeType(Wrappable):
+            def __init__(self, space, x):
+                self.space = space
+                self.x = x
+
+            def descr__lt(self, w_other):
+                assert isinstance(w_other, W_SomeType)
+                return self.space.wrap(self.x < w_other.x)
+
+            def descr__eq(self, w_other):
+                assert isinstance(w_other, W_SomeType)
+                return self.space.wrap(self.x == w_other.x)
+
+        W_SomeType.typedef = typedef.TypeDef(
+            'some_type',
+            __total_ordering__ = 'auto',
+            __lt__ = interp2app(W_SomeType.descr__lt),
+            __eq__ = interp2app(W_SomeType.descr__eq),
+            )
+        space = self.space
+        w_b = space.wrap(W_SomeType(space, 2))
+        w_c = space.wrap(W_SomeType(space, 2))
+        w_a = space.wrap(W_SomeType(space, 1))
+        # explicitly defined
+        assert space.is_true(space.lt(w_a, w_b))
+        assert not space.is_true(space.eq(w_a, w_b))
+        assert space.is_true(space.eq(w_b, w_c))
+        # automatically defined
+        assert space.is_true(space.le(w_a, w_b))
+        assert space.is_true(space.le(w_b, w_c))
+        assert space.is_true(space.gt(w_b, w_a))
+        assert space.is_true(space.ge(w_b, w_a))
+        assert space.is_true(space.ge(w_b, w_c))
+        assert space.is_true(space.ne(w_a, w_b))
+        assert not space.is_true(space.ne(w_b, w_c))
 
 class AppTestTypeDef:
 
diff --git a/pypy/interpreter/typedef.py b/pypy/interpreter/typedef.py
--- a/pypy/interpreter/typedef.py
+++ b/pypy/interpreter/typedef.py
@@ -12,7 +12,7 @@
 from pypy.rlib.jit import promote
 
 class TypeDef:
-    def __init__(self, __name, __base=None, **rawdict):
+    def __init__(self, __name, __base=None, __total_ordering__=None, **rawdict):
         "NOT_RPYTHON: initialization-time only"
         self.name = __name
         if __base is None:
@@ -34,6 +34,9 @@
         # xxx used by faking
         self.fakedcpytype = None
         self.add_entries(**rawdict)
+        assert __total_ordering__ in (None, 'auto'), "Unknown value for __total_ordering"
+        if __total_ordering__ == 'auto':
+            self.auto_total_ordering()
     
     def add_entries(self, **rawdict):
         # xxx fix the names of the methods to match what app-level expects
@@ -41,7 +44,15 @@
             if isinstance(value, (interp2app, GetSetProperty)):
                 value.name = key
         self.rawdict.update(rawdict)
-    
+
+    def auto_total_ordering(self):
+        assert '__lt__' in self.rawdict, "__total_ordering='auto' requires __lt__"
+        assert '__eq__' in self.rawdict, "__total_ordering='auto' requires __eq__"
+        self.add_entries(__le__ = auto__le__,
+                         __gt__ = auto__gt__,
+                         __ge__ = auto__ge__,
+                         __ne__ = auto__ne__)
+
     def _freeze_(self):
         # hint for the annotator: track individual constant instances of TypeDef
         return True
@@ -50,6 +61,26 @@
         return "<%s name=%r>" % (self.__class__.__name__, self.name)
 
 
+# generic special cmp methods defined on top of __lt__ and __eq__, used by
+# automatic total ordering
+
+ at interp2app
+def auto__le__(space, w_self, w_other):
+    return space.not_(space.lt(w_other, w_self))
+
+ at interp2app
+def auto__gt__(space, w_self, w_other):
+    return space.lt(w_other, w_self)
+
+ at interp2app
+def auto__ge__(space, w_self, w_other):
+    return space.not_(space.lt(w_self, w_other))
+
+ at interp2app
+def auto__ne__(space, w_self, w_other):
+    return space.not_(space.eq(w_self, w_other))
+
+
 # ____________________________________________________________
 #  Hash support
 
diff --git a/pypy/jit/backend/llgraph/llimpl.py b/pypy/jit/backend/llgraph/llimpl.py
--- a/pypy/jit/backend/llgraph/llimpl.py
+++ b/pypy/jit/backend/llgraph/llimpl.py
@@ -171,7 +171,7 @@
     'unicodesetitem'  : (('ref', 'int', 'int'), 'int'),
     'cast_ptr_to_int' : (('ref',), 'int'),
     'cast_int_to_ptr' : (('int',), 'ref'),
-    'debug_merge_point': (('ref', 'int'), None),
+    'debug_merge_point': (('ref', 'int', 'int'), None),
     'force_token'     : ((), 'int'),
     'call_may_force'  : (('int', 'varargs'), 'intorptr'),
     'guard_not_forced': ((), None),
diff --git a/pypy/jit/backend/llsupport/gc.py b/pypy/jit/backend/llsupport/gc.py
--- a/pypy/jit/backend/llsupport/gc.py
+++ b/pypy/jit/backend/llsupport/gc.py
@@ -208,6 +208,7 @@
     This is the class supporting --gcrootfinder=asmgcc.
     """
     is_shadow_stack = False
+    is_64_bit = (WORD == 8)
 
     LOC_REG       = 0
     LOC_ESP_PLUS  = 1
@@ -336,17 +337,17 @@
             self._gcmap_deadentries += 1
             item += asmgcroot.arrayitemsize
 
-    def get_basic_shape(self, is_64_bit=False):
+    def get_basic_shape(self):
         # XXX: Should this code even really know about stack frame layout of
         # the JIT?
-        if is_64_bit:
-            return [chr(self.LOC_EBP_PLUS  | 8),
-                    chr(self.LOC_EBP_MINUS | 8),
-                    chr(self.LOC_EBP_MINUS | 16),
-                    chr(self.LOC_EBP_MINUS | 24),
-                    chr(self.LOC_EBP_MINUS | 32),
-                    chr(self.LOC_EBP_MINUS | 40),
-                    chr(self.LOC_EBP_PLUS  | 0),
+        if self.is_64_bit:
+            return [chr(self.LOC_EBP_PLUS  | 4),    # return addr: at   8(%rbp)
+                    chr(self.LOC_EBP_MINUS | 4),    # saved %rbx:  at  -8(%rbp)
+                    chr(self.LOC_EBP_MINUS | 8),    # saved %r12:  at -16(%rbp)
+                    chr(self.LOC_EBP_MINUS | 12),   # saved %r13:  at -24(%rbp)
+                    chr(self.LOC_EBP_MINUS | 16),   # saved %r14:  at -32(%rbp)
+                    chr(self.LOC_EBP_MINUS | 20),   # saved %r15:  at -40(%rbp)
+                    chr(self.LOC_EBP_PLUS  | 0),    # saved %rbp:  at    (%rbp)
                     chr(0)]
         else:
             return [chr(self.LOC_EBP_PLUS  | 4),    # return addr: at   4(%ebp)
@@ -366,7 +367,11 @@
         shape.append(chr(number | flag))
 
     def add_frame_offset(self, shape, offset):
-        assert (offset & 3) == 0
+        if self.is_64_bit:
+            assert (offset & 7) == 0
+            offset >>= 1
+        else:
+            assert (offset & 3) == 0
         if offset >= 0:
             num = self.LOC_EBP_PLUS | offset
         else:
@@ -518,7 +523,7 @@
     def initialize(self):
         pass
 
-    def get_basic_shape(self, is_64_bit=False):
+    def get_basic_shape(self):
         return []
 
     def add_frame_offset(self, shape, offset):
@@ -769,11 +774,19 @@
         self.generate_function('malloc_unicode', malloc_unicode,
                                [lltype.Signed])
 
-        # Rarely called: allocate a fixed-size amount of bytes, but
-        # not in the nursery, because it is too big.  Implemented like
-        # malloc_nursery_slowpath() above.
-        self.generate_function('malloc_fixedsize', malloc_nursery_slowpath,
-                               [lltype.Signed])
+        # Never called as far as I can tell, but there for completeness:
+        # allocate a fixed-size object, but not in the nursery, because
+        # it is too big.
+        def malloc_big_fixedsize(size, tid):
+            if self.DEBUG:
+                self._random_usage_of_xmm_registers()
+            type_id = llop.extract_ushort(llgroup.HALFWORD, tid)
+            check_typeid(type_id)
+            return llop1.do_malloc_fixedsize_clear(llmemory.GCREF,
+                                                   type_id, size,
+                                                   False, False, False)
+        self.generate_function('malloc_big_fixedsize', malloc_big_fixedsize,
+                               [lltype.Signed] * 2)
 
     def _bh_malloc(self, sizedescr):
         from pypy.rpython.memory.gctypelayout import check_typeid
diff --git a/pypy/jit/backend/llsupport/rewrite.py b/pypy/jit/backend/llsupport/rewrite.py
--- a/pypy/jit/backend/llsupport/rewrite.py
+++ b/pypy/jit/backend/llsupport/rewrite.py
@@ -96,8 +96,10 @@
     def handle_new_fixedsize(self, descr, op):
         assert isinstance(descr, SizeDescr)
         size = descr.size
-        self.gen_malloc_nursery(size, op.result)
-        self.gen_initialize_tid(op.result, descr.tid)
+        if self.gen_malloc_nursery(size, op.result):
+            self.gen_initialize_tid(op.result, descr.tid)
+        else:
+            self.gen_malloc_fixedsize(size, descr.tid, op.result)
 
     def handle_new_array(self, arraydescr, op):
         v_length = op.getarg(0)
@@ -112,8 +114,8 @@
                 pass    # total_size is still -1
         elif arraydescr.itemsize == 0:
             total_size = arraydescr.basesize
-        if 0 <= total_size <= 0xffffff:     # up to 16MB, arbitrarily
-            self.gen_malloc_nursery(total_size, op.result)
+        if (total_size >= 0 and
+                self.gen_malloc_nursery(total_size, op.result)):
             self.gen_initialize_tid(op.result, arraydescr.tid)
             self.gen_initialize_len(op.result, v_length, arraydescr.lendescr)
         elif self.gc_ll_descr.kind == 'boehm':
@@ -147,13 +149,22 @@
         # mark 'v_result' as freshly malloced
         self.recent_mallocs[v_result] = None
 
-    def gen_malloc_fixedsize(self, size, v_result):
-        """Generate a CALL_MALLOC_GC(malloc_fixedsize_fn, Const(size)).
-        Note that with the framework GC, this should be called very rarely.
+    def gen_malloc_fixedsize(self, size, typeid, v_result):
+        """Generate a CALL_MALLOC_GC(malloc_fixedsize_fn, ...).
+        Used on Boehm, and on the framework GC for large fixed-size
+        mallocs.  (For all I know this latter case never occurs in
+        practice, but better safe than sorry.)
         """
-        addr = self.gc_ll_descr.get_malloc_fn_addr('malloc_fixedsize')
-        self._gen_call_malloc_gc([ConstInt(addr), ConstInt(size)], v_result,
-                                 self.gc_ll_descr.malloc_fixedsize_descr)
+        if self.gc_ll_descr.fielddescr_tid is not None:  # framework GC
+            assert (size & (WORD-1)) == 0, "size not aligned?"
+            addr = self.gc_ll_descr.get_malloc_fn_addr('malloc_big_fixedsize')
+            args = [ConstInt(addr), ConstInt(size), ConstInt(typeid)]
+            descr = self.gc_ll_descr.malloc_big_fixedsize_descr
+        else:                                            # Boehm
+            addr = self.gc_ll_descr.get_malloc_fn_addr('malloc_fixedsize')
+            args = [ConstInt(addr), ConstInt(size)]
+            descr = self.gc_ll_descr.malloc_fixedsize_descr
+        self._gen_call_malloc_gc(args, v_result, descr)
 
     def gen_boehm_malloc_array(self, arraydescr, v_num_elem, v_result):
         """Generate a CALL_MALLOC_GC(malloc_array_fn, ...) for Boehm."""
@@ -211,8 +222,7 @@
         """
         size = self.round_up_for_allocation(size)
         if not self.gc_ll_descr.can_use_nursery_malloc(size):
-            self.gen_malloc_fixedsize(size, v_result)
-            return
+            return False
         #
         op = None
         if self._op_malloc_nursery is not None:
@@ -238,6 +248,7 @@
         self._previous_size = size
         self._v_last_malloced_nursery = v_result
         self.recent_mallocs[v_result] = None
+        return True
 
     def gen_initialize_tid(self, v_newgcobj, tid):
         if self.gc_ll_descr.fielddescr_tid is not None:
diff --git a/pypy/jit/backend/llsupport/test/test_gc.py b/pypy/jit/backend/llsupport/test/test_gc.py
--- a/pypy/jit/backend/llsupport/test/test_gc.py
+++ b/pypy/jit/backend/llsupport/test/test_gc.py
@@ -57,6 +57,7 @@
         def frame_pos(n):
             return -4*(4+n)
         gcrootmap = GcRootMap_asmgcc()
+        gcrootmap.is_64_bit = False
         num1 = frame_pos(-5)
         num1a = num1|2
         num2 = frame_pos(55)
diff --git a/pypy/jit/backend/llsupport/test/test_rewrite.py b/pypy/jit/backend/llsupport/test/test_rewrite.py
--- a/pypy/jit/backend/llsupport/test/test_rewrite.py
+++ b/pypy/jit/backend/llsupport/test/test_rewrite.py
@@ -119,12 +119,19 @@
             jump()
         """, """
             []
-            p0 = call_malloc_gc(ConstClass(malloc_fixedsize), \
-                                %(adescr.basesize + 10 * adescr.itemsize)d, \
-                                descr=malloc_fixedsize_descr)
-            setfield_gc(p0, 10, descr=alendescr)
+            p0 = call_malloc_gc(ConstClass(malloc_array),   \
+                                %(adescr.basesize)d,        \
+                                10,                         \
+                                %(adescr.itemsize)d,        \
+                                %(adescr.lendescr.offset)d, \
+                                descr=malloc_array_descr)
             jump()
         """)
+##      should ideally be:
+##            p0 = call_malloc_gc(ConstClass(malloc_fixedsize), \
+##                                %(adescr.basesize + 10 * adescr.itemsize)d, \
+##                                descr=malloc_fixedsize_descr)
+##            setfield_gc(p0, 10, descr=alendescr)
 
     def test_new_array_variable(self):
         self.check_rewrite("""
@@ -178,13 +185,20 @@
             jump()
         """, """
             [i1]
-            p0 = call_malloc_gc(ConstClass(malloc_fixedsize),   \
-                                %(unicodedescr.basesize +       \
-                                  10 * unicodedescr.itemsize)d, \
-                                descr=malloc_fixedsize_descr)
-            setfield_gc(p0, 10, descr=unicodelendescr)
+            p0 = call_malloc_gc(ConstClass(malloc_array),   \
+                                %(unicodedescr.basesize)d,  \
+                                10,                         \
+                                %(unicodedescr.itemsize)d,  \
+                                %(unicodelendescr.offset)d, \
+                                descr=malloc_array_descr)
             jump()
         """)
+##      should ideally be:
+##            p0 = call_malloc_gc(ConstClass(malloc_fixedsize),   \
+##                                %(unicodedescr.basesize +       \
+##                                  10 * unicodedescr.itemsize)d, \
+##                                descr=malloc_fixedsize_descr)
+##            setfield_gc(p0, 10, descr=unicodelendescr)
 
 
 class TestFramework(RewriteTests):
@@ -203,7 +217,7 @@
         #
         class FakeCPU(object):
             def sizeof(self, STRUCT):
-                descr = SizeDescrWithVTable(102)
+                descr = SizeDescrWithVTable(104)
                 descr.tid = 9315
                 return descr
         self.cpu = FakeCPU()
@@ -368,11 +382,9 @@
             jump()
         """, """
             []
-            p0 = call_malloc_gc(ConstClass(malloc_fixedsize), \
-                                %(bdescr.basesize + 104)d,    \
-                                descr=malloc_fixedsize_descr)
-            setfield_gc(p0, 8765, descr=tiddescr)
-            setfield_gc(p0, 103, descr=blendescr)
+            p0 = call_malloc_gc(ConstClass(malloc_array), 1,  \
+                                %(bdescr.tid)d, 103,          \
+                                descr=malloc_array_descr)
             jump()
         """)
 
@@ -435,9 +447,8 @@
             jump()
         """, """
             [p1]
-            p0 = call_malloc_gc(ConstClass(malloc_fixedsize), 104, \
-                                descr=malloc_fixedsize_descr)
-            setfield_gc(p0, 9315, descr=tiddescr)
+            p0 = call_malloc_gc(ConstClass(malloc_big_fixedsize), 104, 9315, \
+                                descr=malloc_big_fixedsize_descr)
             setfield_gc(p0, ConstClass(o_vtable), descr=vtable_descr)
             jump()
         """)
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -266,6 +266,38 @@
         res = self.cpu.get_latest_value_int(0)
         assert res == 20
 
+    def test_compile_big_bridge_out_of_small_loop(self):
+        i0 = BoxInt()
+        faildescr1 = BasicFailDescr(1)
+        looptoken = JitCellToken()
+        operations = [
+            ResOperation(rop.GUARD_FALSE, [i0], None, descr=faildescr1),
+            ResOperation(rop.FINISH, [], None, descr=BasicFailDescr(2)),
+            ]
+        inputargs = [i0]
+        operations[0].setfailargs([i0])
+        self.cpu.compile_loop(inputargs, operations, looptoken)
+
+        i1list = [BoxInt() for i in range(1000)]
+        bridge = []
+        iprev = i0
+        for i1 in i1list:
+            bridge.append(ResOperation(rop.INT_ADD, [iprev, ConstInt(1)], i1))
+            iprev = i1
+        bridge.append(ResOperation(rop.GUARD_FALSE, [i0], None,
+                                   descr=BasicFailDescr(3)))
+        bridge.append(ResOperation(rop.FINISH, [], None,
+                                   descr=BasicFailDescr(4)))
+        bridge[-2].setfailargs(i1list)
+
+        self.cpu.compile_bridge(faildescr1, [i0], bridge, looptoken)
+
+        fail = self.cpu.execute_token(looptoken, 1)
+        assert fail.identifier == 3
+        for i in range(1000):
+            res = self.cpu.get_latest_value_int(i)
+            assert res == 2 + i
+
     def test_get_latest_value_count(self):
         i0 = BoxInt()
         i1 = BoxInt()
@@ -572,7 +604,7 @@
                                          [funcbox, BoxInt(arg1), BoxInt(arg2)],
                                          'int', descr=calldescr)
             assert res.getint() == f(arg1, arg2)
-        
+
     def test_call_stack_alignment(self):
         # test stack alignment issues, notably for Mac OS/X.
         # also test the ordering of the arguments.
@@ -1458,7 +1490,8 @@
     def test_noops(self):
         c_box = self.alloc_string("hi there").constbox()
         c_nest = ConstInt(0)
-        self.execute_operation(rop.DEBUG_MERGE_POINT, [c_box, c_nest], 'void')
+        c_id = ConstInt(0)
+        self.execute_operation(rop.DEBUG_MERGE_POINT, [c_box, c_nest, c_id], 'void')
         self.execute_operation(rop.JIT_DEBUG, [c_box, c_nest, c_nest,
                                                c_nest, c_nest], 'void')
 
@@ -3029,7 +3062,7 @@
             ResOperation(rop.JUMP, [i2], None, descr=targettoken2),
             ]
         self.cpu.compile_bridge(faildescr, inputargs, operations, looptoken)
-        
+
         fail = self.cpu.execute_token(looptoken, 2)
         assert fail.identifier == 3
         res = self.cpu.get_latest_value_int(0)
@@ -3074,7 +3107,7 @@
             assert len(mc) == len(ops)
             for i in range(len(mc)):
                 assert mc[i].split("\t")[-1].startswith(ops[i])
-            
+
         data = ctypes.string_at(info.asmaddr, info.asmlen)
         mc = list(machine_code_dump(data, info.asmaddr, cpuname))
         lines = [line for line in mc if line.count('\t') == 2]
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -88,7 +88,6 @@
         self._debug = False
         self.debug_counter_descr = cpu.fielddescrof(DEBUG_COUNTER, 'i')
         self.fail_boxes_count = 0
-        self._current_depths_cache = (0, 0)
         self.datablockwrapper = None
         self.stack_check_slowpath = 0
         self.propagate_exception_path = 0
@@ -442,10 +441,8 @@
         looppos = self.mc.get_relative_pos()
         looptoken._x86_loop_code = looppos
         clt.frame_depth = -1     # temporarily
-        clt.param_depth = -1     # temporarily
-        frame_depth, param_depth = self._assemble(regalloc, operations)
+        frame_depth = self._assemble(regalloc, operations)
         clt.frame_depth = frame_depth
-        clt.param_depth = param_depth
         #
         size_excluding_failure_stuff = self.mc.get_relative_pos()
         self.write_pending_failure_recoveries()
@@ -459,8 +456,7 @@
             rawstart + size_excluding_failure_stuff,
             rawstart))
         debug_stop("jit-backend-addr")
-        self._patch_stackadjust(rawstart + stackadjustpos,
-                                frame_depth + param_depth)
+        self._patch_stackadjust(rawstart + stackadjustpos, frame_depth)
         self.patch_pending_failure_recoveries(rawstart)
         #
         ops_offset = self.mc.ops_offset
@@ -500,14 +496,13 @@
             assert ([loc.assembler() for loc in arglocs] ==
                     [loc.assembler() for loc in faildescr._x86_debug_faillocs])
         regalloc = RegAlloc(self, self.cpu.translate_support_code)
-        fail_depths = faildescr._x86_current_depths
         startpos = self.mc.get_relative_pos()
-        operations = regalloc.prepare_bridge(fail_depths, inputargs, arglocs,
+        operations = regalloc.prepare_bridge(inputargs, arglocs,
                                              operations,
                                              self.current_clt.allgcrefs)
 
         stackadjustpos = self._patchable_stackadjust()
-        frame_depth, param_depth = self._assemble(regalloc, operations)
+        frame_depth = self._assemble(regalloc, operations)
         codeendpos = self.mc.get_relative_pos()
         self.write_pending_failure_recoveries()
         fullsize = self.mc.get_relative_pos()
@@ -517,19 +512,16 @@
         debug_print("bridge out of Guard %d has address %x to %x" %
                     (descr_number, rawstart, rawstart + codeendpos))
         debug_stop("jit-backend-addr")
-        self._patch_stackadjust(rawstart + stackadjustpos,
-                                frame_depth + param_depth)
+        self._patch_stackadjust(rawstart + stackadjustpos, frame_depth)
         self.patch_pending_failure_recoveries(rawstart)
         if not we_are_translated():
             # for the benefit of tests
             faildescr._x86_bridge_frame_depth = frame_depth
-            faildescr._x86_bridge_param_depth = param_depth
         # patch the jump from original guard
         self.patch_jump_for_descr(faildescr, rawstart)
         ops_offset = self.mc.ops_offset
         self.fixup_target_tokens(rawstart)
         self.current_clt.frame_depth = max(self.current_clt.frame_depth, frame_depth)
-        self.current_clt.param_depth = max(self.current_clt.param_depth, param_depth)
         self.teardown()
         # oprofile support
         if self.cpu.profile_agent is not None:
@@ -700,15 +692,12 @@
         regalloc.walk_operations(operations)
         if we_are_translated() or self.cpu.dont_keepalive_stuff:
             self._regalloc = None   # else keep it around for debugging
-        frame_depth = regalloc.fm.get_frame_depth()
-        param_depth = regalloc.param_depth
+        frame_depth = regalloc.get_final_frame_depth()
         jump_target_descr = regalloc.jump_target_descr
         if jump_target_descr is not None:
             target_frame_depth = jump_target_descr._x86_clt.frame_depth
-            target_param_depth = jump_target_descr._x86_clt.param_depth
             frame_depth = max(frame_depth, target_frame_depth)
-            param_depth = max(param_depth, target_param_depth)
-        return frame_depth, param_depth
+        return frame_depth
 
     def _patchable_stackadjust(self):
         # stack adjustment LEA
@@ -892,10 +881,9 @@
         genop_math_list[oopspecindex](self, op, arglocs, resloc)
 
     def regalloc_perform_with_guard(self, op, guard_op, faillocs,
-                                    arglocs, resloc, current_depths):
+                                    arglocs, resloc):
         faildescr = guard_op.getdescr()
         assert isinstance(faildescr, AbstractFailDescr)
-        faildescr._x86_current_depths = current_depths
         failargs = guard_op.getfailargs()
         guard_opnum = guard_op.getopnum()
         guard_token = self.implement_guard_recovery(guard_opnum,
@@ -911,10 +899,9 @@
             # must be added by the genop_guard_list[]()
             assert guard_token is self.pending_guard_tokens[-1]
 
-    def regalloc_perform_guard(self, guard_op, faillocs, arglocs, resloc,
-                               current_depths):
+    def regalloc_perform_guard(self, guard_op, faillocs, arglocs, resloc):
         self.regalloc_perform_with_guard(None, guard_op, faillocs, arglocs,
-                                         resloc, current_depths)
+                                         resloc)
 
     def load_effective_addr(self, sizereg, baseofs, scale, result, frm=imm0):
         self.mc.LEA(result, addr_add(frm, sizereg, baseofs, scale))
@@ -1038,13 +1025,14 @@
                     self.mc.MOV(tmp, loc)
                     self.mc.MOV_sr(p, tmp.value)
             p += loc.get_width()
-        self._regalloc.reserve_param(p//WORD)
         # x is a location
         self.mc.CALL(x)
         self.mark_gc_roots(force_index)
         #
         if callconv != FFI_DEFAULT_ABI:
             self._fix_stdcall(callconv, p)
+        #
+        self._regalloc.needed_extra_stack_locations(p//WORD)
 
     def _fix_stdcall(self, callconv, p):
         from pypy.rlib.clibffi import FFI_STDCALL
@@ -1127,9 +1115,9 @@
             x = r10
         remap_frame_layout(self, src_locs, dst_locs, X86_64_SCRATCH_REG)
 
-        self._regalloc.reserve_param(len(pass_on_stack))
         self.mc.CALL(x)
         self.mark_gc_roots(force_index)
+        self._regalloc.needed_extra_stack_locations(len(pass_on_stack))
 
     def call(self, addr, args, res):
         force_index = self.write_new_force_index()
@@ -2136,7 +2124,6 @@
             if reg in save_registers:
                 self.mc.MOV_sr(p, reg.value)
                 p += WORD
-        self._regalloc.reserve_param(p//WORD)
         #
         if gcrootmap.is_shadow_stack:
             args = []
@@ -2192,6 +2179,7 @@
             if reg in save_registers:
                 self.mc.MOV_rs(reg.value, p)
                 p += WORD
+        self._regalloc.needed_extra_stack_locations(p//WORD)
 
     def call_reacquire_gil(self, gcrootmap, save_loc):
         # save the previous result (eax/xmm0) into the stack temporarily.
@@ -2199,7 +2187,6 @@
         # to save xmm0 in this case.
         if isinstance(save_loc, RegLoc) and not save_loc.is_xmm:
             self.mc.MOV_sr(WORD, save_loc.value)
-            self._regalloc.reserve_param(2)
         # call the reopenstack() function (also reacquiring the GIL)
         if gcrootmap.is_shadow_stack:
             args = []
@@ -2219,6 +2206,7 @@
         # restore the result from the stack
         if isinstance(save_loc, RegLoc) and not save_loc.is_xmm:
             self.mc.MOV_rs(save_loc.value, WORD)
+            self._regalloc.needed_extra_stack_locations(2)
 
     def genop_guard_call_assembler(self, op, guard_op, guard_token,
                                    arglocs, result_loc):
@@ -2495,11 +2483,6 @@
         # copy of heap(nursery_free_adr), so that the final MOV below is
         # a no-op.
 
-        # reserve room for the argument to the real malloc and the
-        # saved XMM regs (on 32 bit: 8 * 2 words; on 64 bit: 16 * 1
-        # word)
-        self._regalloc.reserve_param(1+16)
-
         gcrootmap = self.cpu.gc_ll_descr.gcrootmap
         shadow_stack = (gcrootmap is not None and gcrootmap.is_shadow_stack)
         if not shadow_stack:
@@ -2510,6 +2493,11 @@
         slowpath_addr2 = self.malloc_slowpath2
         self.mc.CALL(imm(slowpath_addr2))
 
+        # reserve room for the argument to the real malloc and the
+        # saved XMM regs (on 32 bit: 8 * 2 words; on 64 bit: 16 * 1
+        # word)
+        self._regalloc.needed_extra_stack_locations(1+16)
+
         offset = self.mc.get_relative_pos() - jmp_adr
         assert 0 < offset <= 127
         self.mc.overwrite(jmp_adr-1, chr(offset))
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -168,7 +168,7 @@
 
     def _prepare(self, inputargs, operations, allgcrefs):
         self.fm = X86FrameManager()
-        self.param_depth = 0
+        self.min_frame_depth = 0
         cpu = self.assembler.cpu
         operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations,
                                                        allgcrefs)
@@ -193,11 +193,9 @@
             self.min_bytes_before_label = 13
         return operations
 
-    def prepare_bridge(self, prev_depths, inputargs, arglocs, operations,
-                       allgcrefs):
+    def prepare_bridge(self, inputargs, arglocs, operations, allgcrefs):
         operations = self._prepare(inputargs, operations, allgcrefs)
         self._update_bindings(arglocs, inputargs)
-        self.param_depth = prev_depths[1]
         self.min_bytes_before_label = 0
         return operations
 
@@ -205,8 +203,15 @@
         self.min_bytes_before_label = max(self.min_bytes_before_label,
                                           at_least_position)
 
-    def reserve_param(self, n):
-        self.param_depth = max(self.param_depth, n)
+    def needed_extra_stack_locations(self, n):
+        # call *after* you needed extra stack locations: (%esp), (%esp+4)...
+        min_frame_depth = self.fm.get_frame_depth() + n
+        if min_frame_depth > self.min_frame_depth:
+            self.min_frame_depth = min_frame_depth
+
+    def get_final_frame_depth(self):
+        self.needed_extra_stack_locations(0)  # update min_frame_depth
+        return self.min_frame_depth
 
     def _set_initial_bindings(self, inputargs):
         if IS_X86_64:
@@ -376,25 +381,12 @@
     def locs_for_fail(self, guard_op):
         return [self.loc(v) for v in guard_op.getfailargs()]
 
-    def get_current_depth(self):
-        # return (self.fm.frame_depth, self.param_depth), but trying to share
-        # the resulting tuple among several calls
-        arg0 = self.fm.get_frame_depth()
-        arg1 = self.param_depth
-        result = self.assembler._current_depths_cache
-        if result[0] != arg0 or result[1] != arg1:
-            result = (arg0, arg1)
-            self.assembler._current_depths_cache = result
-        return result
-
     def perform_with_guard(self, op, guard_op, arglocs, result_loc):
         faillocs = self.locs_for_fail(guard_op)
         self.rm.position += 1
         self.xrm.position += 1
-        current_depths = self.get_current_depth()
         self.assembler.regalloc_perform_with_guard(op, guard_op, faillocs,
-                                                   arglocs, result_loc,
-                                                   current_depths)
+                                                   arglocs, result_loc)
         if op.result is not None:
             self.possibly_free_var(op.result)
         self.possibly_free_vars(guard_op.getfailargs())
@@ -407,10 +399,8 @@
                                                       arglocs))
             else:
                 self.assembler.dump('%s(%s)' % (guard_op, arglocs))
-        current_depths = self.get_current_depth()
         self.assembler.regalloc_perform_guard(guard_op, faillocs, arglocs,
-                                              result_loc,
-                                              current_depths)
+                                              result_loc)
         self.possibly_free_vars(guard_op.getfailargs())
 
     def PerformDiscard(self, op, arglocs):
@@ -1393,7 +1383,7 @@
         self.force_spill_var(op.getarg(0))
 
     def get_mark_gc_roots(self, gcrootmap, use_copy_area=False):
-        shape = gcrootmap.get_basic_shape(IS_X86_64)
+        shape = gcrootmap.get_basic_shape()
         for v, val in self.fm.bindings.items():
             if (isinstance(v, BoxPtr) and self.rm.stays_alive(v)):
                 assert isinstance(val, StackLoc)
diff --git a/pypy/jit/backend/x86/test/test_gc_integration.py b/pypy/jit/backend/x86/test/test_gc_integration.py
--- a/pypy/jit/backend/x86/test/test_gc_integration.py
+++ b/pypy/jit/backend/x86/test/test_gc_integration.py
@@ -28,7 +28,7 @@
 
 class MockGcRootMap(object):
     is_shadow_stack = False
-    def get_basic_shape(self, is_64_bit):
+    def get_basic_shape(self):
         return ['shape']
     def add_frame_offset(self, shape, offset):
         shape.append(offset)
@@ -184,6 +184,8 @@
         self.addrs[1] = self.addrs[0] + 64
         self.calls = []
         def malloc_slowpath(size):
+            if self.gcrootmap is not None:   # hook
+                self.gcrootmap.hook_malloc_slowpath()
             self.calls.append(size)
             # reset the nursery
             nadr = rffi.cast(lltype.Signed, self.nursery)
@@ -257,3 +259,218 @@
         assert gc_ll_descr.addrs[0] == nurs_adr + 24
         # this should call slow path once
         assert gc_ll_descr.calls == [24]
+
+    def test_save_regs_around_malloc(self):
+        S1 = lltype.GcStruct('S1')
+        S2 = lltype.GcStruct('S2', ('s0', lltype.Ptr(S1)),
+                                   ('s1', lltype.Ptr(S1)),
+                                   ('s2', lltype.Ptr(S1)),
+                                   ('s3', lltype.Ptr(S1)),
+                                   ('s4', lltype.Ptr(S1)),
+                                   ('s5', lltype.Ptr(S1)),
+                                   ('s6', lltype.Ptr(S1)),
+                                   ('s7', lltype.Ptr(S1)),
+                                   ('s8', lltype.Ptr(S1)),
+                                   ('s9', lltype.Ptr(S1)),
+                                   ('s10', lltype.Ptr(S1)),
+                                   ('s11', lltype.Ptr(S1)),
+                                   ('s12', lltype.Ptr(S1)),
+                                   ('s13', lltype.Ptr(S1)),
+                                   ('s14', lltype.Ptr(S1)),
+                                   ('s15', lltype.Ptr(S1)))
+        cpu = self.cpu
+        self.namespace = self.namespace.copy()
+        for i in range(16):
+            self.namespace['ds%i' % i] = cpu.fielddescrof(S2, 's%d' % i)
+        ops = '''
+        [p0]
+        p1 = getfield_gc(p0, descr=ds0)
+        p2 = getfield_gc(p0, descr=ds1)
+        p3 = getfield_gc(p0, descr=ds2)
+        p4 = getfield_gc(p0, descr=ds3)
+        p5 = getfield_gc(p0, descr=ds4)
+        p6 = getfield_gc(p0, descr=ds5)
+        p7 = getfield_gc(p0, descr=ds6)
+        p8 = getfield_gc(p0, descr=ds7)
+        p9 = getfield_gc(p0, descr=ds8)
+        p10 = getfield_gc(p0, descr=ds9)
+        p11 = getfield_gc(p0, descr=ds10)
+        p12 = getfield_gc(p0, descr=ds11)
+        p13 = getfield_gc(p0, descr=ds12)
+        p14 = getfield_gc(p0, descr=ds13)
+        p15 = getfield_gc(p0, descr=ds14)
+        p16 = getfield_gc(p0, descr=ds15)
+        #
+        # now all registers are in use
+        p17 = call_malloc_nursery(40)
+        p18 = call_malloc_nursery(40)     # overflow
+        #
+        finish(p1, p2, p3, p4, p5, p6, p7, p8,         \
+               p9, p10, p11, p12, p13, p14, p15, p16)
+        '''
+        s2 = lltype.malloc(S2)
+        for i in range(16):
+            setattr(s2, 's%d' % i, lltype.malloc(S1))
+        s2ref = lltype.cast_opaque_ptr(llmemory.GCREF, s2)
+        #
+        self.interpret(ops, [s2ref])
+        gc_ll_descr = cpu.gc_ll_descr
+        gc_ll_descr.check_nothing_in_nursery()
+        assert gc_ll_descr.calls == [40]
+        # check the returned pointers
+        for i in range(16):
+            s1ref = self.cpu.get_latest_value_ref(i)
+            s1 = lltype.cast_opaque_ptr(lltype.Ptr(S1), s1ref)
+            assert s1 == getattr(s2, 's%d' % i)
+
+
+class MockShadowStackRootMap(MockGcRootMap):
+    is_shadow_stack = True
+    MARKER_FRAME = 88       # this marker follows the frame addr
+    S1 = lltype.GcStruct('S1')
+
+    def __init__(self):
+        self.addrs = lltype.malloc(rffi.CArray(lltype.Signed), 20,
+                                   flavor='raw')
+        # root_stack_top
+        self.addrs[0] = rffi.cast(lltype.Signed, self.addrs) + 3*WORD
+        # random stuff
+        self.addrs[1] = 123456
+        self.addrs[2] = 654321
+        self.check_initial_and_final_state()
+        self.callshapes = {}
+        self.should_see = []
+
+    def check_initial_and_final_state(self):
+        assert self.addrs[0] == rffi.cast(lltype.Signed, self.addrs) + 3*WORD
+        assert self.addrs[1] == 123456
+        assert self.addrs[2] == 654321
+
+    def get_root_stack_top_addr(self):
+        return rffi.cast(lltype.Signed, self.addrs)
+
+    def compress_callshape(self, shape, datablockwrapper):
+        assert shape[0] == 'shape'
+        return ['compressed'] + shape[1:]
+
+    def write_callshape(self, mark, force_index):
+        assert mark[0] == 'compressed'
+        assert force_index not in self.callshapes
+        assert force_index == 42 + len(self.callshapes)
+        self.callshapes[force_index] = mark
+
+    def hook_malloc_slowpath(self):
+        num_entries = self.addrs[0] - rffi.cast(lltype.Signed, self.addrs)
+        assert num_entries == 5*WORD    # 3 initially, plus 2 by the asm frame
+        assert self.addrs[1] == 123456  # unchanged
+        assert self.addrs[2] == 654321  # unchanged
+        frame_addr = self.addrs[3]                   # pushed by the asm frame
+        assert self.addrs[4] == self.MARKER_FRAME    # pushed by the asm frame
+        #
+        from pypy.jit.backend.x86.arch import FORCE_INDEX_OFS
+        addr = rffi.cast(rffi.CArrayPtr(lltype.Signed),
+                         frame_addr + FORCE_INDEX_OFS)
+        force_index = addr[0]
+        assert force_index == 43    # in this test: the 2nd call_malloc_nursery
+        #
+        # The callshapes[43] saved above should list addresses both in the
+        # COPY_AREA and in the "normal" stack, where all the 16 values p1-p16
+        # of test_save_regs_at_correct_place should have been stored.  Here
+        # we replace them with new addresses, to emulate a moving GC.
+        shape = self.callshapes[force_index]
+        assert len(shape[1:]) == len(self.should_see)
+        new_objects = [None] * len(self.should_see)
+        for ofs in shape[1:]:
+            assert isinstance(ofs, int)    # not a register at all here
+            addr = rffi.cast(rffi.CArrayPtr(lltype.Signed), frame_addr + ofs)
+            contains = addr[0]
+            for j in range(len(self.should_see)):
+                obj = self.should_see[j]
+                if contains == rffi.cast(lltype.Signed, obj):
+                    assert new_objects[j] is None   # duplicate?
+                    break
+            else:
+                assert 0   # the value read from the stack looks random?
+            new_objects[j] = lltype.malloc(self.S1)
+            addr[0] = rffi.cast(lltype.Signed, new_objects[j])
+        self.should_see[:] = new_objects
+
+
+class TestMallocShadowStack(BaseTestRegalloc):
+
+    def setup_method(self, method):
+        cpu = CPU(None, None)
+        cpu.gc_ll_descr = GCDescrFastpathMalloc()
+        cpu.gc_ll_descr.gcrootmap = MockShadowStackRootMap()
+        cpu.setup_once()
+        for i in range(42):
+            cpu.reserve_some_free_fail_descr_number()
+        self.cpu = cpu
+
+    def test_save_regs_at_correct_place(self):
+        cpu = self.cpu
+        gc_ll_descr = cpu.gc_ll_descr
+        S1 = gc_ll_descr.gcrootmap.S1
+        S2 = lltype.GcStruct('S2', ('s0', lltype.Ptr(S1)),
+                                   ('s1', lltype.Ptr(S1)),
+                                   ('s2', lltype.Ptr(S1)),
+                                   ('s3', lltype.Ptr(S1)),
+                                   ('s4', lltype.Ptr(S1)),
+                                   ('s5', lltype.Ptr(S1)),
+                                   ('s6', lltype.Ptr(S1)),
+                                   ('s7', lltype.Ptr(S1)),
+                                   ('s8', lltype.Ptr(S1)),
+                                   ('s9', lltype.Ptr(S1)),
+                                   ('s10', lltype.Ptr(S1)),
+                                   ('s11', lltype.Ptr(S1)),
+                                   ('s12', lltype.Ptr(S1)),
+                                   ('s13', lltype.Ptr(S1)),
+                                   ('s14', lltype.Ptr(S1)),
+                                   ('s15', lltype.Ptr(S1)))
+        self.namespace = self.namespace.copy()
+        for i in range(16):
+            self.namespace['ds%i' % i] = cpu.fielddescrof(S2, 's%d' % i)
+        ops = '''
+        [p0]
+        p1 = getfield_gc(p0, descr=ds0)
+        p2 = getfield_gc(p0, descr=ds1)
+        p3 = getfield_gc(p0, descr=ds2)
+        p4 = getfield_gc(p0, descr=ds3)
+        p5 = getfield_gc(p0, descr=ds4)
+        p6 = getfield_gc(p0, descr=ds5)
+        p7 = getfield_gc(p0, descr=ds6)
+        p8 = getfield_gc(p0, descr=ds7)
+        p9 = getfield_gc(p0, descr=ds8)
+        p10 = getfield_gc(p0, descr=ds9)
+        p11 = getfield_gc(p0, descr=ds10)
+        p12 = getfield_gc(p0, descr=ds11)
+        p13 = getfield_gc(p0, descr=ds12)
+        p14 = getfield_gc(p0, descr=ds13)
+        p15 = getfield_gc(p0, descr=ds14)
+        p16 = getfield_gc(p0, descr=ds15)
+        #
+        # now all registers are in use
+        p17 = call_malloc_nursery(40)
+        p18 = call_malloc_nursery(40)     # overflow
+        #
+        finish(p1, p2, p3, p4, p5, p6, p7, p8,         \
+               p9, p10, p11, p12, p13, p14, p15, p16)
+        '''
+        s2 = lltype.malloc(S2)
+        for i in range(16):
+            s1 = lltype.malloc(S1)
+            setattr(s2, 's%d' % i, s1)
+            gc_ll_descr.gcrootmap.should_see.append(s1)
+        s2ref = lltype.cast_opaque_ptr(llmemory.GCREF, s2)
+        #
+        self.interpret(ops, [s2ref])
+        gc_ll_descr.check_nothing_in_nursery()
+        assert gc_ll_descr.calls == [40]
+        gc_ll_descr.gcrootmap.check_initial_and_final_state()
+        # check the returned pointers
+        for i in range(16):
+            s1ref = self.cpu.get_latest_value_ref(i)
+            s1 = lltype.cast_opaque_ptr(lltype.Ptr(S1), s1ref)
+            for j in range(16):
+                assert s1 != getattr(s2, 's%d' % j)
+            assert s1 == gc_ll_descr.gcrootmap.should_see[i]
diff --git a/pypy/jit/backend/x86/test/test_recompilation.py b/pypy/jit/backend/x86/test/test_recompilation.py
--- a/pypy/jit/backend/x86/test/test_recompilation.py
+++ b/pypy/jit/backend/x86/test/test_recompilation.py
@@ -34,7 +34,6 @@
         '''
         loop = self.interpret(ops, [0])
         previous = loop._jitcelltoken.compiled_loop_token.frame_depth
-        assert loop._jitcelltoken.compiled_loop_token.param_depth == 0
         assert self.getint(0) == 20
         ops = '''
         [i1]
@@ -51,7 +50,6 @@
         bridge = self.attach_bridge(ops, loop, -2)
         descr = loop.operations[3].getdescr()
         new = descr._x86_bridge_frame_depth
-        assert descr._x86_bridge_param_depth == 0
         # the force_spill() forces the stack to grow
         assert new > previous
         fail = self.run(loop, 0)
@@ -116,10 +114,8 @@
         loop_frame_depth = loop._jitcelltoken.compiled_loop_token.frame_depth
         bridge = self.attach_bridge(ops, loop, 6)
         guard_op = loop.operations[6]
-        assert loop._jitcelltoken.compiled_loop_token.param_depth == 0
         # the force_spill() forces the stack to grow
         assert guard_op.getdescr()._x86_bridge_frame_depth > loop_frame_depth
-        assert guard_op.getdescr()._x86_bridge_param_depth == 0
         self.run(loop, 0, 0, 0, 0, 0, 0)
         assert self.getint(0) == 1
         assert self.getint(1) == 20
diff --git a/pypy/jit/backend/x86/test/test_regalloc.py b/pypy/jit/backend/x86/test/test_regalloc.py
--- a/pypy/jit/backend/x86/test/test_regalloc.py
+++ b/pypy/jit/backend/x86/test/test_regalloc.py
@@ -606,23 +606,37 @@
         assert self.getints(9) == [0, 1, 1, 1, 1, 1, 1, 1, 1]
 
 class TestRegAllocCallAndStackDepth(BaseTestRegalloc):
-    def expected_param_depth(self, num_args):
+    def expected_frame_depth(self, num_call_args, num_pushed_input_args=0):
         # Assumes the arguments are all non-float
         if IS_X86_32:
-            return num_args
+            extra_esp = num_call_args
+            return extra_esp
         elif IS_X86_64:
-            return max(num_args - 6, 0)
+            # 'num_pushed_input_args' is for X86_64 only
+            extra_esp = max(num_call_args - 6, 0)
+            return num_pushed_input_args + extra_esp
 
     def test_one_call(self):
         ops = '''
-        [i0, i1, i2, i3, i4, i5, i6, i7, i8, i9]
+        [i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i9b]
         i10 = call(ConstClass(f1ptr), i0, descr=f1_calldescr)
-        finish(i10, i1, i2, i3, i4, i5, i6, i7, i8, i9)
+        finish(i10, i1, i2, i3, i4, i5, i6, i7, i8, i9, i9b)
         '''
-        loop = self.interpret(ops, [4, 7, 9, 9 ,9, 9, 9, 9, 9, 9])
-        assert self.getints(10) == [5, 7, 9, 9, 9, 9, 9, 9, 9, 9]
+        loop = self.interpret(ops, [4, 7, 9, 9 ,9, 9, 9, 9, 9, 9, 8])
+        assert self.getints(11) == [5, 7, 9, 9, 9, 9, 9, 9, 9, 9, 8]
         clt = loop._jitcelltoken.compiled_loop_token
-        assert clt.param_depth == self.expected_param_depth(1)
+        assert clt.frame_depth == self.expected_frame_depth(1, 5)
+
+    def test_one_call_reverse(self):
+        ops = '''
+        [i1, i2, i3, i4, i5, i6, i7, i8, i9, i9b, i0]
+        i10 = call(ConstClass(f1ptr), i0, descr=f1_calldescr)
+        finish(i10, i1, i2, i3, i4, i5, i6, i7, i8, i9, i9b)
+        '''
+        loop = self.interpret(ops, [7, 9, 9 ,9, 9, 9, 9, 9, 9, 8, 4])
+        assert self.getints(11) == [5, 7, 9, 9, 9, 9, 9, 9, 9, 9, 8]
+        clt = loop._jitcelltoken.compiled_loop_token
+        assert clt.frame_depth == self.expected_frame_depth(1, 6)
 
     def test_two_calls(self):
         ops = '''
@@ -634,7 +648,7 @@
         loop = self.interpret(ops, [4, 7, 9, 9 ,9, 9, 9, 9, 9, 9])
         assert self.getints(10) == [5*7, 7, 9, 9, 9, 9, 9, 9, 9, 9]
         clt = loop._jitcelltoken.compiled_loop_token
-        assert clt.param_depth == self.expected_param_depth(2)
+        assert clt.frame_depth == self.expected_frame_depth(2, 5)
 
     def test_call_many_arguments(self):
         # NB: The first and last arguments in the call are constants. This
@@ -648,25 +662,31 @@
         loop = self.interpret(ops, [2, 3, 4, 5, 6, 7, 8, 9])
         assert self.getint(0) == 55
         clt = loop._jitcelltoken.compiled_loop_token
-        assert clt.param_depth == self.expected_param_depth(10)
+        assert clt.frame_depth == self.expected_frame_depth(10)
 
     def test_bridge_calls_1(self):
         ops = '''
         [i0, i1]
         i2 = call(ConstClass(f1ptr), i0, descr=f1_calldescr)
-        guard_value(i2, 0, descr=fdescr1) [i2, i1]
+        guard_value(i2, 0, descr=fdescr1) [i2, i0, i1]
         finish(i1)
         '''
         loop = self.interpret(ops, [4, 7])
         assert self.getint(0) == 5
+        clt = loop._jitcelltoken.compiled_loop_token
+        orgdepth = clt.frame_depth
+        assert orgdepth == self.expected_frame_depth(1, 2)
+
         ops = '''
-        [i2, i1]
+        [i2, i0, i1]
         i3 = call(ConstClass(f2ptr), i2, i1, descr=f2_calldescr)        
-        finish(i3, descr=fdescr2)        
+        finish(i3, i0, descr=fdescr2)
         '''
         bridge = self.attach_bridge(ops, loop, -2)
 
-        assert loop.operations[-2].getdescr()._x86_bridge_param_depth == self.expected_param_depth(2)
+        assert clt.frame_depth == max(orgdepth, self.expected_frame_depth(2, 2))
+        assert loop.operations[-2].getdescr()._x86_bridge_frame_depth == \
+            self.expected_frame_depth(2, 2)
 
         self.run(loop, 4, 7)
         assert self.getint(0) == 5*7
@@ -676,10 +696,14 @@
         [i0, i1]
         i2 = call(ConstClass(f2ptr), i0, i1, descr=f2_calldescr)
         guard_value(i2, 0, descr=fdescr1) [i2]
-        finish(i1)
+        finish(i2)
         '''
         loop = self.interpret(ops, [4, 7])
         assert self.getint(0) == 4*7
+        clt = loop._jitcelltoken.compiled_loop_token
+        orgdepth = clt.frame_depth
+        assert orgdepth == self.expected_frame_depth(2)
+
         ops = '''
         [i2]
         i3 = call(ConstClass(f1ptr), i2, descr=f1_calldescr)        
@@ -687,7 +711,9 @@
         '''
         bridge = self.attach_bridge(ops, loop, -2)
 
-        assert loop.operations[-2].getdescr()._x86_bridge_param_depth == self.expected_param_depth(2)
+        assert clt.frame_depth == max(orgdepth, self.expected_frame_depth(1))
+        assert loop.operations[-2].getdescr()._x86_bridge_frame_depth == \
+            self.expected_frame_depth(1)
 
         self.run(loop, 4, 7)
         assert self.getint(0) == 29
diff --git a/pypy/jit/backend/x86/test/test_runner.py b/pypy/jit/backend/x86/test/test_runner.py
--- a/pypy/jit/backend/x86/test/test_runner.py
+++ b/pypy/jit/backend/x86/test/test_runner.py
@@ -371,7 +371,7 @@
 
         operations = [
             ResOperation(rop.LABEL, [i0], None, descr=targettoken),
-            ResOperation(rop.DEBUG_MERGE_POINT, [FakeString("hello"), 0], None),
+            ResOperation(rop.DEBUG_MERGE_POINT, [FakeString("hello"), 0, 0], None),
             ResOperation(rop.INT_ADD, [i0, ConstInt(1)], i1),
             ResOperation(rop.INT_LE, [i1, ConstInt(9)], i2),
             ResOperation(rop.GUARD_TRUE, [i2], None, descr=faildescr1),
@@ -390,7 +390,7 @@
         bridge = [
             ResOperation(rop.INT_LE, [i1b, ConstInt(19)], i3),
             ResOperation(rop.GUARD_TRUE, [i3], None, descr=faildescr2),
-            ResOperation(rop.DEBUG_MERGE_POINT, [FakeString("bye"), 0], None),
+            ResOperation(rop.DEBUG_MERGE_POINT, [FakeString("bye"), 0, 0], None),
             ResOperation(rop.JUMP, [i1b], None, descr=targettoken),
         ]
         bridge[1].setfailargs([i1b])
@@ -531,12 +531,12 @@
         loop = """
         [i0]
         label(i0, descr=preambletoken)
-        debug_merge_point('xyz', 0)
+        debug_merge_point('xyz', 0, 0)
         i1 = int_add(i0, 1)
         i2 = int_ge(i1, 10)
         guard_false(i2) []
         label(i1, descr=targettoken)
-        debug_merge_point('xyz', 0)
+        debug_merge_point('xyz', 0, 0)
         i11 = int_add(i1, 1)
         i12 = int_ge(i11, 10)
         guard_false(i12) []
@@ -569,7 +569,7 @@
         loop = """
         [i0]
         label(i0, descr=targettoken)
-        debug_merge_point('xyz', 0)
+        debug_merge_point('xyz', 0, 0)
         i1 = int_add(i0, 1)
         i2 = int_ge(i1, 10)
         guard_false(i2) []
diff --git a/pypy/jit/metainterp/blackhole.py b/pypy/jit/metainterp/blackhole.py
--- a/pypy/jit/metainterp/blackhole.py
+++ b/pypy/jit/metainterp/blackhole.py
@@ -1379,7 +1379,8 @@
         elif opnum == rop.GUARD_NO_OVERFLOW:
             # Produced by int_xxx_ovf().  The pc is just after the opcode.
             # We get here because it did not used to overflow, but now it does.
-            return get_llexception(self.cpu, OverflowError())
+            if not dont_change_position:
+                return get_llexception(self.cpu, OverflowError())
         #
         elif opnum == rop.GUARD_OVERFLOW:
             # Produced by int_xxx_ovf().  The pc is just after the opcode.
diff --git a/pypy/jit/metainterp/compile.py b/pypy/jit/metainterp/compile.py
--- a/pypy/jit/metainterp/compile.py
+++ b/pypy/jit/metainterp/compile.py
@@ -289,8 +289,21 @@
             assert isinstance(token, TargetToken)
             assert token.original_jitcell_token is None
             token.original_jitcell_token = trace.original_jitcell_token
-            
-    
+
+
+def do_compile_loop(metainterp_sd, inputargs, operations, looptoken,
+                    log=True, name=''):
+    metainterp_sd.logger_ops.log_loop(inputargs, operations, -2,
+                                      'compiling', name=name)
+    return metainterp_sd.cpu.compile_loop(inputargs, operations, looptoken,
+                                          log=log, name=name)
+
+def do_compile_bridge(metainterp_sd, faildescr, inputargs, operations,
+                      original_loop_token, log=True):
+    metainterp_sd.logger_ops.log_bridge(inputargs, operations, -2)
+    return metainterp_sd.cpu.compile_bridge(faildescr, inputargs, operations,
+                                            original_loop_token, log=log)
+
 def send_loop_to_backend(greenkey, jitdriver_sd, metainterp_sd, loop, type):
     vinfo = jitdriver_sd.virtualizable_info
     if vinfo is not None:
@@ -319,9 +332,9 @@
     metainterp_sd.profiler.start_backend()
     debug_start("jit-backend")
     try:
-        asminfo = metainterp_sd.cpu.compile_loop(loop.inputargs, operations,
-                                                  original_jitcell_token,
-                                                  name=loopname)
+        asminfo = do_compile_loop(metainterp_sd, loop.inputargs,
+                                  operations, original_jitcell_token,
+                                  name=loopname)
     finally:
         debug_stop("jit-backend")
     metainterp_sd.profiler.end_backend()
@@ -333,7 +346,6 @@
         metainterp_sd.stats.compiled()
     metainterp_sd.log("compiled new " + type)
     #
-    loopname = jitdriver_sd.warmstate.get_location_str(greenkey)
     if asminfo is not None:
         ops_offset = asminfo.ops_offset
     else:
@@ -365,9 +377,9 @@
     metainterp_sd.profiler.start_backend()
     debug_start("jit-backend")
     try:
-        asminfo = metainterp_sd.cpu.compile_bridge(faildescr, inputargs,
-                                                   operations,
-                                                   original_loop_token)
+        asminfo = do_compile_bridge(metainterp_sd, faildescr, inputargs,
+                                    operations,
+                                    original_loop_token)
     finally:
         debug_stop("jit-backend")
     metainterp_sd.profiler.end_backend()
diff --git a/pypy/jit/metainterp/graphpage.py b/pypy/jit/metainterp/graphpage.py
--- a/pypy/jit/metainterp/graphpage.py
+++ b/pypy/jit/metainterp/graphpage.py
@@ -169,9 +169,9 @@
             if op.getopnum() == rop.DEBUG_MERGE_POINT:
                 jd_sd = self.metainterp_sd.jitdrivers_sd[op.getarg(0).getint()]
                 if jd_sd._get_printable_location_ptr:
-                    s = jd_sd.warmstate.get_location_str(op.getarglist()[2:])
+                    s = jd_sd.warmstate.get_location_str(op.getarglist()[3:])
                     s = s.replace(',', '.') # we use comma for argument splitting
-                    op_repr = "debug_merge_point(%d, '%s')" % (op.getarg(1).getint(), s)
+                    op_repr = "debug_merge_point(%d, %d, '%s')" % (op.getarg(1).getint(), op.getarg(2).getint(), s)
             lines.append(op_repr)
             if is_interesting_guard(op):
                 tgt = op.getdescr()._debug_suboperations[0]
diff --git a/pypy/jit/metainterp/logger.py b/pypy/jit/metainterp/logger.py
--- a/pypy/jit/metainterp/logger.py
+++ b/pypy/jit/metainterp/logger.py
@@ -18,6 +18,10 @@
             debug_start("jit-log-noopt-loop")
             logops = self._log_operations(inputargs, operations, ops_offset)
             debug_stop("jit-log-noopt-loop")
+        elif number == -2:
+            debug_start("jit-log-compiling-loop")
+            logops = self._log_operations(inputargs, operations, ops_offset)
+            debug_stop("jit-log-compiling-loop")
         else:
             debug_start("jit-log-opt-loop")
             debug_print("# Loop", number, '(%s)' % name , ":", type,
@@ -31,6 +35,10 @@
             debug_start("jit-log-noopt-bridge")
             logops = self._log_operations(inputargs, operations, ops_offset)
             debug_stop("jit-log-noopt-bridge")
+        elif number == -2:
+            debug_start("jit-log-compiling-bridge")
+            logops = self._log_operations(inputargs, operations, ops_offset)
+            debug_stop("jit-log-compiling-bridge")
         else:
             debug_start("jit-log-opt-bridge")
             debug_print("# bridge out of Guard", number,
@@ -102,9 +110,9 @@
     def repr_of_resop(self, op, ops_offset=None):
         if op.getopnum() == rop.DEBUG_MERGE_POINT:
             jd_sd = self.metainterp_sd.jitdrivers_sd[op.getarg(0).getint()]
-            s = jd_sd.warmstate.get_location_str(op.getarglist()[2:])
+            s = jd_sd.warmstate.get_location_str(op.getarglist()[3:])
             s = s.replace(',', '.') # we use comma for argument splitting
-            return "debug_merge_point(%d, '%s')" % (op.getarg(1).getint(), s)
+            return "debug_merge_point(%d, %d, '%s')" % (op.getarg(1).getint(), op.getarg(2).getint(), s)
         if ops_offset is None:
             offset = -1
         else:
@@ -141,7 +149,7 @@
             if target_token.exported_state:
                 for op in target_token.exported_state.inputarg_setup_ops:
                     debug_print('    ' + self.repr_of_resop(op))
-        
+
     def _log_operations(self, inputargs, operations, ops_offset):
         if not have_debug_prints():
             return
diff --git a/pypy/jit/metainterp/optimizeopt/__init__.py b/pypy/jit/metainterp/optimizeopt/__init__.py
--- a/pypy/jit/metainterp/optimizeopt/__init__.py
+++ b/pypy/jit/metainterp/optimizeopt/__init__.py
@@ -9,7 +9,7 @@
 from pypy.jit.metainterp.optimizeopt.simplify import OptSimplify
 from pypy.jit.metainterp.optimizeopt.pure import OptPure
 from pypy.jit.metainterp.optimizeopt.earlyforce import OptEarlyForce
-from pypy.rlib.jit import PARAMETERS
+from pypy.rlib.jit import PARAMETERS, ENABLE_ALL_OPTS
 from pypy.rlib.unroll import unrolling_iterable
 from pypy.rlib.debug import debug_start, debug_stop, debug_print
 
@@ -30,6 +30,9 @@
 ALL_OPTS_LIST = [name for name, _ in ALL_OPTS]
 ALL_OPTS_NAMES = ':'.join([name for name, _ in ALL_OPTS])
 
+assert ENABLE_ALL_OPTS == ALL_OPTS_NAMES, (
+    'please fix rlib/jit.py to say ENABLE_ALL_OPTS = %r' % (ALL_OPTS_NAMES,))
+
 def build_opt_chain(metainterp_sd, enable_opts):
     config = metainterp_sd.config
     optimizations = []
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_multilabel.py b/pypy/jit/metainterp/optimizeopt/test/test_multilabel.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_multilabel.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_multilabel.py
@@ -398,6 +398,40 @@
         with raises(InvalidLoop):
             self.optimize_loop(ops, ops)
 
+    def test_issue1045(self):
+        ops = """
+        [i55]
+        i73 = int_mod(i55, 2)
+        i75 = int_rshift(i73, 63)
+        i76 = int_and(2, i75)
+        i77 = int_add(i73, i76)
+        i81 = int_eq(i77, 1)
+        i0 = int_ge(i55, 1)
+        guard_true(i0) []
+        label(i55)
+        i3 = int_mod(i55, 2)
+        i5 = int_rshift(i3, 63)
+        i6 = int_and(2, i5)
+        i7 = int_add(i3, i6)
+        i8 = int_eq(i7, 1)
+        escape(i8)
+        jump(i55)
+        """
+        expected = """
+        [i55]
+        i73 = int_mod(i55, 2)
+        i75 = int_rshift(i73, 63)
+        i76 = int_and(2, i75)
+        i77 = int_add(i73, i76)
+        i81 = int_eq(i77, 1)
+        i0 = int_ge(i55, 1)
+        guard_true(i0) []
+        label(i55, i81)
+        escape(i81)
+        jump(i55, i81)
+        """
+        self.optimize_loop(ops, expected)
+        
 class OptRenameStrlen(Optimization):
     def propagate_forward(self, op):
         dispatch_opt(self, op)
@@ -423,7 +457,7 @@
         metainterp_sd = FakeMetaInterpStaticData(self.cpu)
         optimize_unroll(metainterp_sd, loop, [OptRenameStrlen(), OptPure()], True)
 
-    def test_optimizer_renaming_boxes(self):
+    def test_optimizer_renaming_boxes1(self):
         ops = """
         [p1]
         i1 = strlen(p1)
@@ -457,7 +491,6 @@
         jump(p1, i11)
         """
         self.optimize_loop(ops, expected)
-
         
 
 class TestLLtype(OptimizeoptTestMultiLabel, LLtypeMixin):
diff --git a/pypy/jit/metainterp/optimizeopt/unroll.py b/pypy/jit/metainterp/optimizeopt/unroll.py
--- a/pypy/jit/metainterp/optimizeopt/unroll.py
+++ b/pypy/jit/metainterp/optimizeopt/unroll.py
@@ -9,7 +9,6 @@
 from pypy.jit.metainterp.inliner import Inliner
 from pypy.jit.metainterp.resoperation import rop, ResOperation
 from pypy.jit.metainterp.resume import Snapshot
-from pypy.rlib.debug import debug_print
 import sys, os
 
 # FIXME: Introduce some VirtualOptimizer super class instead
@@ -121,9 +120,9 @@
                 limit = self.optimizer.metainterp_sd.warmrunnerdesc.memory_manager.retrace_limit
                 if cell_token.retraced_count < limit:
                     cell_token.retraced_count += 1
-                    debug_print('Retracing (%d/%d)' % (cell_token.retraced_count, limit))
+                    #debug_print('Retracing (%d/%d)' % (cell_token.retraced_count, limit))
                 else:
-                    debug_print("Retrace count reached, jumping to preamble")
+                    #debug_print("Retrace count reached, jumping to preamble")
                     assert cell_token.target_tokens[0].virtual_state is None
                     jumpop.setdescr(cell_token.target_tokens[0])
                     self.optimizer.send_extra_operation(jumpop)
@@ -260,7 +259,7 @@
             if op and op.result:
                 preamble_value = exported_state.exported_values[op.result]
                 value = self.optimizer.getvalue(op.result)
-                if not value.is_virtual():
+                if not value.is_virtual() and not value.is_constant():
                     imp = ValueImporter(self, preamble_value, op)
                     self.optimizer.importable_values[value] = imp
                 newvalue = self.optimizer.getvalue(op.result)
@@ -268,12 +267,14 @@
                 # note that emitting here SAME_AS should not happen, but
                 # in case it does, we would prefer to be suboptimal in asm
                 # to a fatal RPython exception.
-                if newresult is not op.result and not newvalue.is_constant():
+                if newresult is not op.result and \
+                   not self.short_boxes.has_producer(newresult) and \
+                   not newvalue.is_constant():
                     op = ResOperation(rop.SAME_AS, [op.result], newresult)
                     self.optimizer._newoperations.append(op)
-                    if self.optimizer.loop.logops:
-                        debug_print('  Falling back to add extra: ' +
-                                    self.optimizer.loop.logops.repr_of_resop(op))
+                    #if self.optimizer.loop.logops:
+                    #    debug_print('  Falling back to add extra: ' +
+                    #                self.optimizer.loop.logops.repr_of_resop(op))
                     
         self.optimizer.flush()
         self.optimizer.emitting_dissabled = False
@@ -339,8 +340,8 @@
             if i == len(newoperations):
                 while j < len(jumpargs):
                     a = jumpargs[j]
-                    if self.optimizer.loop.logops:
-                        debug_print('J:  ' + self.optimizer.loop.logops.repr_of_arg(a))
+                    #if self.optimizer.loop.logops:
+                    #    debug_print('J:  ' + self.optimizer.loop.logops.repr_of_arg(a))
                     self.import_box(a, inputargs, short_jumpargs, jumpargs)
                     j += 1
             else:
@@ -351,11 +352,11 @@
                 if op.is_guard():
                     args = args + op.getfailargs()
 
-                if self.optimizer.loop.logops:
-                    debug_print('OP: ' + self.optimizer.loop.logops.repr_of_resop(op))
+                #if self.optimizer.loop.logops:
+                #    debug_print('OP: ' + self.optimizer.loop.logops.repr_of_resop(op))
                 for a in args:
-                    if self.optimizer.loop.logops:
-                        debug_print('A:  ' + self.optimizer.loop.logops.repr_of_arg(a))
+                    #if self.optimizer.loop.logops:
+                    #    debug_print('A:  ' + self.optimizer.loop.logops.repr_of_arg(a))
                     self.import_box(a, inputargs, short_jumpargs, jumpargs)
                 i += 1
             newoperations = self.optimizer.get_newoperations()
@@ -368,18 +369,18 @@
         # that is compatible with the virtual state at the start of the loop
         modifier = VirtualStateAdder(self.optimizer)
         final_virtual_state = modifier.get_virtual_state(original_jumpargs)
-        debug_start('jit-log-virtualstate')
-        virtual_state.debug_print('Closed loop with ')
+        #debug_start('jit-log-virtualstate')
+        #virtual_state.debug_print('Closed loop with ')
         bad = {}
         if not virtual_state.generalization_of(final_virtual_state, bad):
             # We ended up with a virtual state that is not compatible
             # and we are thus unable to jump to the start of the loop
-            final_virtual_state.debug_print("Bad virtual state at end of loop, ",
-                                            bad)
-            debug_stop('jit-log-virtualstate')
+            #final_virtual_state.debug_print("Bad virtual state at end of loop, ",
+            #                                bad)
+            #debug_stop('jit-log-virtualstate')
             raise InvalidLoop
             
-        debug_stop('jit-log-virtualstate')
+        #debug_stop('jit-log-virtualstate')
 
         maxguards = self.optimizer.metainterp_sd.warmrunnerdesc.memory_manager.max_retrace_guards
         if self.optimizer.emitted_guards > maxguards:
@@ -442,9 +443,9 @@
                 self.ensure_short_op_emitted(self.short_boxes.producer(a), optimizer,
                                              seen)
 
-        if self.optimizer.loop.logops:
-            debug_print('  Emitting short op: ' +
-                        self.optimizer.loop.logops.repr_of_resop(op))
+        #if self.optimizer.loop.logops:
+        #    debug_print('  Emitting short op: ' +
+        #                self.optimizer.loop.logops.repr_of_resop(op))
 
         optimizer.send_extra_operation(op)
         seen[op.result] = True
@@ -525,8 +526,8 @@
         args = jumpop.getarglist()
         modifier = VirtualStateAdder(self.optimizer)
         virtual_state = modifier.get_virtual_state(args)
-        debug_start('jit-log-virtualstate')
-        virtual_state.debug_print("Looking for ")
+        #debug_start('jit-log-virtualstate')
+        #virtual_state.debug_print("Looking for ")
 
         for target in cell_token.target_tokens:
             if not target.virtual_state:
@@ -535,10 +536,10 @@
             extra_guards = []
 
             bad = {}
-            debugmsg = 'Did not match '
+            #debugmsg = 'Did not match '
             if target.virtual_state.generalization_of(virtual_state, bad):
                 ok = True
-                debugmsg = 'Matched '
+                #debugmsg = 'Matched '
             else:
                 try:
                     cpu = self.optimizer.cpu
@@ -547,13 +548,13 @@
                                                          extra_guards)
 
                     ok = True
-                    debugmsg = 'Guarded to match '
+                    #debugmsg = 'Guarded to match '
                 except InvalidLoop:
                     pass
-            target.virtual_state.debug_print(debugmsg, bad)
+            #target.virtual_state.debug_print(debugmsg, bad)
 
             if ok:
-                debug_stop('jit-log-virtualstate')
+                #debug_stop('jit-log-virtualstate')
 
                 values = [self.getvalue(arg)
                           for arg in jumpop.getarglist()]
@@ -574,13 +575,13 @@
                         newop = inliner.inline_op(shop)
                         self.optimizer.send_extra_operation(newop)
                 except InvalidLoop:
-                    debug_print("Inlining failed unexpectedly",
-                                "jumping to preamble instead")
+                    #debug_print("Inlining failed unexpectedly",
+                    #            "jumping to preamble instead")
                     assert cell_token.target_tokens[0].virtual_state is None
                     jumpop.setdescr(cell_token.target_tokens[0])
                     self.optimizer.send_extra_operation(jumpop)
                 return True
-        debug_stop('jit-log-virtualstate')
+        #debug_stop('jit-log-virtualstate')
         return False
 
 class ValueImporter(object):
diff --git a/pypy/jit/metainterp/optimizeopt/virtualstate.py b/pypy/jit/metainterp/optimizeopt/virtualstate.py
--- a/pypy/jit/metainterp/optimizeopt/virtualstate.py
+++ b/pypy/jit/metainterp/optimizeopt/virtualstate.py
@@ -681,13 +681,14 @@
             self.synthetic[op] = True
 
     def debug_print(self, logops):
-        debug_start('jit-short-boxes')
-        for box, op in self.short_boxes.items():
-            if op:
-                debug_print(logops.repr_of_arg(box) + ': ' + logops.repr_of_resop(op))
-            else:
-                debug_print(logops.repr_of_arg(box) + ': None')
-        debug_stop('jit-short-boxes')
+        if 0:
+            debug_start('jit-short-boxes')
+            for box, op in self.short_boxes.items():
+                if op:
+                    debug_print(logops.repr_of_arg(box) + ': ' + logops.repr_of_resop(op))
+                else:
+                    debug_print(logops.repr_of_arg(box) + ': None')
+            debug_stop('jit-short-boxes')
 
     def operations(self):
         if not we_are_translated(): # For tests
diff --git a/pypy/jit/metainterp/pyjitpl.py b/pypy/jit/metainterp/pyjitpl.py
--- a/pypy/jit/metainterp/pyjitpl.py
+++ b/pypy/jit/metainterp/pyjitpl.py
@@ -974,9 +974,11 @@
         any_operation = len(self.metainterp.history.operations) > 0
         jitdriver_sd = self.metainterp.staticdata.jitdrivers_sd[jdindex]
         self.verify_green_args(jitdriver_sd, greenboxes)
-        self.debug_merge_point(jitdriver_sd, jdindex, self.metainterp.portal_call_depth,
+        self.debug_merge_point(jitdriver_sd, jdindex,
+                               self.metainterp.portal_call_depth,
+                               self.metainterp.call_ids[-1],
                                greenboxes)
-        
+
         if self.metainterp.seen_loop_header_for_jdindex < 0:
             if not any_operation:
                 return
@@ -1028,11 +1030,11 @@
                                     assembler_call=True)
             raise ChangeFrame
 
-    def debug_merge_point(self, jitdriver_sd, jd_index, portal_call_depth, greenkey):
+    def debug_merge_point(self, jitdriver_sd, jd_index, portal_call_depth, current_call_id, greenkey):
         # debugging: produce a DEBUG_MERGE_POINT operation
         loc = jitdriver_sd.warmstate.get_location_str(greenkey)
         debug_print(loc)
-        args = [ConstInt(jd_index), ConstInt(portal_call_depth)] + greenkey
+        args = [ConstInt(jd_index), ConstInt(portal_call_depth), ConstInt(current_call_id)] + greenkey
         self.metainterp.history.record(rop.DEBUG_MERGE_POINT, args, None)
 
     @arguments("box", "label")
@@ -1574,11 +1576,14 @@
         self.call_pure_results = args_dict_box()
         self.heapcache = HeapCache()
 
+        self.call_ids = []
+        self.current_call_id = 0
+
     def retrace_needed(self, trace):
         self.partial_trace = trace
         self.retracing_from = len(self.history.operations) - 1
         self.heapcache.reset()
-        
+
 
     def perform_call(self, jitcode, boxes, greenkey=None):
         # causes the metainterp to enter the given subfunction
@@ -1592,6 +1597,8 @@
     def newframe(self, jitcode, greenkey=None):
         if jitcode.is_portal:
             self.portal_call_depth += 1
+            self.call_ids.append(self.current_call_id)
+            self.current_call_id += 1
         if greenkey is not None and self.is_main_jitcode(jitcode):
             self.portal_trace_positions.append(
                     (greenkey, len(self.history.operations)))
@@ -1608,6 +1615,7 @@
         jitcode = frame.jitcode
         if jitcode.is_portal:
             self.portal_call_depth -= 1
+            self.call_ids.pop()
         if frame.greenkey is not None and self.is_main_jitcode(jitcode):
             self.portal_trace_positions.append(
                     (None, len(self.history.operations)))
@@ -1976,7 +1984,7 @@
                 # Found!  Compile it as a loop.
                 # raises in case it works -- which is the common case
                 if self.partial_trace:
-                    if  start != self.retracing_from: 
+                    if  start != self.retracing_from:
                         raise SwitchToBlackhole(ABORT_BAD_LOOP) # For now
                 self.compile_loop(original_boxes, live_arg_boxes, start, resumedescr)
                 # creation of the loop was cancelled!
@@ -2064,11 +2072,12 @@
             pass # XXX we want to do something special in resume descr,
                  # but not now
         elif opnum == rop.GUARD_NO_OVERFLOW:   # an overflow now detected
-            self.execute_raised(OverflowError(), constant=True)
-            try:
-                self.finishframe_exception()
-            except ChangeFrame:
-                pass
+            if not dont_change_position:
+                self.execute_raised(OverflowError(), constant=True)
+                try:
+                    self.finishframe_exception()
+                except ChangeFrame:
+                    pass
         elif opnum == rop.GUARD_OVERFLOW:      # no longer overflowing
             self.clear_exception()
         else:
@@ -2084,7 +2093,7 @@
             if not token.target_tokens:
                 return None
         return token
-        
+
     def compile_loop(self, original_boxes, live_arg_boxes, start, resume_at_jump_descr):
         num_green_args = self.jitdriver_sd.num_green_args
         greenkey = original_boxes[:num_green_args]
@@ -2349,7 +2358,7 @@
             # warmstate.py.
             virtualizable_box = self.virtualizable_boxes[-1]
             virtualizable = vinfo.unwrap_virtualizable_box(virtualizable_box)
-            assert not vinfo.gettoken(virtualizable)
+            assert not vinfo.is_token_nonnull_gcref(virtualizable)
             # fill the virtualizable with the local boxes
             self.synchronize_virtualizable()
         #
diff --git a/pypy/jit/metainterp/resume.py b/pypy/jit/metainterp/resume.py
--- a/pypy/jit/metainterp/resume.py
+++ b/pypy/jit/metainterp/resume.py
@@ -1101,14 +1101,14 @@
         virtualizable = self.decode_ref(numb.nums[index])
         if self.resume_after_guard_not_forced == 1:
             # in the middle of handle_async_forcing()
-            assert vinfo.gettoken(virtualizable)
-            vinfo.settoken(virtualizable, vinfo.TOKEN_NONE)
+            assert vinfo.is_token_nonnull_gcref(virtualizable)
+            vinfo.reset_token_gcref(virtualizable)
         else:
             # just jumped away from assembler (case 4 in the comment in
             # virtualizable.py) into tracing (case 2); check that vable_token
             # is and stays 0.  Note the call to reset_vable_token() in
             # warmstate.py.
-            assert not vinfo.gettoken(virtualizable)
+            assert not vinfo.is_token_nonnull_gcref(virtualizable)
         return vinfo.write_from_resume_data_partial(virtualizable, self, numb)
 
     def load_value_of_type(self, TYPE, tagged):
diff --git a/pypy/jit/metainterp/test/test_ajit.py b/pypy/jit/metainterp/test/test_ajit.py
--- a/pypy/jit/metainterp/test/test_ajit.py
+++ b/pypy/jit/metainterp/test/test_ajit.py
@@ -144,7 +144,7 @@
                            'int_mul': 1, 'guard_true': 2, 'int_sub': 2})
 
 
-    def test_loop_invariant_mul_ovf(self):
+    def test_loop_invariant_mul_ovf1(self):
         myjitdriver = JitDriver(greens = [], reds = ['y', 'res', 'x'])
         def f(x, y):
             res = 0
@@ -235,6 +235,65 @@
                            'guard_true': 4, 'int_sub': 4, 'jump': 3,
                            'int_mul': 3, 'int_add': 4})
 
+    def test_loop_invariant_mul_ovf2(self):
+        myjitdriver = JitDriver(greens = [], reds = ['y', 'res', 'x'])
+        def f(x, y):
+            res = 0
+            while y > 0:
+                myjitdriver.can_enter_jit(x=x, y=y, res=res)
+                myjitdriver.jit_merge_point(x=x, y=y, res=res)
+                b = y * 2
+                try:
+                    res += ovfcheck(x * x) + b
+                except OverflowError:
+                    res += 1
+                y -= 1
+            return res
+        res = self.meta_interp(f, [sys.maxint, 7])
+        assert res == f(sys.maxint, 7)
+        self.check_trace_count(1)
+        res = self.meta_interp(f, [6, 7])
+        assert res == 308
+
+    def test_loop_invariant_mul_bridge_ovf1(self):
+        myjitdriver = JitDriver(greens = [], reds = ['y', 'res', 'x1', 'x2'])
+        def f(x1, x2, y):
+            res = 0
+            while y > 0:
+                myjitdriver.can_enter_jit(x1=x1, x2=x2, y=y, res=res)
+                myjitdriver.jit_merge_point(x1=x1, x2=x2, y=y, res=res)
+                try:
+                    res += ovfcheck(x1 * x1)
+                except OverflowError:
+                    res += 1
+                if y<32 and (y>>2)&1==0:
+                    x1, x2 = x2, x1
+                y -= 1
+            return res
+        res = self.meta_interp(f, [6, sys.maxint, 48])
+        assert res == f(6, sys.maxint, 48)
+
+    def test_loop_invariant_mul_bridge_ovf2(self):
+        myjitdriver = JitDriver(greens = [], reds = ['y', 'res', 'x1', 'x2', 'n'])
+        def f(x1, x2, n, y):
+            res = 0
+            while y > 0:
+                myjitdriver.can_enter_jit(x1=x1, x2=x2, y=y, res=res, n=n)
+                myjitdriver.jit_merge_point(x1=x1, x2=x2, y=y, res=res, n=n)
+                try:
+                    res += ovfcheck(x1 * x1)
+                except OverflowError:
+                    res += 1
+                y -= 1
+                if y&4 == 0:
+                    x1, x2 = x2, x1
+            return res
+        res = self.meta_interp(f, [6, sys.maxint, 32, 48])
+        assert res == f(6, sys.maxint, 32, 48)
+        res = self.meta_interp(f, [sys.maxint, 6, 32, 48])
+        assert res == f(sys.maxint, 6, 32, 48)
+        
+
     def test_loop_invariant_intbox(self):
         myjitdriver = JitDriver(greens = [], reds = ['y', 'res', 'x'])
         class I:
diff --git a/pypy/jit/metainterp/test/test_compile.py b/pypy/jit/metainterp/test/test_compile.py
--- a/pypy/jit/metainterp/test/test_compile.py
+++ b/pypy/jit/metainterp/test/test_compile.py
@@ -14,7 +14,7 @@
     ts = typesystem.llhelper
     def __init__(self):
         self.seen = []
-    def compile_loop(self, inputargs, operations, token, name=''):
+    def compile_loop(self, inputargs, operations, token, log=True, name=''):
         self.seen.append((inputargs, operations, token))
 
 class FakeLogger(object):
diff --git a/pypy/jit/metainterp/test/test_logger.py b/pypy/jit/metainterp/test/test_logger.py
--- a/pypy/jit/metainterp/test/test_logger.py
+++ b/pypy/jit/metainterp/test/test_logger.py
@@ -54,7 +54,7 @@
         class FakeJitDriver(object):
             class warmstate(object):
                 get_location_str = staticmethod(lambda args: "dupa")
-        
+
         class FakeMetaInterpSd:
             cpu = AbstractCPU()
             cpu.ts = self.ts
@@ -77,7 +77,7 @@
             equaloplists(loop.operations, oloop.operations)
             assert oloop.inputargs == loop.inputargs
         return logger, loop, oloop
-    
+
     def test_simple(self):
         inp = '''
         [i0, i1, i2, p3, p4, p5]
@@ -116,12 +116,13 @@
     def test_debug_merge_point(self):
         inp = '''
         []
-        debug_merge_point(0, 0)
+        debug_merge_point(0, 0, 0)
         '''
         _, loop, oloop = self.reparse(inp, check_equal=False)
         assert loop.operations[0].getarg(1).getint() == 0
-        assert oloop.operations[0].getarg(1)._get_str() == "dupa"
-        
+        assert loop.operations[0].getarg(2).getint() == 0
+        assert oloop.operations[0].getarg(2)._get_str() == "dupa"
+
     def test_floats(self):
         inp = '''
         [f0]
@@ -142,7 +143,7 @@
         output = logger.log_loop(loop)
         assert output.splitlines()[-1] == "jump(i0, descr=<Loop3>)"
         pure_parse(output)
-        
+
     def test_guard_descr(self):
         namespace = {'fdescr': BasicFailDescr()}
         inp = '''
@@ -154,7 +155,7 @@
         output = logger.log_loop(loop)
         assert output.splitlines()[-1] == "guard_true(i0, descr=<Guard0>) [i0]"
         pure_parse(output)
-        
+
         logger = Logger(self.make_metainterp_sd(), guard_number=False)
         output = logger.log_loop(loop)
         lastline = output.splitlines()[-1]
diff --git a/pypy/jit/metainterp/test/test_quasiimmut.py b/pypy/jit/metainterp/test/test_quasiimmut.py
--- a/pypy/jit/metainterp/test/test_quasiimmut.py
+++ b/pypy/jit/metainterp/test/test_quasiimmut.py
@@ -8,7 +8,7 @@
 from pypy.jit.metainterp.quasiimmut import get_current_qmut_instance
 from pypy.jit.metainterp.test.support import LLJitMixin
 from pypy.jit.codewriter.policy import StopAtXPolicy
-from pypy.rlib.jit import JitDriver, dont_look_inside
+from pypy.rlib.jit import JitDriver, dont_look_inside, unroll_safe
 
 
 def test_get_current_qmut_instance():
@@ -480,6 +480,32 @@
         assert res == 1
         self.check_jitcell_token_count(2)
 
+    def test_for_loop_array(self):
+        myjitdriver = JitDriver(greens=[], reds=["n", "i"])
+        class Foo(object):
+            _immutable_fields_ = ["x?[*]"]
+            def __init__(self, x):
+                self.x = x
+        f = Foo([1, 3, 5, 6])
+        @unroll_safe
+        def g(v):
+            for x in f.x:
+                if x & 1 == 0:
+                    v += 1
+            return v
+        def main(n):
+            i = 0
+            while i < n:
+                myjitdriver.jit_merge_point(n=n, i=i)
+                i = g(i)
+            return i
+        res = self.meta_interp(main, [10])
+        assert res == 10
+        self.check_resops({
+            "int_add": 2, "int_lt": 2, "jump": 1, "guard_true": 2,
+            "guard_not_invalidated": 2
+        })
+
 
 class TestLLtypeGreenFieldsTests(QuasiImmutTests, LLJitMixin):
     pass
diff --git a/pypy/jit/metainterp/test/test_warmspot.py b/pypy/jit/metainterp/test/test_warmspot.py
--- a/pypy/jit/metainterp/test/test_warmspot.py
+++ b/pypy/jit/metainterp/test/test_warmspot.py
@@ -13,7 +13,7 @@
 
 
 class WarmspotTests(object):
-    
+
     def test_basic(self):
         mydriver = JitDriver(reds=['a'],
                              greens=['i'])
@@ -77,16 +77,16 @@
         self.meta_interp(f, [123, 10])
         assert len(get_stats().locations) >= 4
         for loc in get_stats().locations:
-            assert loc == (0, 123)
+            assert loc == (0, 0, 123)
 
     def test_set_param_enable_opts(self):
         from pypy.rpython.annlowlevel import llstr, hlstr
-        
+
         myjitdriver = JitDriver(greens = [], reds = ['n'])
         class A(object):
             def m(self, n):
                 return n-1
-            
+
         def g(n):
             while n > 0:
                 myjitdriver.can_enter_jit(n=n)
@@ -332,7 +332,7 @@
             ts = llhelper
             translate_support_code = False
             stats = "stats"
-            
+
             def get_fail_descr_number(self, d):
                 return -1
 
@@ -352,7 +352,7 @@
                 return "not callable"
 
         driver = JitDriver(reds = ['red'], greens = ['green'])
-        
+
         def f(green):
             red = 0
             while red < 10:
diff --git a/pypy/jit/metainterp/virtualizable.py b/pypy/jit/metainterp/virtualizable.py
--- a/pypy/jit/metainterp/virtualizable.py
+++ b/pypy/jit/metainterp/virtualizable.py
@@ -262,15 +262,15 @@
         force_now._dont_inline_ = True
         self.force_now = force_now
 
-        def gettoken(virtualizable):
+        def is_token_nonnull_gcref(virtualizable):
             virtualizable = cast_gcref_to_vtype(virtualizable)
-            return virtualizable.vable_token
-        self.gettoken = gettoken
+            return bool(virtualizable.vable_token)
+        self.is_token_nonnull_gcref = is_token_nonnull_gcref
 
-        def settoken(virtualizable, token):
+        def reset_token_gcref(virtualizable):
             virtualizable = cast_gcref_to_vtype(virtualizable)
-            virtualizable.vable_token = token
-        self.settoken = settoken
+            virtualizable.vable_token = VirtualizableInfo.TOKEN_NONE
+        self.reset_token_gcref = reset_token_gcref
 
     def _freeze_(self):
         return True
diff --git a/pypy/jit/metainterp/warmspot.py b/pypy/jit/metainterp/warmspot.py
--- a/pypy/jit/metainterp/warmspot.py
+++ b/pypy/jit/metainterp/warmspot.py
@@ -100,7 +100,7 @@
     if not kwds.get('translate_support_code', False):
         warmrunnerdesc.metainterp_sd.profiler.finish()
         warmrunnerdesc.metainterp_sd.cpu.finish_once()
-    print '~~~ return value:', res
+    print '~~~ return value:', repr(res)
     while repeat > 1:
         print '~' * 79
         res1 = interp.eval_graph(graph, args)
diff --git a/pypy/jit/tool/test/test_oparser.py b/pypy/jit/tool/test/test_oparser.py
--- a/pypy/jit/tool/test/test_oparser.py
+++ b/pypy/jit/tool/test/test_oparser.py
@@ -146,16 +146,18 @@
     def test_debug_merge_point(self):
         x = '''
         []
-        debug_merge_point(0, "info")
-        debug_merge_point(0, 'info')
-        debug_merge_point(1, '<some ('other.')> info')
-        debug_merge_point(0, '(stuff) #1')
+        debug_merge_point(0, 0, "info")
+        debug_merge_point(0, 0, 'info')
+        debug_merge_point(1, 1, '<some ('other.')> info')
+        debug_merge_point(0, 0, '(stuff) #1')
         '''
         loop = self.parse(x)
-        assert loop.operations[0].getarg(1)._get_str() == 'info'
-        assert loop.operations[1].getarg(1)._get_str() == 'info'
-        assert loop.operations[2].getarg(1)._get_str() == "<some ('other.')> info"
-        assert loop.operations[3].getarg(1)._get_str() == "(stuff) #1"
+        assert loop.operations[0].getarg(2)._get_str() == 'info'
+        assert loop.operations[0].getarg(1).value == 0
+        assert loop.operations[1].getarg(2)._get_str() == 'info'
+        assert loop.operations[2].getarg(2)._get_str() == "<some ('other.')> info"
+        assert loop.operations[2].getarg(1).value == 1
+        assert loop.operations[3].getarg(2)._get_str() == "(stuff) #1"
 
 
     def test_descr_with_obj_print(self):
diff --git a/pypy/module/_file/interp_file.py b/pypy/module/_file/interp_file.py
--- a/pypy/module/_file/interp_file.py
+++ b/pypy/module/_file/interp_file.py
@@ -5,14 +5,13 @@
 from pypy.rlib import streamio
 from pypy.rlib.rarithmetic import r_longlong
 from pypy.rlib.rstring import StringBuilder
-from pypy.module._file.interp_stream import (W_AbstractStream, StreamErrors,
-    wrap_streamerror, wrap_oserror_as_ioerror)
+from pypy.module._file.interp_stream import W_AbstractStream, StreamErrors
 from pypy.module.posix.interp_posix import dispatch_filename
 from pypy.interpreter.error import OperationError, operationerrfmt
 from pypy.interpreter.typedef import (TypeDef, GetSetProperty,
     interp_attrproperty, make_weakref_descr, interp_attrproperty_w)
 from pypy.interpreter.gateway import interp2app, unwrap_spec
-
+from pypy.interpreter.streamutil import wrap_streamerror, wrap_oserror_as_ioerror
 
 class W_File(W_AbstractStream):
     """An interp-level file object.  This implements the same interface than
diff --git a/pypy/module/_file/interp_stream.py b/pypy/module/_file/interp_stream.py
--- a/pypy/module/_file/interp_stream.py
+++ b/pypy/module/_file/interp_stream.py
@@ -2,27 +2,13 @@
 from pypy.rlib import streamio
 from pypy.rlib.streamio import StreamErrors
 
-from pypy.interpreter.error import OperationError, wrap_oserror2
+from pypy.interpreter.error import OperationError
 from pypy.interpreter.baseobjspace import ObjSpace, Wrappable
 from pypy.interpreter.typedef import TypeDef
 from pypy.interpreter.gateway import interp2app
+from pypy.interpreter.streamutil import wrap_streamerror, wrap_oserror_as_ioerror
 
 
-def wrap_streamerror(space, e, w_filename=None):
-    if isinstance(e, streamio.StreamError):
-        return OperationError(space.w_ValueError,
-                              space.wrap(e.message))
-    elif isinstance(e, OSError):
-        return wrap_oserror_as_ioerror(space, e, w_filename)
-    else:
-        # should not happen: wrap_streamerror() is only called when
-        # StreamErrors = (OSError, StreamError) are raised
-        return OperationError(space.w_IOError, space.w_None)
-
-def wrap_oserror_as_ioerror(space, e, w_filename=None):
-    return wrap_oserror2(space, e, w_filename,
-                         w_exception_class=space.w_IOError)
-
 class W_AbstractStream(Wrappable):
     """Base class for interp-level objects that expose streams to app-level"""
     slock = None
diff --git a/pypy/module/_io/interp_iobase.py b/pypy/module/_io/interp_iobase.py
--- a/pypy/module/_io/interp_iobase.py
+++ b/pypy/module/_io/interp_iobase.py
@@ -326,8 +326,11 @@
             try:
                 space.call_method(w_iobase, 'flush')
             except OperationError, e:
-                # if it's an IOError, ignore it
-                if not e.match(space, space.w_IOError):
+                # if it's an IOError or ValueError, ignore it (ValueError is
+                # raised if by chance we are trying to flush a file which has
+                # already been closed)
+                if not (e.match(space, space.w_IOError) or
+                        e.match(space, space.w_ValueError)):
                     raise
         
 
diff --git a/pypy/module/_io/test/test_fileio.py b/pypy/module/_io/test/test_fileio.py
--- a/pypy/module/_io/test/test_fileio.py
+++ b/pypy/module/_io/test/test_fileio.py
@@ -178,7 +178,7 @@
     space.finish()
     assert tmpfile.read() == '42'
 
-def test_flush_at_exit_IOError():
+def test_flush_at_exit_IOError_and_ValueError():
     from pypy import conftest
     from pypy.tool.option import make_config, make_objspace
 
@@ -190,7 +190,12 @@
             def flush(self):
                 raise IOError
 
+        class MyStream2(io.IOBase):
+            def flush(self):
+                raise ValueError
+
         s = MyStream()
+        s2 = MyStream2()
         import sys; sys._keepalivesomewhereobscure = s
     """)
     space.finish() # the IOError has been ignored
diff --git a/pypy/module/_md5/test/test_md5.py b/pypy/module/_md5/test/test_md5.py
--- a/pypy/module/_md5/test/test_md5.py
+++ b/pypy/module/_md5/test/test_md5.py
@@ -28,7 +28,7 @@
         assert self.md5.digest_size == 16
         #assert self.md5.digestsize == 16        -- not on CPython
         assert self.md5.md5().digest_size == 16
-        if sys.version >= (2, 5):
+        if sys.version_info >= (2, 5):
             assert self.md5.blocksize == 1
             assert self.md5.md5().digestsize == 16
 
diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py
--- a/pypy/module/array/interp_array.py
+++ b/pypy/module/array/interp_array.py
@@ -11,6 +11,7 @@
 from pypy.objspace.std.register_all import register_all
 from pypy.rlib.rarithmetic import ovfcheck
 from pypy.rlib.unroll import unrolling_iterable
+from pypy.rlib.objectmodel import specialize
 from pypy.rpython.lltypesystem import lltype, rffi
 
 
@@ -159,13 +160,15 @@
 
 
 def make_array(mytype):
+    W_ArrayBase = globals()['W_ArrayBase']
+
     class W_Array(W_ArrayBase):
         itemsize = mytype.bytes
         typecode = mytype.typecode
 
         @staticmethod
         def register(typeorder):
-            typeorder[W_Array] = []
+            typeorder[W_Array] = [(W_ArrayBase, None)]
 
         def __init__(self, space):
             self.space = space
@@ -583,13 +586,29 @@
             raise OperationError(space.w_ValueError, space.wrap(msg))
 
     # Compare methods
-    def cmp__Array_ANY(space, self, other):
-        if isinstance(other, W_ArrayBase):
-            w_lst1 = array_tolist__Array(space, self)
-            w_lst2 = space.call_method(other, 'tolist')
-            return space.cmp(w_lst1, w_lst2)
-        else:
-            return space.w_NotImplemented
+    @specialize.arg(3)
+    def _cmp_impl(space, self, other, space_fn):
+        w_lst1 = array_tolist__Array(space, self)
+        w_lst2 = space.call_method(other, 'tolist')
+        return space_fn(w_lst1, w_lst2)
+
+    def eq__Array_ArrayBase(space, self, other):
+        return _cmp_impl(space, self, other, space.eq)
+
+    def ne__Array_ArrayBase(space, self, other):
+        return _cmp_impl(space, self, other, space.ne)
+
+    def lt__Array_ArrayBase(space, self, other):
+        return _cmp_impl(space, self, other, space.lt)
+
+    def le__Array_ArrayBase(space, self, other):
+        return _cmp_impl(space, self, other, space.le)
+
+    def gt__Array_ArrayBase(space, self, other):
+        return _cmp_impl(space, self, other, space.gt)
+
+    def ge__Array_ArrayBase(space, self, other):
+        return _cmp_impl(space, self, other, space.ge)
 
     # Misc methods
 
diff --git a/pypy/module/array/test/test_array.py b/pypy/module/array/test/test_array.py
--- a/pypy/module/array/test/test_array.py
+++ b/pypy/module/array/test/test_array.py
@@ -536,12 +536,6 @@
                 assert (a >= c) is False
                 assert (c >= a) is True
 
-                assert cmp(a, a) == 0
-                assert cmp(a, b) == 0
-                assert cmp(a, c) <  0
-                assert cmp(b, a) == 0
-                assert cmp(c, a) >  0
-
     def test_reduce(self):
         import pickle
         a = self.array('i', [1, 2, 3])
@@ -851,8 +845,11 @@
         cls.maxint = sys.maxint
 
 class AppTestArray(BaseArrayTests):
+    OPTIONS = {}
+
     def setup_class(cls):
-        cls.space = gettestobjspace(usemodules=('array', 'struct', '_rawffi'))
+        cls.space = gettestobjspace(usemodules=('array', 'struct', '_rawffi'),
+                                    **cls.OPTIONS)
         cls.w_array = cls.space.appexec([], """():
             import array
             return array.array
@@ -874,3 +871,7 @@
         a = self.array('b', range(4))
         a[::-1] = a
         assert a == self.array('b', [3, 2, 1, 0])
+
+
+class AppTestArrayBuiltinShortcut(AppTestArray):
+    OPTIONS = {'objspace.std.builtinshortcut': True}
diff --git a/pypy/module/cpyext/include/object.h b/pypy/module/cpyext/include/object.h
--- a/pypy/module/cpyext/include/object.h
+++ b/pypy/module/cpyext/include/object.h
@@ -56,6 +56,8 @@
 #define Py_TYPE(ob)		(((PyObject*)(ob))->ob_type)
 #define Py_SIZE(ob)		(((PyVarObject*)(ob))->ob_size)
 
+#define _Py_ForgetReference(ob) /* nothing */
+
 #define Py_None (&_Py_NoneStruct)
 
 /*
diff --git a/pypy/module/imp/interp_imp.py b/pypy/module/imp/interp_imp.py
--- a/pypy/module/imp/interp_imp.py
+++ b/pypy/module/imp/interp_imp.py
@@ -1,10 +1,11 @@
 from pypy.module.imp import importing
 from pypy.module._file.interp_file import W_File
 from pypy.rlib import streamio
+from pypy.rlib.streamio import StreamErrors
 from pypy.interpreter.error import OperationError, operationerrfmt
 from pypy.interpreter.module import Module
 from pypy.interpreter.gateway import unwrap_spec
-from pypy.module._file.interp_stream import StreamErrors, wrap_streamerror
+from pypy.interpreter.streamutil import wrap_streamerror
 
 
 def get_suffixes(space):
diff --git a/pypy/module/imp/test/test_import.py b/pypy/module/imp/test/test_import.py
--- a/pypy/module/imp/test/test_import.py
+++ b/pypy/module/imp/test/test_import.py
@@ -357,7 +357,7 @@
 
     def test_cannot_write_pyc(self):
         import sys, os
-        p = os.path.join(sys.path[-1], 'readonly')
+        p = os.path.join(sys.path[0], 'readonly')
         try:
             os.chmod(p, 0555)
         except:
diff --git a/pypy/module/pypyjit/interp_resop.py b/pypy/module/pypyjit/interp_resop.py
--- a/pypy/module/pypyjit/interp_resop.py
+++ b/pypy/module/pypyjit/interp_resop.py
@@ -72,7 +72,7 @@
     Set a compiling hook that will be called each time a loop is optimized,
     but before assembler compilation. This allows to add additional
     optimizations on Python level.
-    
+
     The hook will be called with the following signature:
     hook(jitdriver_name, loop_type, greenkey or guard_number, operations)
 
@@ -121,13 +121,14 @@
             ofs = ops_offset.get(op, 0)
         if op.opnum == rop.DEBUG_MERGE_POINT:
             jd_sd = jitdrivers_sd[op.getarg(0).getint()]
-            greenkey = op.getarglist()[2:]
+            greenkey = op.getarglist()[3:]
             repr = jd_sd.warmstate.get_location_str(greenkey)
             w_greenkey = wrap_greenkey(space, jd_sd.jitdriver, greenkey, repr)
             l_w.append(DebugMergePoint(space, jit_hooks._cast_to_gcref(op),
                                        logops.repr_of_resop(op),
                                        jd_sd.jitdriver.name,
                                        op.getarg(1).getint(),
+                                       op.getarg(2).getint(),
                                        w_greenkey))
         else:
             l_w.append(WrappedOp(jit_hooks._cast_to_gcref(op), ofs,
@@ -164,14 +165,16 @@
         llres = res.llbox
     return WrappedOp(jit_hooks.resop_new(num, args, llres), offset, repr)
 
- at unwrap_spec(repr=str, jd_name=str, call_depth=int)
-def descr_new_dmp(space, w_tp, w_args, repr, jd_name, call_depth, w_greenkey):
+ at unwrap_spec(repr=str, jd_name=str, call_depth=int, call_id=int)
+def descr_new_dmp(space, w_tp, w_args, repr, jd_name, call_depth, call_id,
+    w_greenkey):
+
     args = [space.interp_w(WrappedBox, w_arg).llbox for w_arg in
             space.listview(w_args)]
     num = rop.DEBUG_MERGE_POINT
     return DebugMergePoint(space,
                            jit_hooks.resop_new(num, args, jit_hooks.emptyval()),
-                           repr, jd_name, call_depth, w_greenkey)
+                           repr, jd_name, call_depth, call_id, w_greenkey)
 
 class WrappedOp(Wrappable):
     """ A class representing a single ResOperation, wrapped nicely
@@ -206,10 +209,13 @@
         jit_hooks.resop_setresult(self.op, box.llbox)
 
 class DebugMergePoint(WrappedOp):
-    def __init__(self, space, op, repr_of_resop, jd_name, call_depth, w_greenkey):
+    def __init__(self, space, op, repr_of_resop, jd_name, call_depth, call_id,
+        w_greenkey):
+
         WrappedOp.__init__(self, op, -1, repr_of_resop)
         self.jd_name = jd_name
         self.call_depth = call_depth
+        self.call_id = call_id
         self.w_greenkey = w_greenkey
 
     def get_pycode(self, space):
@@ -246,6 +252,7 @@
     pycode = GetSetProperty(DebugMergePoint.get_pycode),
     bytecode_no = GetSetProperty(DebugMergePoint.get_bytecode_no),
     call_depth = interp_attrproperty("call_depth", cls=DebugMergePoint),
+    call_id = interp_attrproperty("call_id", cls=DebugMergePoint),
     jitdriver_name = GetSetProperty(DebugMergePoint.get_jitdriver_name),
 )
 DebugMergePoint.acceptable_as_base_class = False
diff --git a/pypy/module/pypyjit/test/test_jit_hook.py b/pypy/module/pypyjit/test/test_jit_hook.py
--- a/pypy/module/pypyjit/test/test_jit_hook.py
+++ b/pypy/module/pypyjit/test/test_jit_hook.py
@@ -54,7 +54,7 @@
         oplist = parse("""
         [i1, i2, p2]
         i3 = int_add(i1, i2)
-        debug_merge_point(0, 0, 0, 0, ConstPtr(ptr0))
+        debug_merge_point(0, 0, 0, 0, 0, ConstPtr(ptr0))
         guard_nonnull(p2) []
         guard_true(i3) []
         """, namespace={'ptr0': code_gcref}).operations
@@ -87,7 +87,7 @@
         def interp_on_abort():
             pypy_hooks.on_abort(ABORT_TOO_LONG, pypyjitdriver, greenkey,
                                 'blah')
-        
+
         cls.w_on_compile = space.wrap(interp2app(interp_on_compile))
         cls.w_on_compile_bridge = space.wrap(interp2app(interp_on_compile_bridge))
         cls.w_on_abort = space.wrap(interp2app(interp_on_abort))
@@ -105,7 +105,7 @@
 
         def hook(name, looptype, tuple_or_guard_no, ops, asmstart, asmlen):
             all.append((name, looptype, tuple_or_guard_no, ops))
-        
+
         self.on_compile()
         pypyjit.set_compile_hook(hook)
         assert not all
@@ -123,6 +123,7 @@
         assert dmp.pycode is self.f.func_code
         assert dmp.greenkey == (self.f.func_code, 0, False)
         assert dmp.call_depth == 0
+        assert dmp.call_id == 0
         assert int_add.name == 'int_add'
         assert int_add.num == self.int_add_num
         self.on_compile_bridge()
@@ -151,18 +152,18 @@
     def test_non_reentrant(self):
         import pypyjit
         l = []
-        
+
         def hook(*args):
             l.append(None)
             self.on_compile()
             self.on_compile_bridge()
-        
+
         pypyjit.set_compile_hook(hook)
         self.on_compile()
         assert len(l) == 1 # and did not crash
         self.on_compile_bridge()
         assert len(l) == 2 # and did not crash
-        
+
     def test_on_compile_types(self):
         import pypyjit
         l = []
@@ -182,7 +183,7 @@
 
         def hook(jitdriver_name, greenkey, reason):
             l.append((jitdriver_name, reason))
-        
+
         pypyjit.set_abort_hook(hook)
         self.on_abort()
         assert l == [('pypyjit', 'ABORT_TOO_LONG')]
@@ -224,13 +225,14 @@
         def f():
             pass
 
-        op = DebugMergePoint([Box(0)], 'repr', 'pypyjit', 2, (f.func_code, 0, 0))
+        op = DebugMergePoint([Box(0)], 'repr', 'pypyjit', 2, 3, (f.func_code, 0, 0))
         assert op.bytecode_no == 0
         assert op.pycode is f.func_code
         assert repr(op) == 'repr'
         assert op.jitdriver_name == 'pypyjit'
         assert op.num == self.dmp_num
         assert op.call_depth == 2
-        op = DebugMergePoint([Box(0)], 'repr', 'notmain', 5, ('str',))
+        assert op.call_id == 3
+        op = DebugMergePoint([Box(0)], 'repr', 'notmain', 5, 4, ('str',))
         raises(AttributeError, 'op.pycode')
         assert op.call_depth == 5
diff --git a/pypy/module/pypyjit/test_pypy_c/test_00_model.py b/pypy/module/pypyjit/test_pypy_c/test_00_model.py
--- a/pypy/module/pypyjit/test_pypy_c/test_00_model.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_00_model.py
@@ -60,6 +60,9 @@
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
         stdout, stderr = pipe.communicate()
+        if getattr(pipe, 'returncode', 0) < 0:
+            raise IOError("subprocess was killed by signal %d" % (
+                pipe.returncode,))
         if stderr.startswith('SKIP:'):
             py.test.skip(stderr)
         if stderr.startswith('debug_alloc.h:'):   # lldebug builds
diff --git a/pypy/module/pypyjit/test_pypy_c/test_alloc.py b/pypy/module/pypyjit/test_pypy_c/test_alloc.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_alloc.py
@@ -0,0 +1,26 @@
+import py, sys
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestAlloc(BaseTestPyPyC):
+
+    SIZES = dict.fromkeys([2 ** n for n in range(26)] +     # up to 32MB
+                          [2 ** n - 1 for n in range(26)])
+
+    def test_newstr_constant_size(self):
+        for size in TestAlloc.SIZES:
+            yield self.newstr_constant_size, size
+
+    def newstr_constant_size(self, size):
+        src = """if 1:
+                    N = %(size)d
+                    part_a = 'a' * N
+                    part_b = 'b' * N
+                    for i in xrange(20):
+                        ao = '%%s%%s' %% (part_a, part_b)
+                    def main():
+                        return 42
+""" % {'size': size}
+        log = self.run(src, [], threshold=10)
+        assert log.result == 42
+        loop, = log.loops_by_filename(self.filepath)
+        # assert did not crash
diff --git a/pypy/module/select/test/test_ztranslation.py b/pypy/module/select/test/test_ztranslation.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/select/test/test_ztranslation.py
@@ -0,0 +1,5 @@
+
+from pypy.objspace.fake.checkmodule import checkmodule
+
+def test_select_translates():
+    checkmodule('select')
diff --git a/pypy/module/test_lib_pypy/test_collections.py b/pypy/module/test_lib_pypy/test_collections.py
--- a/pypy/module/test_lib_pypy/test_collections.py
+++ b/pypy/module/test_lib_pypy/test_collections.py
@@ -6,7 +6,7 @@
 
 from pypy.conftest import gettestobjspace
 
-class AppTestcStringIO:
+class AppTestCollections:
     def test_copy(self):
         import _collections
         def f():
diff --git a/pypy/objspace/std/typeobject.py b/pypy/objspace/std/typeobject.py
--- a/pypy/objspace/std/typeobject.py
+++ b/pypy/objspace/std/typeobject.py
@@ -103,6 +103,7 @@
                           'terminator',
                           '_version_tag?',
                           'name?',
+                          'mro_w?[*]',
                           ]
 
     # for config.objspace.std.getattributeshortcut
diff --git a/pypy/rlib/jit.py b/pypy/rlib/jit.py
--- a/pypy/rlib/jit.py
+++ b/pypy/rlib/jit.py
@@ -392,6 +392,9 @@
 class JitHintError(Exception):
     """Inconsistency in the JIT hints."""
 
+ENABLE_ALL_OPTS = (
+    'intbounds:rewrite:virtualize:string:earlyforce:pure:heap:ffi:unroll')
+
 PARAMETER_DOCS = {
     'threshold': 'number of times a loop has to run for it to become hot',
     'function_threshold': 'number of times a function must run for it to become traced from start',
@@ -402,7 +405,8 @@
     'retrace_limit': 'how many times we can try retracing before giving up',
     'max_retrace_guards': 'number of extra guards a retrace can cause',
     'max_unroll_loops': 'number of extra unrollings a loop can cause',
-    'enable_opts': 'optimizations to enable or all, INTERNAL USE ONLY'
+    'enable_opts': 'INTERNAL USE ONLY: optimizations to enable, or all = %s' %
+                       ENABLE_ALL_OPTS,
     }
 
 PARAMETERS = {'threshold': 1039, # just above 1024, prime
diff --git a/pypy/rpython/lltypesystem/rlist.py b/pypy/rpython/lltypesystem/rlist.py
--- a/pypy/rpython/lltypesystem/rlist.py
+++ b/pypy/rpython/lltypesystem/rlist.py
@@ -392,7 +392,11 @@
                                          ('list', r_list.lowleveltype),
                                          ('index', Signed)))
         self.ll_listiter = ll_listiter
-        self.ll_listnext = ll_listnext
+        if (isinstance(r_list, FixedSizeListRepr)
+                and not r_list.listitem.mutated):
+            self.ll_listnext = ll_listnext_foldable
+        else:
+            self.ll_listnext = ll_listnext
         self.ll_getnextindex = ll_getnextindex
 
 def ll_listiter(ITERPTR, lst):
@@ -409,5 +413,14 @@
     iter.index = index + 1      # cannot overflow because index < l.length
     return l.ll_getitem_fast(index)
 
+def ll_listnext_foldable(iter):
+    from pypy.rpython.rlist import ll_getitem_foldable_nonneg
+    l = iter.list
+    index = iter.index
+    if index >= l.ll_length():
+        raise StopIteration
+    iter.index = index + 1      # cannot overflow because index < l.length
+    return ll_getitem_foldable_nonneg(l, index)
+
 def ll_getnextindex(iter):
     return iter.index
diff --git a/pypy/rpython/lltypesystem/rstr.py b/pypy/rpython/lltypesystem/rstr.py
--- a/pypy/rpython/lltypesystem/rstr.py
+++ b/pypy/rpython/lltypesystem/rstr.py
@@ -62,6 +62,14 @@
     @jit.oopspec('stroruni.copy_contents(src, dst, srcstart, dststart, length)')
     @enforceargs(None, None, int, int, int)
     def copy_string_contents(src, dst, srcstart, dststart, length):
+        """Copies 'length' characters from the 'src' string to the 'dst'
+        string, starting at position 'srcstart' and 'dststart'."""
+        # xxx Warning: don't try to do this at home.  It relies on a lot
+        # of details to be sure that it works correctly in all cases.
+        # Notably: no GC operation at all from the first cast_ptr_to_adr()
+        # because it might move the strings.  The keepalive_until_here()
+        # are obscurely essential to make sure that the strings stay alive
+        # longer than the raw_memcopy().
         assert srcstart >= 0
         assert dststart >= 0
         assert length >= 0
diff --git a/pypy/rpython/memory/gc/minimark.py b/pypy/rpython/memory/gc/minimark.py
--- a/pypy/rpython/memory/gc/minimark.py
+++ b/pypy/rpython/memory/gc/minimark.py
@@ -608,6 +608,11 @@
         specified as 0 if the object is not varsized.  The returned
         object is fully initialized and zero-filled."""
         #
+        # Here we really need a valid 'typeid', not 0 (as the JIT might
+        # try to send us if there is still a bug).
+        ll_assert(bool(self.combine(typeid, 0)),
+                  "external_malloc: typeid == 0")
+        #
         # Compute the total size, carefully checking for overflows.
         size_gc_header = self.gcheaderbuilder.size_gc_header
         nonvarsize = size_gc_header + self.fixed_size(typeid)
diff --git a/pypy/rpython/memory/gctransform/asmgcroot.py b/pypy/rpython/memory/gctransform/asmgcroot.py
--- a/pypy/rpython/memory/gctransform/asmgcroot.py
+++ b/pypy/rpython/memory/gctransform/asmgcroot.py
@@ -442,6 +442,8 @@
         ll_assert(location >= 0, "negative location")
         kind = location & LOC_MASK
         offset = location & ~ LOC_MASK
+        if IS_64_BITS:
+            offset <<= 1
         if kind == LOC_REG:   # register
             if location == LOC_NOWHERE:
                 return llmemory.NULL
diff --git a/pypy/rpython/rclass.py b/pypy/rpython/rclass.py
--- a/pypy/rpython/rclass.py
+++ b/pypy/rpython/rclass.py
@@ -364,6 +364,8 @@
     def get_ll_hash_function(self):
         return ll_inst_hash
 
+    get_ll_fasthash_function = get_ll_hash_function
+
     def rtype_type(self, hop):
         raise NotImplementedError
 
diff --git a/pypy/rpython/test/test_rdict.py b/pypy/rpython/test/test_rdict.py
--- a/pypy/rpython/test/test_rdict.py
+++ b/pypy/rpython/test/test_rdict.py
@@ -449,6 +449,21 @@
 
         assert r_AB_dic.lowleveltype == r_BA_dic.lowleveltype
 
+    def test_identity_hash_is_fast(self):
+        class A(object):
+            pass
+
+        def f():
+            return {A(): 1}
+
+        t = TranslationContext()
+        s = t.buildannotator().build_types(f, [])
+        rtyper = t.buildrtyper()
+        rtyper.specialize()
+
+        r_dict = rtyper.getrepr(s)
+        assert not hasattr(r_dict.lowleveltype.TO.entries.TO.OF, "f_hash")
+
     def test_tuple_dict(self):
         def f(i):
             d = {}
diff --git a/pypy/rpython/test/test_rlist.py b/pypy/rpython/test/test_rlist.py
--- a/pypy/rpython/test/test_rlist.py
+++ b/pypy/rpython/test/test_rlist.py
@@ -8,6 +8,7 @@
 from pypy.rpython.rlist import *
 from pypy.rpython.lltypesystem.rlist import ListRepr, FixedSizeListRepr, ll_newlist, ll_fixed_newlist
 from pypy.rpython.lltypesystem import rlist as ll_rlist
+from pypy.rpython.llinterp import LLException
 from pypy.rpython.ootypesystem import rlist as oo_rlist
 from pypy.rpython.rint import signed_repr
 from pypy.objspace.flow.model import Constant, Variable
@@ -1477,6 +1478,80 @@
         assert func1.oopspec == 'list.getitem_foldable(l, index)'
         assert not hasattr(func2, 'oopspec')
 
+    def test_iterate_over_immutable_list(self):
+        from pypy.rpython import rlist
+        class MyException(Exception):
+            pass
+        lst = list('abcdef')
+        def dummyfn():
+            total = 0
+            for c in lst:
+                total += ord(c)
+            return total
+        #
+        prev = rlist.ll_getitem_foldable_nonneg
+        try:
+            def seen_ok(l, index):
+                if index == 5:
+                    raise KeyError     # expected case
+                return prev(l, index)
+            rlist.ll_getitem_foldable_nonneg = seen_ok
+            e = raises(LLException, self.interpret, dummyfn, [])
+            assert 'KeyError' in str(e.value)
+        finally:
+            rlist.ll_getitem_foldable_nonneg = prev
+
+    def test_iterate_over_immutable_list_quasiimmut_attr(self):
+        from pypy.rpython import rlist
+        class MyException(Exception):
+            pass
+        class Foo:
+            _immutable_fields_ = ['lst?[*]']
+            lst = list('abcdef')
+        foo = Foo()
+        def dummyfn():
+            total = 0
+            for c in foo.lst:
+                total += ord(c)
+            return total
+        #
+        prev = rlist.ll_getitem_foldable_nonneg
+        try:
+            def seen_ok(l, index):
+                if index == 5:
+                    raise KeyError     # expected case
+                return prev(l, index)
+            rlist.ll_getitem_foldable_nonneg = seen_ok
+            e = raises(LLException, self.interpret, dummyfn, [])
+            assert 'KeyError' in str(e.value)
+        finally:
+            rlist.ll_getitem_foldable_nonneg = prev
+
+    def test_iterate_over_mutable_list(self):
+        from pypy.rpython import rlist
+        class MyException(Exception):
+            pass
+        lst = list('abcdef')
+        def dummyfn():
+            total = 0
+            for c in lst:
+                total += ord(c)
+            lst[0] = 'x'
+            return total
+        #
+        prev = rlist.ll_getitem_foldable_nonneg
+        try:
+            def seen_ok(l, index):
+                if index == 5:
+                    raise KeyError     # expected case
+                return prev(l, index)
+            rlist.ll_getitem_foldable_nonneg = seen_ok
+            res = self.interpret(dummyfn, [])
+            assert res == sum(map(ord, 'abcdef'))
+        finally:
+            rlist.ll_getitem_foldable_nonneg = prev
+
+
 class TestOOtype(BaseTestRlist, OORtypeMixin):
     rlist = oo_rlist
     type_system = 'ootype'
diff --git a/pypy/tool/jitlogparser/parser.py b/pypy/tool/jitlogparser/parser.py
--- a/pypy/tool/jitlogparser/parser.py
+++ b/pypy/tool/jitlogparser/parser.py
@@ -93,7 +93,7 @@
                         end_index += 1
                     op.asm = '\n'.join([asm[i][1] for i in range(asm_index, end_index)])
         return loop
-                    
+
     def _asm_disassemble(self, d, origin_addr, tp):
         from pypy.jit.backend.x86.tool.viewcode import machine_code_dump
         return list(machine_code_dump(d, tp, origin_addr))
@@ -109,7 +109,7 @@
         if not argspec.strip():
             return [], None
         if opname == 'debug_merge_point':
-            return argspec.split(", ", 1), None
+            return argspec.split(", ", 2), None
         else:
             args = argspec.split(', ')
             descr = None
@@ -159,7 +159,7 @@
         for op in operations:
             if op.name == 'debug_merge_point':
                 self.inline_level = int(op.args[0])
-                self.parse_code_data(op.args[1][1:-1])
+                self.parse_code_data(op.args[2][1:-1])
                 break
         else:
             self.inline_level = 0
@@ -417,7 +417,7 @@
         part.descr = descrs[i]
         part.comment = trace.comment
         parts.append(part)
-    
+
     return parts
 
 def parse_log_counts(input, loops):
diff --git a/pypy/tool/jitlogparser/test/test_parser.py b/pypy/tool/jitlogparser/test/test_parser.py
--- a/pypy/tool/jitlogparser/test/test_parser.py
+++ b/pypy/tool/jitlogparser/test/test_parser.py
@@ -29,7 +29,7 @@
 def test_parse_non_code():
     ops = parse('''
     []
-    debug_merge_point(0, "SomeRandomStuff")
+    debug_merge_point(0, 0, "SomeRandomStuff")
     ''')
     res = Function.from_operations(ops.operations, LoopStorage())
     assert len(res.chunks) == 1
@@ -39,10 +39,10 @@
     ops = parse('''
     [i0]
     label()
-    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 200> #10 ADD")
-    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 200> #11 SUB")
+    debug_merge_point(0, 0, "<code object stuff. file '/I/dont/exist.py'. line 200> #10 ADD")
+    debug_merge_point(0, 0, "<code object stuff. file '/I/dont/exist.py'. line 200> #11 SUB")
     i1 = int_add(i0, 1)
-    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 200> #11 SUB")
+    debug_merge_point(0, 0, "<code object stuff. file '/I/dont/exist.py'. line 200> #11 SUB")
     i2 = int_add(i1, 1)
     ''')
     res = Function.from_operations(ops.operations, LoopStorage(), loopname='<loopname>')
@@ -57,12 +57,12 @@
 def test_inlined_call():
     ops = parse("""
     []
-    debug_merge_point(0, '<code object inlined_call. file 'source.py'. line 12> #28 CALL_FUNCTION')
+    debug_merge_point(0, 0, '<code object inlined_call. file 'source.py'. line 12> #28 CALL_FUNCTION')
     i18 = getfield_gc(p0, descr=<BoolFieldDescr pypy.interpreter.pyframe.PyFrame.inst_is_being_profiled 89>)
-    debug_merge_point(1, '<code object inner. file 'source.py'. line 9> #0 LOAD_FAST')
-    debug_merge_point(1, '<code object inner. file 'source.py'. line 9> #3 LOAD_CONST')
-    debug_merge_point(1, '<code object inner. file 'source.py'. line 9> #7 RETURN_VALUE')
-    debug_merge_point(0, '<code object inlined_call. file 'source.py'. line 12> #31 STORE_FAST')
+    debug_merge_point(1, 1, '<code object inner. file 'source.py'. line 9> #0 LOAD_FAST')
+    debug_merge_point(1, 1, '<code object inner. file 'source.py'. line 9> #3 LOAD_CONST')
+    debug_merge_point(1, 1, '<code object inner. file 'source.py'. line 9> #7 RETURN_VALUE')
+    debug_merge_point(0, 0, '<code object inlined_call. file 'source.py'. line 12> #31 STORE_FAST')
     """)
     res = Function.from_operations(ops.operations, LoopStorage())
     assert len(res.chunks) == 3 # two chunks + inlined call
@@ -75,10 +75,10 @@
 def test_name():
     ops = parse('''
     [i0]
-    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 200> #10 ADD")
-    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 201> #11 SUB")
+    debug_merge_point(0, 0, "<code object stuff. file '/I/dont/exist.py'. line 200> #10 ADD")
+    debug_merge_point(0, 0, "<code object stuff. file '/I/dont/exist.py'. line 201> #11 SUB")
     i1 = int_add(i0, 1)
-    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 202> #11 SUB")
+    debug_merge_point(0, 0, "<code object stuff. file '/I/dont/exist.py'. line 202> #11 SUB")
     i2 = int_add(i1, 1)
     ''')
     res = Function.from_operations(ops.operations, LoopStorage())
@@ -92,10 +92,10 @@
     ops = parse('''
     [i0]
     i3 = int_add(i0, 1)
-    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 200> #10 ADD")
-    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 201> #11 SUB")
+    debug_merge_point(0, 0, "<code object stuff. file '/I/dont/exist.py'. line 200> #10 ADD")
+    debug_merge_point(0, 0, "<code object stuff. file '/I/dont/exist.py'. line 201> #11 SUB")
     i1 = int_add(i0, 1)
-    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 202> #11 SUB")
+    debug_merge_point(0, 0, "<code object stuff. file '/I/dont/exist.py'. line 202> #11 SUB")
     i2 = int_add(i1, 1)
     ''')
     res = Function.from_operations(ops.operations, LoopStorage())
@@ -105,10 +105,10 @@
     fname = str(py.path.local(__file__).join('..', 'x.py'))
     ops = parse('''
     [i0, i1]
-    debug_merge_point(0, "<code object f. file '%(fname)s'. line 2> #0 LOAD_FAST")
-    debug_merge_point(0, "<code object f. file '%(fname)s'. line 2> #3 LOAD_FAST")
-    debug_merge_point(0, "<code object f. file '%(fname)s'. line 2> #6 BINARY_ADD")
-    debug_merge_point(0, "<code object f. file '%(fname)s'. line 2> #7 RETURN_VALUE")
+    debug_merge_point(0, 0, "<code object f. file '%(fname)s'. line 2> #0 LOAD_FAST")
+    debug_merge_point(0, 0, "<code object f. file '%(fname)s'. line 2> #3 LOAD_FAST")
+    debug_merge_point(0, 0, "<code object f. file '%(fname)s'. line 2> #6 BINARY_ADD")
+    debug_merge_point(0, 0, "<code object f. file '%(fname)s'. line 2> #7 RETURN_VALUE")
     ''' % locals())
     res = Function.from_operations(ops.operations, LoopStorage())
     assert res.chunks[1].lineno == 3
@@ -119,11 +119,11 @@
     fname = str(py.path.local(__file__).join('..', 'x.py'))
     ops = parse('''
     [i0, i1]
-    debug_merge_point(0, "<code object g. file '%(fname)s'. line 5> #9 LOAD_FAST")
-    debug_merge_point(0, "<code object g. file '%(fname)s'. line 5> #12 LOAD_CONST")
-    debug_merge_point(0, "<code object g. file '%(fname)s'. line 5> #22 LOAD_CONST")
-    debug_merge_point(0, "<code object g. file '%(fname)s'. line 5> #28 LOAD_CONST")
-    debug_merge_point(0, "<code object g. file '%(fname)s'. line 5> #6 SETUP_LOOP")
+    debug_merge_point(0, 0, "<code object g. file '%(fname)s'. line 5> #9 LOAD_FAST")
+    debug_merge_point(0, 0, "<code object g. file '%(fname)s'. line 5> #12 LOAD_CONST")
+    debug_merge_point(0, 0, "<code object g. file '%(fname)s'. line 5> #22 LOAD_CONST")
+    debug_merge_point(0, 0, "<code object g. file '%(fname)s'. line 5> #28 LOAD_CONST")
+    debug_merge_point(0, 0, "<code object g. file '%(fname)s'. line 5> #6 SETUP_LOOP")
     ''' % locals())
     res = Function.from_operations(ops.operations, LoopStorage())
     assert res.linerange == (7, 9)
@@ -135,7 +135,7 @@
     fname = str(py.path.local(__file__).join('..', 'x.py'))
     ops = parse("""
     [p6, p1]
-    debug_merge_point(0, '<code object h. file '%(fname)s'. line 11> #17 FOR_ITER')
+    debug_merge_point(0, 0, '<code object h. file '%(fname)s'. line 11> #17 FOR_ITER')
     guard_class(p6, 144264192, descr=<Guard2>)
     p12 = getfield_gc(p6, descr=<GcPtrFieldDescr pypy.objspace.std.iterobject.W_AbstractSeqIterObject.inst_w_seq 12>)
     """ % locals())
@@ -181,7 +181,7 @@
 
 def test_parsing_strliteral():
     loop = parse("""
-    debug_merge_point(0, 'StrLiteralSearch at 11/51 [17, 8, 3, 1, 1, 1, 1, 51, 0, 19, 51, 1]')
+    debug_merge_point(0, 0, 'StrLiteralSearch at 11/51 [17, 8, 3, 1, 1, 1, 1, 51, 0, 19, 51, 1]')
     """)
     ops = Function.from_operations(loop.operations, LoopStorage())
     chunk = ops.chunks[0]
@@ -193,12 +193,12 @@
     loop = parse("""
     # Loop 0 : loop with 19 ops
     [p0, p1, p2, p3, i4]
-    debug_merge_point(0, '<code object f. file 'x.py'. line 2> #15 COMPARE_OP')
+    debug_merge_point(0, 0, '<code object f. file 'x.py'. line 2> #15 COMPARE_OP')
     +166: i6 = int_lt(i4, 10000)
     guard_true(i6, descr=<Guard3>) [p1, p0, p2, p3, i4]
-    debug_merge_point(0, '<code object f. file 'x.py'. line 2> #27 INPLACE_ADD')
+    debug_merge_point(0, 0, '<code object f. file 'x.py'. line 2> #27 INPLACE_ADD')
     +179: i8 = int_add(i4, 1)
-    debug_merge_point(0, '<code object f. file 'x.py'. line 2> #31 JUMP_ABSOLUTE')
+    debug_merge_point(0, 0, '<code object f. file 'x.py'. line 2> #31 JUMP_ABSOLUTE')
     +183: i10 = getfield_raw(40564608, descr=<SignedFieldDescr pypysig_long_struct.c_value 0>)
     +191: i12 = int_sub(i10, 1)
     +195: setfield_raw(40564608, i12, descr=<SignedFieldDescr pypysig_long_struct.c_value 0>)
@@ -287,8 +287,8 @@
 def test_parse_nonpython():
     loop = parse("""
     []
-    debug_merge_point(0, 'random')
-    debug_merge_point(0, '<code object f. file 'x.py'. line 2> #15 COMPARE_OP')
+    debug_merge_point(0, 0, 'random')
+    debug_merge_point(0, 0, '<code object f. file 'x.py'. line 2> #15 COMPARE_OP')
     """)
     f = Function.from_operations(loop.operations, LoopStorage())
     assert f.chunks[-1].filename == 'x.py'
diff --git a/pypy/translator/c/gcc/instruction.py b/pypy/translator/c/gcc/instruction.py
--- a/pypy/translator/c/gcc/instruction.py
+++ b/pypy/translator/c/gcc/instruction.py
@@ -13,13 +13,17 @@
 ARGUMENT_REGISTERS_64 = ('%rdi', '%rsi', '%rdx', '%rcx', '%r8', '%r9')
 
 
-def frameloc_esp(offset):
+def frameloc_esp(offset, wordsize):
     assert offset >= 0
-    assert offset % 4 == 0
+    assert offset % wordsize == 0
+    if wordsize == 8:    # in this case, there are 3 null bits, but we
+        offset >>= 1     # only need 2 of them
     return LOC_ESP_PLUS | offset
 
-def frameloc_ebp(offset):
-    assert offset % 4 == 0
+def frameloc_ebp(offset, wordsize):
+    assert offset % wordsize == 0
+    if wordsize == 8:    # in this case, there are 3 null bits, but we
+        offset >>= 1     # only need 2 of them
     if offset >= 0:
         return LOC_EBP_PLUS | offset
     else:
@@ -57,12 +61,12 @@
             # try to use esp-relative addressing
             ofs_from_esp = framesize + self.ofs_from_frame_end
             if ofs_from_esp % 2 == 0:
-                return frameloc_esp(ofs_from_esp)
+                return frameloc_esp(ofs_from_esp, wordsize)
             # we can get an odd value if the framesize is marked as bogus
             # by visit_andl()
         assert uses_frame_pointer
         ofs_from_ebp = self.ofs_from_frame_end + wordsize
-        return frameloc_ebp(ofs_from_ebp)
+        return frameloc_ebp(ofs_from_ebp, wordsize)
 
 
 class Insn(object):
diff --git a/pypy/translator/c/gcc/trackgcroot.py b/pypy/translator/c/gcc/trackgcroot.py
--- a/pypy/translator/c/gcc/trackgcroot.py
+++ b/pypy/translator/c/gcc/trackgcroot.py
@@ -78,9 +78,9 @@
             if self.is_stack_bottom:
                 retaddr = LOC_NOWHERE     # end marker for asmgcroot.py
             elif self.uses_frame_pointer:
-                retaddr = frameloc_ebp(self.WORD)
+                retaddr = frameloc_ebp(self.WORD, self.WORD)
             else:
-                retaddr = frameloc_esp(insn.framesize)
+                retaddr = frameloc_esp(insn.framesize, self.WORD)
             shape = [retaddr]
             # the first gcroots are always the ones corresponding to
             # the callee-saved registers
@@ -894,6 +894,8 @@
             return '%' + cls.CALLEE_SAVE_REGISTERS[reg].replace("%", "")
         else:
             offset = loc & ~ LOC_MASK
+            if cls.WORD == 8:
+                offset <<= 1
             if kind == LOC_EBP_PLUS:
                 result = '(%' + cls.EBP.replace("%", "") + ')'
             elif kind == LOC_EBP_MINUS:
diff --git a/pypy/translator/c/src/libffi_msvc/ffi.c b/pypy/translator/c/src/libffi_msvc/ffi.c
--- a/pypy/translator/c/src/libffi_msvc/ffi.c
+++ b/pypy/translator/c/src/libffi_msvc/ffi.c
@@ -71,31 +71,31 @@
 	  switch ((*p_arg)->type)
 	    {
 	    case FFI_TYPE_SINT8:
-	      *(signed int *) argp = (signed int)*(SINT8 *)(* p_argv);
+	      *(signed int *) argp = (signed int)*(ffi_SINT8 *)(* p_argv);
 	      break;
 
 	    case FFI_TYPE_UINT8:
-	      *(unsigned int *) argp = (unsigned int)*(UINT8 *)(* p_argv);
+	      *(unsigned int *) argp = (unsigned int)*(ffi_UINT8 *)(* p_argv);
 	      break;
 
 	    case FFI_TYPE_SINT16:
-	      *(signed int *) argp = (signed int)*(SINT16 *)(* p_argv);
+	      *(signed int *) argp = (signed int)*(ffi_SINT16 *)(* p_argv);
 	      break;
 
 	    case FFI_TYPE_UINT16:
-	      *(unsigned int *) argp = (unsigned int)*(UINT16 *)(* p_argv);
+	      *(unsigned int *) argp = (unsigned int)*(ffi_UINT16 *)(* p_argv);
 	      break;
 
 	    case FFI_TYPE_SINT32:
-	      *(signed int *) argp = (signed int)*(SINT32 *)(* p_argv);
+	      *(signed int *) argp = (signed int)*(ffi_SINT32 *)(* p_argv);
 	      break;
 
 	    case FFI_TYPE_UINT32:
-	      *(unsigned int *) argp = (unsigned int)*(UINT32 *)(* p_argv);
+	      *(unsigned int *) argp = (unsigned int)*(ffi_UINT32 *)(* p_argv);
 	      break;
 
 	    case FFI_TYPE_STRUCT:
-	      *(unsigned int *) argp = (unsigned int)*(UINT32 *)(* p_argv);
+	      *(unsigned int *) argp = (unsigned int)*(ffi_UINT32 *)(* p_argv);
 	      break;
 
 	    default:
diff --git a/pypy/translator/c/src/libffi_msvc/ffi_common.h b/pypy/translator/c/src/libffi_msvc/ffi_common.h
--- a/pypy/translator/c/src/libffi_msvc/ffi_common.h
+++ b/pypy/translator/c/src/libffi_msvc/ffi_common.h
@@ -56,16 +56,18 @@
 } extended_cif;
 
 /* Terse sized type definitions.  */
-typedef unsigned int UINT8  __attribute__((__mode__(__QI__)));
-typedef signed int   SINT8  __attribute__((__mode__(__QI__)));
-typedef unsigned int UINT16 __attribute__((__mode__(__HI__)));
-typedef signed int   SINT16 __attribute__((__mode__(__HI__)));
-typedef unsigned int UINT32 __attribute__((__mode__(__SI__)));
-typedef signed int   SINT32 __attribute__((__mode__(__SI__)));
-typedef unsigned int UINT64 __attribute__((__mode__(__DI__)));
-typedef signed int   SINT64 __attribute__((__mode__(__DI__)));
+/* Fix for PyPy: these names are fine, but are bound to conflict with
+ * some other name from somewhere else :-(  Added a 'ffi_' prefix. */
+typedef unsigned int ffi_UINT8  __attribute__((__mode__(__QI__)));
+typedef signed int   ffi_SINT8  __attribute__((__mode__(__QI__)));
+typedef unsigned int ffi_UINT16 __attribute__((__mode__(__HI__)));
+typedef signed int   ffi_SINT16 __attribute__((__mode__(__HI__)));
+typedef unsigned int ffi_UINT32 __attribute__((__mode__(__SI__)));
+typedef signed int   ffi_SINT32 __attribute__((__mode__(__SI__)));
+typedef unsigned int ffi_UINT64 __attribute__((__mode__(__DI__)));
+typedef signed int   ffi_SINT64 __attribute__((__mode__(__DI__)));
 
-typedef float FLOAT32;
+typedef float ffi_FLOAT32;
 
 
 #ifdef __cplusplus
diff --git a/pypy/translator/goal/app_main.py b/pypy/translator/goal/app_main.py
--- a/pypy/translator/goal/app_main.py
+++ b/pypy/translator/goal/app_main.py
@@ -130,30 +130,46 @@
         sys.executable,)
     print __doc__.rstrip()
     if 'pypyjit' in sys.builtin_module_names:
-        _print_jit_help()
+        print "  --jit OPTIONS  advanced JIT options: try 'off' or 'help'"
     print
     raise SystemExit
 
 def _print_jit_help():
-    import pypyjit
+    try:
+        import pypyjit
+    except ImportError:
+        print >> sys.stderr, "No jit support in %s" % (sys.executable,)
+        return
     items = pypyjit.defaults.items()
     items.sort()
+    print 'Advanced JIT options: a comma-separated list of OPTION=VALUE:'
     for key, value in items:
-        prefix = '  --jit %s=N %s' % (key, ' '*(18-len(key)))
+        print
+        print ' %s=N' % (key,)
         doc = '%s (default %s)' % (pypyjit.PARAMETER_DOCS[key], value)
-        while len(doc) > 51:
-            i = doc[:51].rfind(' ')
-            print prefix + doc[:i]
+        while len(doc) > 72:
+            i = doc[:74].rfind(' ')
+            if i < 0:
+                i = doc.find(' ')
+                if i < 0:
+                    i = len(doc)
+            print '    ' + doc[:i]
             doc = doc[i+1:]
-            prefix = ' '*len(prefix)
-        print prefix + doc
-    print '  --jit off                  turn off the JIT'
+        print '    ' + doc
+    print
+    print ' off'
+    print '    turn off the JIT'
+    print ' help'
+    print '    print this page'
 
 def print_version(*args):
     print >> sys.stderr, "Python", sys.version
     raise SystemExit
 
 def set_jit_option(options, jitparam, *args):
+    if jitparam == 'help':
+        _print_jit_help()
+        raise SystemExit
     if 'pypyjit' not in sys.builtin_module_names:
         print >> sys.stderr, ("Warning: No jit support in %s" %
                               (sys.executable,))