[pypy-commit] pypy dynamic-specialized-tuple: merged erase-raw-mem into this

Tue Mar 13 09:07:23 CET 2012

Author: Alex Gaynor <alex.gaynor at gmail.com>
Branch: dynamic-specialized-tuple
Changeset: r53426:eed761704735
Date: 2012-03-12 23:42 -0700
http://bitbucket.org/pypy/pypy/changeset/eed761704735/

Log:	merged erase-raw-mem into this

diff too long, truncating to 10000 out of 10454 lines

diff --git a/lib-python/conftest.py b/lib-python/conftest.py
--- a/lib-python/conftest.py
+++ b/lib-python/conftest.py
@@ -311,7 +311,7 @@
     RegrTest('test_mimetypes.py'),
     RegrTest('test_MimeWriter.py', core=False),
     RegrTest('test_minidom.py'),
-    RegrTest('test_mmap.py'),
+    RegrTest('test_mmap.py', usemodules="mmap"),
     RegrTest('test_module.py', core=True),
     RegrTest('test_modulefinder.py'),
     RegrTest('test_msilib.py', skip=only_win32),
diff --git a/lib-python/modified-2.7/ctypes/test/test_arrays.py b/lib-python/modified-2.7/ctypes/test/test_arrays.py
--- a/lib-python/modified-2.7/ctypes/test/test_arrays.py
+++ b/lib-python/modified-2.7/ctypes/test/test_arrays.py
@@ -1,12 +1,23 @@
 import unittest
 from ctypes import *
+from test.test_support import impl_detail
 
 formats = "bBhHiIlLqQfd"
 
+# c_longdouble commented out for PyPy, look at the commend in test_longdouble
 formats = c_byte, c_ubyte, c_short, c_ushort, c_int, c_uint, \
-          c_long, c_ulonglong, c_float, c_double, c_longdouble
+          c_long, c_ulonglong, c_float, c_double #, c_longdouble
 
 class ArrayTestCase(unittest.TestCase):
+
+    @impl_detail('long double not supported by PyPy', pypy=False)
+    def test_longdouble(self):
+        """
+        This test is empty. It's just here to remind that we commented out
+        c_longdouble in "formats". If pypy will ever supports c_longdouble, we
+        should kill this test and uncomment c_longdouble inside formats.
+        """
+
     def test_simple(self):
         # create classes holding simple numeric types, and check
         # various properties.
diff --git a/lib-python/modified-2.7/distutils/command/bdist_wininst.py b/lib-python/modified-2.7/distutils/command/bdist_wininst.py
--- a/lib-python/modified-2.7/distutils/command/bdist_wininst.py
+++ b/lib-python/modified-2.7/distutils/command/bdist_wininst.py
@@ -298,7 +298,8 @@
                              bitmaplen,        # number of bytes in bitmap
                              )
         file.write(header)
-        file.write(open(arcname, "rb").read())
+        with open(arcname, "rb") as arcfile:
+            file.write(arcfile.read())
 
     # create_exe()
 
diff --git a/lib-python/modified-2.7/opcode.py b/lib-python/modified-2.7/opcode.py
--- a/lib-python/modified-2.7/opcode.py
+++ b/lib-python/modified-2.7/opcode.py
@@ -192,5 +192,6 @@
 def_op('LOOKUP_METHOD', 201)          # Index in name list
 hasname.append(201)
 def_op('CALL_METHOD', 202)            # #args not including 'self'
+def_op('BUILD_LIST_FROM_ARG', 203)
 
 del def_op, name_op, jrel_op, jabs_op
diff --git a/lib-python/modified-2.7/test/test_dis.py b/lib-python/modified-2.7/test/test_dis.py
new file mode 100644
--- /dev/null
+++ b/lib-python/modified-2.7/test/test_dis.py
@@ -0,0 +1,150 @@
+# Minimal tests for dis module
+
+from test.test_support import run_unittest
+import unittest
+import sys
+import dis
+import StringIO
+
+
+def _f(a):
+    print a
+    return 1
+
+dis_f = """\
+ %-4d         0 LOAD_FAST                0 (a)
+              3 PRINT_ITEM
+              4 PRINT_NEWLINE
+
+ %-4d         5 LOAD_CONST               1 (1)
+              8 RETURN_VALUE
+"""%(_f.func_code.co_firstlineno + 1,
+     _f.func_code.co_firstlineno + 2)
+
+
+def bug708901():
+    for res in range(1,
+                     10):
+        pass
+
+dis_bug708901 = """\
+ %-4d         0 SETUP_LOOP              23 (to 26)
+              3 LOAD_GLOBAL              0 (range)
+              6 LOAD_CONST               1 (1)
+
+ %-4d         9 LOAD_CONST               2 (10)
+             12 CALL_FUNCTION            2
+             15 GET_ITER
+        >>   16 FOR_ITER                 6 (to 25)
+             19 STORE_FAST               0 (res)
+
+ %-4d        22 JUMP_ABSOLUTE           16
+        >>   25 POP_BLOCK
+        >>   26 LOAD_CONST               0 (None)
+             29 RETURN_VALUE
+"""%(bug708901.func_code.co_firstlineno + 1,
+     bug708901.func_code.co_firstlineno + 2,
+     bug708901.func_code.co_firstlineno + 3)
+
+
+def bug1333982(x=[]):
+    assert 0, ([s for s in x] +
+              1)
+    pass
+
+dis_bug1333982 = """\
+ %-4d         0 LOAD_CONST               1 (0)
+              3 POP_JUMP_IF_TRUE        38
+              6 LOAD_GLOBAL              0 (AssertionError)
+              9 LOAD_FAST                0 (x)
+             12 BUILD_LIST_FROM_ARG      0
+             15 GET_ITER
+        >>   16 FOR_ITER                12 (to 31)
+             19 STORE_FAST               1 (s)
+             22 LOAD_FAST                1 (s)
+             25 LIST_APPEND              2
+             28 JUMP_ABSOLUTE           16
+
+ %-4d   >>   31 LOAD_CONST               2 (1)
+             34 BINARY_ADD
+             35 RAISE_VARARGS            2
+
+ %-4d   >>   38 LOAD_CONST               0 (None)
+             41 RETURN_VALUE
+"""%(bug1333982.func_code.co_firstlineno + 1,
+     bug1333982.func_code.co_firstlineno + 2,
+     bug1333982.func_code.co_firstlineno + 3)
+
+_BIG_LINENO_FORMAT = """\
+%3d           0 LOAD_GLOBAL              0 (spam)
+              3 POP_TOP
+              4 LOAD_CONST               0 (None)
+              7 RETURN_VALUE
+"""
+
+class DisTests(unittest.TestCase):
+    def do_disassembly_test(self, func, expected):
+        s = StringIO.StringIO()
+        save_stdout = sys.stdout
+        sys.stdout = s
+        dis.dis(func)
+        sys.stdout = save_stdout
+        got = s.getvalue()
+        # Trim trailing blanks (if any).
+        lines = got.split('\n')
+        lines = [line.rstrip() for line in lines]
+        expected = expected.split("\n")
+        import difflib
+        if expected != lines:
+            self.fail(
+                "events did not match expectation:\n" +
+                "\n".join(difflib.ndiff(expected,
+                                        lines)))
+
+    def test_opmap(self):
+        self.assertEqual(dis.opmap["STOP_CODE"], 0)
+        self.assertIn(dis.opmap["LOAD_CONST"], dis.hasconst)
+        self.assertIn(dis.opmap["STORE_NAME"], dis.hasname)
+
+    def test_opname(self):
+        self.assertEqual(dis.opname[dis.opmap["LOAD_FAST"]], "LOAD_FAST")
+
+    def test_boundaries(self):
+        self.assertEqual(dis.opmap["EXTENDED_ARG"], dis.EXTENDED_ARG)
+        self.assertEqual(dis.opmap["STORE_NAME"], dis.HAVE_ARGUMENT)
+
+    def test_dis(self):
+        self.do_disassembly_test(_f, dis_f)
+
+    def test_bug_708901(self):
+        self.do_disassembly_test(bug708901, dis_bug708901)
+
+    def test_bug_1333982(self):
+        # This one is checking bytecodes generated for an `assert` statement,
+        # so fails if the tests are run with -O.  Skip this test then.
+        if __debug__:
+            self.do_disassembly_test(bug1333982, dis_bug1333982)
+
+    def test_big_linenos(self):
+        def func(count):
+            namespace = {}
+            func = "def foo():\n " + "".join(["\n "] * count + ["spam\n"])
+            exec func in namespace
+            return namespace['foo']
+
+        # Test all small ranges
+        for i in xrange(1, 300):
+            expected = _BIG_LINENO_FORMAT % (i + 2)
+            self.do_disassembly_test(func(i), expected)
+
+        # Test some larger ranges too
+        for i in xrange(300, 5000, 10):
+            expected = _BIG_LINENO_FORMAT % (i + 2)
+            self.do_disassembly_test(func(i), expected)
+
+def test_main():
+    run_unittest(DisTests)
+
+
+if __name__ == "__main__":
+    test_main()
diff --git a/lib_pypy/_csv.py b/lib_pypy/_csv.py
--- a/lib_pypy/_csv.py
+++ b/lib_pypy/_csv.py
@@ -414,7 +414,7 @@
 
     def _parse_add_char(self, c):
         if len(self.field) + len(c) > _field_limit:
-            raise Error("field larget than field limit (%d)" % (_field_limit))
+            raise Error("field larger than field limit (%d)" % (_field_limit))
         self.field += c
         
 
diff --git a/lib_pypy/_ctypes/array.py b/lib_pypy/_ctypes/array.py
--- a/lib_pypy/_ctypes/array.py
+++ b/lib_pypy/_ctypes/array.py
@@ -1,9 +1,9 @@
-
+import _ffi
 import _rawffi
 
 from _ctypes.basics import _CData, cdata_from_address, _CDataMeta, sizeof
 from _ctypes.basics import keepalive_key, store_reference, ensure_objects
-from _ctypes.basics import CArgObject
+from _ctypes.basics import CArgObject, as_ffi_pointer
 
 class ArrayMeta(_CDataMeta):
     def __new__(self, name, cls, typedict):
@@ -211,6 +211,9 @@
     def _to_ffi_param(self):
         return self._get_buffer_value()
 
+    def _as_ffi_pointer_(self, ffitype):
+        return as_ffi_pointer(self, ffitype)
+
 ARRAY_CACHE = {}
 
 def create_array_type(base, length):
@@ -228,5 +231,6 @@
             _type_ = base
         )
         cls = ArrayMeta(name, (Array,), tpdict)
+        cls._ffiargtype = _ffi.types.Pointer(base.get_ffi_argtype())
         ARRAY_CACHE[key] = cls
         return cls
diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py
--- a/lib_pypy/_ctypes/basics.py
+++ b/lib_pypy/_ctypes/basics.py
@@ -230,5 +230,16 @@
     }
 
 
+# called from primitive.py, pointer.py, array.py
+def as_ffi_pointer(value, ffitype):
+    my_ffitype = type(value).get_ffi_argtype()
+    # for now, we always allow types.pointer, else a lot of tests
+    # break. We need to rethink how pointers are represented, though
+    if my_ffitype is not ffitype and ffitype is not _ffi.types.void_p:
+        raise ArgumentError("expected %s instance, got %s" % (type(value),
+                                                              ffitype))
+    return value._get_buffer_value()
+
+
 # used by "byref"
 from _ctypes.pointer import pointer
diff --git a/lib_pypy/_ctypes/builtin.py b/lib_pypy/_ctypes/builtin.py
--- a/lib_pypy/_ctypes/builtin.py
+++ b/lib_pypy/_ctypes/builtin.py
@@ -31,24 +31,20 @@
     arg = cobj._get_buffer_value()
     return _rawffi.wcharp2rawunicode(arg, lgt)
 
-class ErrorObject(local):
-    def __init__(self):
-        self.errno = 0
-        self.winerror = 0
-_error_object = ErrorObject()
+_err = local()
 
 def get_errno():
-    return _error_object.errno
+    return getattr(_err, "errno", 0)
 
 def set_errno(errno):
-    old_errno = _error_object.errno
-    _error_object.errno = errno
+    old_errno = get_errno()
+    _err.errno = errno
     return old_errno
 
 def get_last_error():
-    return _error_object.winerror
+    return getattr(_err, "winerror", 0)
 
 def set_last_error(winerror):
-    old_winerror = _error_object.winerror
-    _error_object.winerror = winerror
+    old_winerror = get_last_error()
+    _err.winerror = winerror
     return old_winerror
diff --git a/lib_pypy/_ctypes/function.py b/lib_pypy/_ctypes/function.py
--- a/lib_pypy/_ctypes/function.py
+++ b/lib_pypy/_ctypes/function.py
@@ -3,7 +3,7 @@
 from _ctypes.primitive import SimpleType, _SimpleCData
 from _ctypes.basics import ArgumentError, keepalive_key
 from _ctypes.basics import is_struct_shape
-from _ctypes.builtin import set_errno, set_last_error
+from _ctypes.builtin import get_errno, set_errno, get_last_error, set_last_error
 import _rawffi
 import _ffi
 import sys
@@ -350,16 +350,24 @@
     def _call_funcptr(self, funcptr, *newargs):
 
         if self._flags_ & _rawffi.FUNCFLAG_USE_ERRNO:
-            set_errno(_rawffi.get_errno())
+            tmp = _rawffi.get_errno()
+            _rawffi.set_errno(get_errno())
+            set_errno(tmp)
         if self._flags_ & _rawffi.FUNCFLAG_USE_LASTERROR:
-            set_last_error(_rawffi.get_last_error())
+            tmp = _rawffi.get_last_error()
+            _rawffi.set_last_error(get_last_error())
+            set_last_error(tmp)
         try:
             result = funcptr(*newargs)
         finally:
             if self._flags_ & _rawffi.FUNCFLAG_USE_ERRNO:
-                set_errno(_rawffi.get_errno())
+                tmp = _rawffi.get_errno()
+                _rawffi.set_errno(get_errno())
+                set_errno(tmp)
             if self._flags_ & _rawffi.FUNCFLAG_USE_LASTERROR:
-                set_last_error(_rawffi.get_last_error())
+                tmp = _rawffi.get_last_error()
+                _rawffi.set_last_error(get_last_error())
+                set_last_error(tmp)
         #
         try:
             return self._build_result(self._restype_, result, newargs)
diff --git a/lib_pypy/_ctypes/pointer.py b/lib_pypy/_ctypes/pointer.py
--- a/lib_pypy/_ctypes/pointer.py
+++ b/lib_pypy/_ctypes/pointer.py
@@ -3,7 +3,7 @@
 import _ffi
 from _ctypes.basics import _CData, _CDataMeta, cdata_from_address, ArgumentError
 from _ctypes.basics import keepalive_key, store_reference, ensure_objects
-from _ctypes.basics import sizeof, byref
+from _ctypes.basics import sizeof, byref, as_ffi_pointer
 from _ctypes.array import Array, array_get_slice_params, array_slice_getitem,\
      array_slice_setitem
 
@@ -119,14 +119,6 @@
     def _as_ffi_pointer_(self, ffitype):
         return as_ffi_pointer(self, ffitype)
 
-def as_ffi_pointer(value, ffitype):
-    my_ffitype = type(value).get_ffi_argtype()
-    # for now, we always allow types.pointer, else a lot of tests
-    # break. We need to rethink how pointers are represented, though
-    if my_ffitype is not ffitype and ffitype is not _ffi.types.void_p:
-        raise ArgumentError("expected %s instance, got %s" % (type(value),
-                                                              ffitype))
-    return value._get_buffer_value()
 
 def _cast_addr(obj, _, tp):
     if not (isinstance(tp, _CDataMeta) and tp._is_pointer_like()):
diff --git a/lib_pypy/cPickle.py b/lib_pypy/cPickle.py
--- a/lib_pypy/cPickle.py
+++ b/lib_pypy/cPickle.py
@@ -2,16 +2,95 @@
 # One-liner implementation of cPickle
 #
 
-from pickle import *
+from pickle import Pickler, dump, dumps, PickleError, PicklingError, UnpicklingError, _EmptyClass
 from pickle import __doc__, __version__, format_version, compatible_formats
+from types import *
+from copy_reg import dispatch_table
+from copy_reg import _extension_registry, _inverted_registry, _extension_cache
+import marshal, struct, sys
 
 try: from __pypy__ import builtinify
 except ImportError: builtinify = lambda f: f
 
+# These are purely informational; no code uses these.
+format_version = "2.0"                  # File format version we write
+compatible_formats = ["1.0",            # Original protocol 0
+                      "1.1",            # Protocol 0 with INST added
+                      "1.2",            # Original protocol 1
+                      "1.3",            # Protocol 1 with BINFLOAT added
+                      "2.0",            # Protocol 2
+                      ]                 # Old format versions we can read
+
+# Keep in synch with cPickle.  This is the highest protocol number we
+# know how to read.
+HIGHEST_PROTOCOL = 2
 
 BadPickleGet = KeyError
 UnpickleableError = PicklingError
 
+MARK            = ord('(')   # push special markobject on stack
+STOP            = ord('.')   # every pickle ends with STOP
+POP             = ord('0')   # discard topmost stack item
+POP_MARK        = ord('1')   # discard stack top through topmost markobject
+DUP             = ord('2')   # duplicate top stack item
+FLOAT           = ord('F')   # push float object; decimal string argument
+INT             = ord('I')   # push integer or bool; decimal string argument
+BININT          = ord('J')   # push four-byte signed int
+BININT1         = ord('K')   # push 1-byte unsigned int
+LONG            = ord('L')   # push long; decimal string argument
+BININT2         = ord('M')   # push 2-byte unsigned int
+NONE            = ord('N')   # push None
+PERSID          = ord('P')   # push persistent object; id is taken from string arg
+BINPERSID       = ord('Q')   #  "       "         "  ;  "  "   "     "  stack
+REDUCE          = ord('R')   # apply callable to argtuple, both on stack
+STRING          = ord('S')   # push string; NL-terminated string argument
+BINSTRING       = ord('T')   # push string; counted binary string argument
+SHORT_BINSTRING = ord('U')   #  "     "   ;    "      "       "      " < 256 bytes
+UNICODE         = ord('V')   # push Unicode string; raw-unicode-escaped'd argument
+BINUNICODE      = ord('X')   #   "     "       "  ; counted UTF-8 string argument
+APPEND          = ord('a')   # append stack top to list below it
+BUILD           = ord('b')   # call __setstate__ or __dict__.update()
+GLOBAL          = ord('c')   # push self.find_class(modname, name); 2 string args
+DICT            = ord('d')   # build a dict from stack items
+EMPTY_DICT      = ord('}')   # push empty dict
+APPENDS         = ord('e')   # extend list on stack by topmost stack slice
+GET             = ord('g')   # push item from memo on stack; index is string arg
+BINGET          = ord('h')   #   "    "    "    "   "   "  ;   "    " 1-byte arg
+INST            = ord('i')   # build & push class instance
+LONG_BINGET     = ord('j')   # push item from memo on stack; index is 4-byte arg
+LIST            = ord('l')   # build list from topmost stack items
+EMPTY_LIST      = ord(']')   # push empty list
+OBJ             = ord('o')   # build & push class instance
+PUT             = ord('p')   # store stack top in memo; index is string arg
+BINPUT          = ord('q')   #   "     "    "   "   " ;   "    " 1-byte arg
+LONG_BINPUT     = ord('r')   #   "     "    "   "   " ;   "    " 4-byte arg
+SETITEM         = ord('s')   # add key+value pair to dict
+TUPLE           = ord('t')   # build tuple from topmost stack items
+EMPTY_TUPLE     = ord(')')   # push empty tuple
+SETITEMS        = ord('u')   # modify dict by adding topmost key+value pairs
+BINFLOAT        = ord('G')   # push float; arg is 8-byte float encoding
+
+TRUE            = 'I01\n'  # not an opcode; see INT docs in pickletools.py
+FALSE           = 'I00\n'  # not an opcode; see INT docs in pickletools.py
+
+# Protocol 2
+
+PROTO           = ord('\x80')  # identify pickle protocol
+NEWOBJ          = ord('\x81')  # build object by applying cls.__new__ to argtuple
+EXT1            = ord('\x82')  # push object from extension registry; 1-byte index
+EXT2            = ord('\x83')  # ditto, but 2-byte index
+EXT4            = ord('\x84')  # ditto, but 4-byte index
+TUPLE1          = ord('\x85')  # build 1-tuple from stack top
+TUPLE2          = ord('\x86')  # build 2-tuple from two topmost stack items
+TUPLE3          = ord('\x87')  # build 3-tuple from three topmost stack items
+NEWTRUE         = ord('\x88')  # push True
+NEWFALSE        = ord('\x89')  # push False
+LONG1           = ord('\x8a')  # push long from < 256 bytes
+LONG4           = ord('\x8b')  # push really big long
+
+_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]
+
+
 # ____________________________________________________________
 # XXX some temporary dark magic to produce pickled dumps that are
 #     closer to the ones produced by cPickle in CPython
@@ -44,3 +123,474 @@
     file = StringIO()
     Pickler(file, protocol).dump(obj)
     return file.getvalue()
+
+# Why use struct.pack() for pickling but marshal.loads() for
+# unpickling?  struct.pack() is 40% faster than marshal.dumps(), but
+# marshal.loads() is twice as fast as struct.unpack()!
+mloads = marshal.loads
+
+# Unpickling machinery
+
+class Unpickler(object):
+
+    def __init__(self, file):
+        """This takes a file-like object for reading a pickle data stream.
+
+        The protocol version of the pickle is detected automatically, so no
+        proto argument is needed.
+
+        The file-like object must have two methods, a read() method that
+        takes an integer argument, and a readline() method that requires no
+        arguments.  Both methods should return a string.  Thus file-like
+        object can be a file object opened for reading, a StringIO object,
+        or any other custom object that meets this interface.
+        """
+        self.readline = file.readline
+        self.read = file.read
+        self.memo = {}
+
+    def load(self):
+        """Read a pickled object representation from the open file.
+
+        Return the reconstituted object hierarchy specified in the file.
+        """
+        self.mark = object() # any new unique object
+        self.stack = []
+        self.append = self.stack.append
+        try:
+            key = ord(self.read(1))
+            while key != STOP:
+                self.dispatch[key](self)
+                key = ord(self.read(1))
+        except TypeError:
+            if self.read(1) == '':
+                raise EOFError
+            raise
+        return self.stack.pop()
+
+    # Return largest index k such that self.stack[k] is self.mark.
+    # If the stack doesn't contain a mark, eventually raises IndexError.
+    # This could be sped by maintaining another stack, of indices at which
+    # the mark appears.  For that matter, the latter stack would suffice,
+    # and we wouldn't need to push mark objects on self.stack at all.
+    # Doing so is probably a good thing, though, since if the pickle is
+    # corrupt (or hostile) we may get a clue from finding self.mark embedded
+    # in unpickled objects.
+    def marker(self):
+        k = len(self.stack)-1
+        while self.stack[k] is not self.mark: k -= 1
+        return k
+
+    dispatch = {}
+
+    def load_proto(self):
+        proto = ord(self.read(1))
+        if not 0 <= proto <= 2:
+            raise ValueError, "unsupported pickle protocol: %d" % proto
+    dispatch[PROTO] = load_proto
+
+    def load_persid(self):
+        pid = self.readline()[:-1]
+        self.append(self.persistent_load(pid))
+    dispatch[PERSID] = load_persid
+
+    def load_binpersid(self):
+        pid = self.stack.pop()
+        self.append(self.persistent_load(pid))
+    dispatch[BINPERSID] = load_binpersid
+
+    def load_none(self):
+        self.append(None)
+    dispatch[NONE] = load_none
+
+    def load_false(self):
+        self.append(False)
+    dispatch[NEWFALSE] = load_false
+
+    def load_true(self):
+        self.append(True)
+    dispatch[NEWTRUE] = load_true
+
+    def load_int(self):
+        data = self.readline()
+        if data == FALSE[1:]:
+            val = False
+        elif data == TRUE[1:]:
+            val = True
+        else:
+            try:
+                val = int(data)
+            except ValueError:
+                val = long(data)
+        self.append(val)
+    dispatch[INT] = load_int
+
+    def load_binint(self):
+        self.append(mloads('i' + self.read(4)))
+    dispatch[BININT] = load_binint
+
+    def load_binint1(self):
+        self.append(ord(self.read(1)))
+    dispatch[BININT1] = load_binint1
+
+    def load_binint2(self):
+        self.append(mloads('i' + self.read(2) + '\000\000'))
+    dispatch[BININT2] = load_binint2
+
+    def load_long(self):
+        self.append(long(self.readline()[:-1], 0))
+    dispatch[LONG] = load_long
+
+    def load_long1(self):
+        n = ord(self.read(1))
+        bytes = self.read(n)
+        self.append(decode_long(bytes))
+    dispatch[LONG1] = load_long1
+
+    def load_long4(self):
+        n = mloads('i' + self.read(4))
+        bytes = self.read(n)
+        self.append(decode_long(bytes))
+    dispatch[LONG4] = load_long4
+
+    def load_float(self):
+        self.append(float(self.readline()[:-1]))
+    dispatch[FLOAT] = load_float
+
+    def load_binfloat(self, unpack=struct.unpack):
+        self.append(unpack('>d', self.read(8))[0])
+    dispatch[BINFLOAT] = load_binfloat
+
+    def load_string(self):
+        rep = self.readline()
+        if len(rep) < 3:
+            raise ValueError, "insecure string pickle"
+        if rep[0] == "'" == rep[-2]:
+            rep = rep[1:-2]
+        elif rep[0] == '"' == rep[-2]:
+            rep = rep[1:-2]
+        else:
+            raise ValueError, "insecure string pickle"
+        self.append(rep.decode("string-escape"))
+    dispatch[STRING] = load_string
+
+    def load_binstring(self):
+        L = mloads('i' + self.read(4))
+        self.append(self.read(L))
+    dispatch[BINSTRING] = load_binstring
+
+    def load_unicode(self):
+        self.append(unicode(self.readline()[:-1],'raw-unicode-escape'))
+    dispatch[UNICODE] = load_unicode
+
+    def load_binunicode(self):
+        L = mloads('i' + self.read(4))
+        self.append(unicode(self.read(L),'utf-8'))
+    dispatch[BINUNICODE] = load_binunicode
+
+    def load_short_binstring(self):
+        L = ord(self.read(1))
+        self.append(self.read(L))
+    dispatch[SHORT_BINSTRING] = load_short_binstring
+
+    def load_tuple(self):
+        k = self.marker()
+        self.stack[k:] = [tuple(self.stack[k+1:])]
+    dispatch[TUPLE] = load_tuple
+
+    def load_empty_tuple(self):
+        self.stack.append(())
+    dispatch[EMPTY_TUPLE] = load_empty_tuple
+
+    def load_tuple1(self):
+        self.stack[-1] = (self.stack[-1],)
+    dispatch[TUPLE1] = load_tuple1
+
+    def load_tuple2(self):
+        self.stack[-2:] = [(self.stack[-2], self.stack[-1])]
+    dispatch[TUPLE2] = load_tuple2
+
+    def load_tuple3(self):
+        self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])]
+    dispatch[TUPLE3] = load_tuple3
+
+    def load_empty_list(self):
+        self.stack.append([])
+    dispatch[EMPTY_LIST] = load_empty_list
+
+    def load_empty_dictionary(self):
+        self.stack.append({})
+    dispatch[EMPTY_DICT] = load_empty_dictionary
+
+    def load_list(self):
+        k = self.marker()
+        self.stack[k:] = [self.stack[k+1:]]
+    dispatch[LIST] = load_list
+
+    def load_dict(self):
+        k = self.marker()
+        d = {}
+        items = self.stack[k+1:]
+        for i in range(0, len(items), 2):
+            key = items[i]
+            value = items[i+1]
+            d[key] = value
+        self.stack[k:] = [d]
+    dispatch[DICT] = load_dict
+
+    # INST and OBJ differ only in how they get a class object.  It's not
+    # only sensible to do the rest in a common routine, the two routines
+    # previously diverged and grew different bugs.
+    # klass is the class to instantiate, and k points to the topmost mark
+    # object, following which are the arguments for klass.__init__.
+    def _instantiate(self, klass, k):
+        args = tuple(self.stack[k+1:])
+        del self.stack[k:]
+        instantiated = 0
+        if (not args and
+                type(klass) is ClassType and
+                not hasattr(klass, "__getinitargs__")):
+            try:
+                value = _EmptyClass()
+                value.__class__ = klass
+                instantiated = 1
+            except RuntimeError:
+                # In restricted execution, assignment to inst.__class__ is
+                # prohibited
+                pass
+        if not instantiated:
+            try:
+                value = klass(*args)
+            except TypeError, err:
+                raise TypeError, "in constructor for %s: %s" % (
+                    klass.__name__, str(err)), sys.exc_info()[2]
+        self.append(value)
+
+    def load_inst(self):
+        module = self.readline()[:-1]
+        name = self.readline()[:-1]
+        klass = self.find_class(module, name)
+        self._instantiate(klass, self.marker())
+    dispatch[INST] = load_inst
+
+    def load_obj(self):
+        # Stack is ... markobject classobject arg1 arg2 ...
+        k = self.marker()
+        klass = self.stack.pop(k+1)
+        self._instantiate(klass, k)
+    dispatch[OBJ] = load_obj
+
+    def load_newobj(self):
+        args = self.stack.pop()
+        cls = self.stack[-1]
+        obj = cls.__new__(cls, *args)
+        self.stack[-1] = obj
+    dispatch[NEWOBJ] = load_newobj
+
+    def load_global(self):
+        module = self.readline()[:-1]
+        name = self.readline()[:-1]
+        klass = self.find_class(module, name)
+        self.append(klass)
+    dispatch[GLOBAL] = load_global
+
+    def load_ext1(self):
+        code = ord(self.read(1))
+        self.get_extension(code)
+    dispatch[EXT1] = load_ext1
+
+    def load_ext2(self):
+        code = mloads('i' + self.read(2) + '\000\000')
+        self.get_extension(code)
+    dispatch[EXT2] = load_ext2
+
+    def load_ext4(self):
+        code = mloads('i' + self.read(4))
+        self.get_extension(code)
+    dispatch[EXT4] = load_ext4
+
+    def get_extension(self, code):
+        nil = []
+        obj = _extension_cache.get(code, nil)
+        if obj is not nil:
+            self.append(obj)
+            return
+        key = _inverted_registry.get(code)
+        if not key:
+            raise ValueError("unregistered extension code %d" % code)
+        obj = self.find_class(*key)
+        _extension_cache[code] = obj
+        self.append(obj)
+
+    def find_class(self, module, name):
+        # Subclasses may override this
+        __import__(module)
+        mod = sys.modules[module]
+        klass = getattr(mod, name)
+        return klass
+
+    def load_reduce(self):
+        args = self.stack.pop()
+        func = self.stack[-1]
+        value = self.stack[-1](*args)
+        self.stack[-1] = value
+    dispatch[REDUCE] = load_reduce
+
+    def load_pop(self):
+        del self.stack[-1]
+    dispatch[POP] = load_pop
+
+    def load_pop_mark(self):
+        k = self.marker()
+        del self.stack[k:]
+    dispatch[POP_MARK] = load_pop_mark
+
+    def load_dup(self):
+        self.append(self.stack[-1])
+    dispatch[DUP] = load_dup
+
+    def load_get(self):
+        self.append(self.memo[self.readline()[:-1]])
+    dispatch[GET] = load_get
+
+    def load_binget(self):
+        i = ord(self.read(1))
+        self.append(self.memo[repr(i)])
+    dispatch[BINGET] = load_binget
+
+    def load_long_binget(self):
+        i = mloads('i' + self.read(4))
+        self.append(self.memo[repr(i)])
+    dispatch[LONG_BINGET] = load_long_binget
+
+    def load_put(self):
+        self.memo[self.readline()[:-1]] = self.stack[-1]
+    dispatch[PUT] = load_put
+
+    def load_binput(self):
+        i = ord(self.read(1))
+        self.memo[repr(i)] = self.stack[-1]
+    dispatch[BINPUT] = load_binput
+
+    def load_long_binput(self):
+        i = mloads('i' + self.read(4))
+        self.memo[repr(i)] = self.stack[-1]
+    dispatch[LONG_BINPUT] = load_long_binput
+
+    def load_append(self):
+        value = self.stack.pop()
+        self.stack[-1].append(value)
+    dispatch[APPEND] = load_append
+
+    def load_appends(self):
+        stack = self.stack
+        mark = self.marker()
+        lst = stack[mark - 1]
+        lst.extend(stack[mark + 1:])
+        del stack[mark:]
+    dispatch[APPENDS] = load_appends
+
+    def load_setitem(self):
+        stack = self.stack
+        value = stack.pop()
+        key = stack.pop()
+        dict = stack[-1]
+        dict[key] = value
+    dispatch[SETITEM] = load_setitem
+
+    def load_setitems(self):
+        stack = self.stack
+        mark = self.marker()
+        dict = stack[mark - 1]
+        for i in range(mark + 1, len(stack), 2):
+            dict[stack[i]] = stack[i + 1]
+
+        del stack[mark:]
+    dispatch[SETITEMS] = load_setitems
+
+    def load_build(self):
+        stack = self.stack
+        state = stack.pop()
+        inst = stack[-1]
+        setstate = getattr(inst, "__setstate__", None)
+        if setstate:
+            setstate(state)
+            return
+        slotstate = None
+        if isinstance(state, tuple) and len(state) == 2:
+            state, slotstate = state
+        if state:
+            try:
+                d = inst.__dict__
+                try:
+                    for k, v in state.iteritems():
+                        d[intern(k)] = v
+                # keys in state don't have to be strings
+                # don't blow up, but don't go out of our way
+                except TypeError:
+                    d.update(state)
+
+            except RuntimeError:
+                # XXX In restricted execution, the instance's __dict__
+                # is not accessible.  Use the old way of unpickling
+                # the instance variables.  This is a semantic
+                # difference when unpickling in restricted
+                # vs. unrestricted modes.
+                # Note, however, that cPickle has never tried to do the
+                # .update() business, and always uses
+                #     PyObject_SetItem(inst.__dict__, key, value) in a
+                # loop over state.items().
+                for k, v in state.items():
+                    setattr(inst, k, v)
+        if slotstate:
+            for k, v in slotstate.items():
+                setattr(inst, k, v)
+    dispatch[BUILD] = load_build
+
+    def load_mark(self):
+        self.append(self.mark)
+    dispatch[MARK] = load_mark
+
+#from pickle import decode_long
+
+def decode_long(data):
+    r"""Decode a long from a two's complement little-endian binary string.
+
+    >>> decode_long('')
+    0L
+    >>> decode_long("\xff\x00")
+    255L
+    >>> decode_long("\xff\x7f")
+    32767L
+    >>> decode_long("\x00\xff")
+    -256L
+    >>> decode_long("\x00\x80")
+    -32768L
+    >>> decode_long("\x80")
+    -128L
+    >>> decode_long("\x7f")
+    127L
+    """
+
+    nbytes = len(data)
+    if nbytes == 0:
+        return 0L
+    ind = nbytes - 1
+    while ind and ord(data[ind]) == 0:
+        ind -= 1
+    n = ord(data[ind])
+    while ind:
+        n <<= 8
+        ind -= 1
+        if ord(data[ind]):
+            n += ord(data[ind])
+    if ord(data[nbytes - 1]) >= 128:
+        n -= 1L << (nbytes << 3)
+    return n
+
+def load(f):
+    return Unpickler(f).load()
+
+def loads(str):
+    f = StringIO(str)
+    return Unpickler(f).load()
diff --git a/lib_pypy/datetime.py b/lib_pypy/datetime.py
--- a/lib_pypy/datetime.py
+++ b/lib_pypy/datetime.py
@@ -1032,8 +1032,8 @@
     def __setstate(self, string):
         if len(string) != 4 or not (1 <= ord(string[2]) <= 12):
             raise TypeError("not enough arguments")
-        yhi, ylo, self._month, self._day = map(ord, string)
-        self._year = yhi * 256 + ylo
+        self._month, self._day = ord(string[2]), ord(string[3])
+        self._year = ord(string[0]) * 256 + ord(string[1])
 
     def __reduce__(self):
         return (self.__class__, self._getstate())
@@ -1421,9 +1421,10 @@
     def __setstate(self, string, tzinfo):
         if len(string) != 6 or ord(string[0]) >= 24:
             raise TypeError("an integer is required")
-        self._hour, self._minute, self._second, us1, us2, us3 = \
-                                                            map(ord, string)
-        self._microsecond = (((us1 << 8) | us2) << 8) | us3
+        self._hour, self._minute, self._second = ord(string[0]), \
+                                                 ord(string[1]), ord(string[2])
+        self._microsecond = (((ord(string[3]) << 8) | \
+                            ord(string[4])) << 8) | ord(string[5])
         self._tzinfo = tzinfo
 
     def __reduce__(self):
@@ -1903,10 +1904,11 @@
             return (basestate, self._tzinfo)
 
     def __setstate(self, string, tzinfo):
-        (yhi, ylo, self._month, self._day, self._hour,
-         self._minute, self._second, us1, us2, us3) = map(ord, string)
-        self._year = yhi * 256 + ylo
-        self._microsecond = (((us1 << 8) | us2) << 8) | us3
+        (self._month, self._day, self._hour, self._minute,
+            self._second) = (ord(string[2]), ord(string[3]), ord(string[4]),
+                             ord(string[5]), ord(string[6]))
+        self._year = ord(string[0]) * 256 + ord(string[1])
+        self._microsecond = (((ord(string[7]) << 8) | ord(string[8])) << 8) | ord(string[9])
         self._tzinfo = tzinfo
 
     def __reduce__(self):
diff --git a/pypy/annotation/builtin.py b/pypy/annotation/builtin.py
--- a/pypy/annotation/builtin.py
+++ b/pypy/annotation/builtin.py
@@ -37,7 +37,11 @@
     try:
         realresult = func(*args)
     except (ValueError, OverflowError):
-        return s_ImpossibleValue   # no possible answer for this precise input
+        # no possible answer for this precise input.  Be conservative
+        # and keep the computation non-constant.  Example:
+        # unichr(constant-that-doesn't-fit-16-bits) on platforms where
+        # the underlying Python has sys.maxunicode == 0xffff.
+        return s_result
     s_realresult = immutablevalue(realresult)
     if not s_result.contains(s_realresult):
         raise Exception("%s%r returned %r, which is not contained in %s" % (
diff --git a/pypy/annotation/classdef.py b/pypy/annotation/classdef.py
--- a/pypy/annotation/classdef.py
+++ b/pypy/annotation/classdef.py
@@ -134,12 +134,19 @@
             if self.name not in homedef.classdesc.all_enforced_attrs:
                 self.attr_allowed = False
                 if not self.readonly:
-                    raise NoSuchAttrError(homedef, self.name)
+                    raise NoSuchAttrError(
+                        "setting forbidden attribute %r on %r" % (
+                        self.name, homedef))
 
     def modified(self, classdef='?'):
         self.readonly = False
         if not self.attr_allowed:
-            raise NoSuchAttrError(classdef, self.name)
+            raise NoSuchAttrError(
+                "Attribute %r on %r should be read-only.\n" % (self.name,
+                                                               classdef) +
+                "This error can be caused by another 'getattr' that promoted\n"
+                "the attribute here; the list of read locations is:\n" +
+                '\n'.join([str(loc[0]) for loc in self.read_locations]))
 
 
 class ClassDef(object):
diff --git a/pypy/annotation/description.py b/pypy/annotation/description.py
--- a/pypy/annotation/description.py
+++ b/pypy/annotation/description.py
@@ -398,7 +398,6 @@
             cls = pyobj
             base = object
             baselist = list(cls.__bases__)
-            baselist.reverse()
 
             # special case: skip BaseException in Python 2.5, and pretend
             # that all exceptions ultimately inherit from Exception instead
@@ -408,17 +407,27 @@
             elif baselist == [py.builtin.BaseException]:
                 baselist = [Exception]
 
+            mixins_before = []
+            mixins_after = []
             for b1 in baselist:
                 if b1 is object:
                     continue
                 if b1.__dict__.get('_mixin_', False):
-                    self.add_mixin(b1)
+                    if base is object:
+                        mixins_before.append(b1)
+                    else:
+                        mixins_after.append(b1)
                 else:
                     assert base is object, ("multiple inheritance only supported "
                                             "with _mixin_: %r" % (cls,))
                     base = b1
+            if mixins_before and mixins_after:
+                raise Exception("unsupported: class %r has mixin bases both"
+                                " before and after the regular base" % (self,))
+            self.add_mixins(mixins_after, check_not_in=base)
+            self.add_mixins(mixins_before)
+            self.add_sources_for_class(cls)
 
-            self.add_sources_for_class(cls)
             if base is not object:
                 self.basedesc = bookkeeper.getdesc(base)
 
@@ -480,14 +489,30 @@
                 return
         self.classdict[name] = Constant(value)
 
-    def add_mixin(self, base):
-        for subbase in base.__bases__:
-            if subbase is object:
-                continue
-            assert subbase.__dict__.get("_mixin_", False), ("Mixin class %r has non"
-                "mixin base class %r" % (base, subbase))
-            self.add_mixin(subbase)
-        self.add_sources_for_class(base, mixin=True)
+    def add_mixins(self, mixins, check_not_in=object):
+        if not mixins:
+            return
+        A = type('tmp', tuple(mixins) + (object,), {})
+        mro = A.__mro__
+        assert mro[0] is A and mro[-1] is object
+        mro = mro[1:-1]
+        #
+        skip = set()
+        def add(cls):
+            if cls is not object:
+                for base in cls.__bases__:
+                    add(base)
+                for name in cls.__dict__:
+                    skip.add(name)
+        add(check_not_in)
+        #
+        for base in reversed(mro):
+            assert base.__dict__.get("_mixin_", False), ("Mixin class %r has non"
+                "mixin base class %r" % (mixins, base))
+            for name, value in base.__dict__.items():
+                if name in skip:
+                    continue
+                self.add_source_attribute(name, value, mixin=True)
 
     def add_sources_for_class(self, cls, mixin=False):
         for name, value in cls.__dict__.items():
diff --git a/pypy/annotation/test/test_annrpython.py b/pypy/annotation/test/test_annrpython.py
--- a/pypy/annotation/test/test_annrpython.py
+++ b/pypy/annotation/test/test_annrpython.py
@@ -1,15 +1,12 @@
 from __future__ import with_statement
-import autopath
 import py.test
 import sys
 from pypy import conftest
-from pypy.tool.udir import udir
 
 from pypy.annotation import model as annmodel
 from pypy.annotation.annrpython import RPythonAnnotator as _RPythonAnnotator
 from pypy.translator.translator import graphof as tgraphof
 from pypy.annotation import policy
-from pypy.annotation import specialize
 from pypy.annotation.listdef import ListDef, ListChangeUnallowed
 from pypy.annotation.dictdef import DictDef
 from pypy.objspace.flow.model import *
@@ -2431,6 +2428,93 @@
         assert isinstance(s.items[1], annmodel.SomeChar)
         assert isinstance(s.items[2], annmodel.SomeChar)
 
+    def test_mixin_first(self):
+        class Mixin(object):
+            _mixin_ = True
+            def foo(self): return 4
+        class Base(object):
+            def foo(self): return 5
+        class Concrete(Mixin, Base):
+            pass
+        def f():
+            return Concrete().foo()
+
+        assert f() == 4
+        a = self.RPythonAnnotator()
+        s = a.build_types(f, [])
+        assert s.const == 4
+
+    def test_mixin_last(self):
+        class Mixin(object):
+            _mixin_ = True
+            def foo(self): return 4
+        class Base(object):
+            def foo(self): return 5
+        class Concrete(Base, Mixin):
+            pass
+        def f():
+            return Concrete().foo()
+
+        assert f() == 5
+        a = self.RPythonAnnotator()
+        s = a.build_types(f, [])
+        assert s.const == 5
+
+    def test_mixin_concrete(self):
+        class Mixin(object):
+            _mixin_ = True
+            def foo(self): return 4
+        class Concrete(Mixin):
+            def foo(self): return 5
+        def f():
+            return Concrete().foo()
+
+        assert f() == 5
+        a = self.RPythonAnnotator()
+        s = a.build_types(f, [])
+        assert s.const == 5
+
+    def test_multiple_mixins_mro(self):
+        # an obscure situation, but it occurred in module/micronumpy/types.py
+        class A(object):
+            _mixin_ = True
+            def foo(self): return 1
+        class B(A):
+            _mixin_ = True
+            def foo(self): return 2
+        class C(A):
+            _mixin_ = True
+        class D(B, C):
+            _mixin_ = True
+        class Concrete(D):
+            pass
+        def f():
+            return Concrete().foo()
+
+        assert f() == 2
+        a = self.RPythonAnnotator()
+        s = a.build_types(f, [])
+        assert s.const == 2
+
+    def test_multiple_mixins_mro_2(self):
+        class A(object):
+            _mixin_ = True
+            def foo(self): return 1
+        class B(A):
+            _mixin_ = True
+            def foo(self): return 2
+        class C(A):
+            _mixin_ = True
+        class Concrete(C, B):
+            pass
+        def f():
+            return Concrete().foo()
+
+        assert f() == 2
+        a = self.RPythonAnnotator()
+        s = a.build_types(f, [])
+        assert s.const == 2
+
     def test___class___attribute(self):
         class Base(object): pass
         class A(Base): pass
@@ -2469,6 +2553,26 @@
         s = a.build_types(f, [int])
         assert s.knowntype == int
 
+    def test_slots_reads(self):
+        class A(object):
+            __slots__ = ()
+        class B(A):
+            def __init__(self, x):
+                self.x = x
+        def f(x):
+            if x:
+                a = A()
+            else:
+                a = B(x)
+            return a.x   # should explode here
+
+        a = self.RPythonAnnotator()
+        e = py.test.raises(Exception, a.build_types, f, [int])
+        # this should explode on reading the attribute 'a.x', but it can
+        # sometimes explode on 'self.x = x', which does not make much sense.
+        # But it looks hard to fix in general: we don't know yet during 'a.x'
+        # if the attribute x will be read-only or read-write.
+
     def test_unboxed_value(self):
         class A(object):
             __slots__ = ()
diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py
--- a/pypy/config/pypyoption.py
+++ b/pypy/config/pypyoption.py
@@ -13,7 +13,7 @@
                and not p.basename.startswith('test')]
 
 essential_modules = dict.fromkeys(
-    ["exceptions", "_file", "sys", "__builtin__", "posix"]
+    ["exceptions", "_file", "sys", "__builtin__", "posix", "_warnings"]
 )
 
 default_modules = essential_modules.copy()
diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst
--- a/pypy/doc/cpython_differences.rst
+++ b/pypy/doc/cpython_differences.rst
@@ -313,5 +313,10 @@
   implementation detail that shows up because of internal C-level slots
   that PyPy does not have.
 
+* the ``__dict__`` attribute of new-style classes returns a normal dict, as
+  opposed to a dict proxy like in CPython. Mutating the dict will change the
+  type and vice versa. For builtin types, a dictionary will be returned that
+  cannot be changed (but still looks and behaves like a normal dictionary).
+
 
 .. include:: _ref.txt
diff --git a/pypy/interpreter/astcompiler/assemble.py b/pypy/interpreter/astcompiler/assemble.py
--- a/pypy/interpreter/astcompiler/assemble.py
+++ b/pypy/interpreter/astcompiler/assemble.py
@@ -610,6 +610,8 @@
     ops.JUMP_IF_FALSE_OR_POP : 0,
     ops.POP_JUMP_IF_TRUE : -1,
     ops.POP_JUMP_IF_FALSE : -1,
+
+    ops.BUILD_LIST_FROM_ARG: 1,
 }
 
 
diff --git a/pypy/interpreter/astcompiler/codegen.py b/pypy/interpreter/astcompiler/codegen.py
--- a/pypy/interpreter/astcompiler/codegen.py
+++ b/pypy/interpreter/astcompiler/codegen.py
@@ -965,7 +965,7 @@
         self.emit_op_arg(ops.CALL_METHOD, (kwarg_count << 8) | arg_count)
         return True
 
-    def _listcomp_generator(self, gens, gen_index, elt):
+    def _listcomp_generator(self, gens, gen_index, elt, single=False):
         start = self.new_block()
         skip = self.new_block()
         if_cleanup = self.new_block()
@@ -973,6 +973,8 @@
         gen = gens[gen_index]
         assert isinstance(gen, ast.comprehension)
         gen.iter.walkabout(self)
+        if single:
+            self.emit_op_arg(ops.BUILD_LIST_FROM_ARG, 0)
         self.emit_op(ops.GET_ITER)
         self.use_next_block(start)
         self.emit_jump(ops.FOR_ITER, anchor)
@@ -998,8 +1000,12 @@
 
     def visit_ListComp(self, lc):
         self.update_position(lc.lineno)
-        self.emit_op_arg(ops.BUILD_LIST, 0)
-        self._listcomp_generator(lc.generators, 0, lc.elt)
+        if len(lc.generators) != 1 or lc.generators[0].ifs:
+            single = False
+            self.emit_op_arg(ops.BUILD_LIST, 0)
+        else:
+            single = True
+        self._listcomp_generator(lc.generators, 0, lc.elt, single=single)
 
     def _comp_generator(self, node, generators, gen_index):
         start = self.new_block()
diff --git a/pypy/interpreter/astcompiler/test/test_compiler.py b/pypy/interpreter/astcompiler/test/test_compiler.py
--- a/pypy/interpreter/astcompiler/test/test_compiler.py
+++ b/pypy/interpreter/astcompiler/test/test_compiler.py
@@ -908,3 +908,17 @@
             return d['f'](5)
         """)
         assert 'generator' in space.str_w(space.repr(w_generator))
+        
+    def test_list_comprehension(self):
+        source = "def f(): [i for i in l]"
+        source2 = "def f(): [i for i in l for j in l]"
+        source3 = "def f(): [i for i in l if i]"
+        counts = self.count_instructions(source)
+        assert ops.BUILD_LIST not in counts
+        assert counts[ops.BUILD_LIST_FROM_ARG] == 1
+        counts = self.count_instructions(source2)
+        assert counts[ops.BUILD_LIST] == 1
+        assert ops.BUILD_LIST_FROM_ARG not in counts
+        counts = self.count_instructions(source3)
+        assert counts[ops.BUILD_LIST] == 1
+        assert ops.BUILD_LIST_FROM_ARG not in counts
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -7,7 +7,8 @@
 from pypy.interpreter.miscutils import ThreadLocals
 from pypy.tool.cache import Cache
 from pypy.tool.uid import HUGEVAL_BYTES
-from pypy.rlib.objectmodel import we_are_translated, newlist, compute_unique_id
+from pypy.rlib.objectmodel import we_are_translated, newlist_hint,\
+     compute_unique_id
 from pypy.rlib.debug import make_sure_not_resized
 from pypy.rlib.timer import DummyTimer, Timer
 from pypy.rlib.rarithmetic import r_uint
@@ -328,7 +329,7 @@
                 raise
             modname = self.str_w(w_modname)
             mod = self.interpclass_w(w_mod)
-            if isinstance(mod, Module):
+            if isinstance(mod, Module) and not mod.startup_called:
                 self.timer.start("startup " + modname)
                 mod.init(self)
                 self.timer.stop("startup " + modname)
@@ -833,7 +834,7 @@
             items = []
         else:
             try:
-                items = newlist(lgt_estimate)
+                items = newlist_hint(lgt_estimate)
             except MemoryError:
                 items = [] # it might have lied
         #
@@ -1471,8 +1472,8 @@
 
     def warn(self, msg, w_warningcls):
         self.appexec([self.wrap(msg), w_warningcls], """(msg, warningcls):
-            import warnings
-            warnings.warn(msg, warningcls, stacklevel=2)
+            import _warnings
+            _warnings.warn(msg, warningcls, stacklevel=2)
         """)
 
     def resolve_target(self, w_obj):
diff --git a/pypy/interpreter/buffer.py b/pypy/interpreter/buffer.py
--- a/pypy/interpreter/buffer.py
+++ b/pypy/interpreter/buffer.py
@@ -20,6 +20,7 @@
 from pypy.interpreter.gateway import interp2app, unwrap_spec
 from pypy.interpreter.error import OperationError
 from pypy.rlib.objectmodel import compute_hash
+from pypy.rlib.rstring import StringBuilder
 
 
 class Buffer(Wrappable):
@@ -152,12 +153,13 @@
     if space.isinstance_w(w_object, space.w_unicode):
         # unicode objects support the old buffer interface
         # but not the new buffer interface (change in python  2.7)
-        from pypy.rlib.rstruct.unichar import pack_unichar
-        charlist = []
-        for unich in space.unicode_w(w_object):
-            pack_unichar(unich, charlist)
+        from pypy.rlib.rstruct.unichar import pack_unichar, UNICODE_SIZE
+        unistr = space.unicode_w(w_object)
+        builder = StringBuilder(len(unistr) * UNICODE_SIZE)
+        for unich in unistr:
+            pack_unichar(unich, builder)
         from pypy.interpreter.buffer import StringBuffer
-        w_buffer = space.wrap(StringBuffer(''.join(charlist)))
+        w_buffer = space.wrap(StringBuffer(builder.build()))
     else:
         w_buffer = space.buffer(w_object)
 
diff --git a/pypy/interpreter/pyopcode.py b/pypy/interpreter/pyopcode.py
--- a/pypy/interpreter/pyopcode.py
+++ b/pypy/interpreter/pyopcode.py
@@ -15,9 +15,8 @@
 from pypy.rlib.rarithmetic import r_uint, intmask
 from pypy.rlib.unroll import unrolling_iterable
 from pypy.rlib.debug import check_nonneg
-from pypy.tool.stdlib_opcode import (bytecode_spec, host_bytecode_spec,
-                                     unrolling_all_opcode_descs, opmap,
-                                     host_opmap)
+from pypy.tool.stdlib_opcode import (bytecode_spec,
+                                     unrolling_all_opcode_descs)
 
 def unaryoperation(operationname):
     """NOT_RPYTHON"""
@@ -713,6 +712,19 @@
         w_list = self.space.newlist(items)
         self.pushvalue(w_list)
 
+    def BUILD_LIST_FROM_ARG(self, _, next_instr):
+        # this is a little dance, because list has to be before the
+        # value
+        last_val = self.popvalue()
+        try:
+            lgt = self.space.len_w(last_val)
+        except OperationError, e:
+            if e.async(self.space):
+                raise
+            lgt = 0 # oh well
+        self.pushvalue(self.space.newlist([], sizehint=lgt))
+        self.pushvalue(last_val)
+
     def LOAD_ATTR(self, nameindex, next_instr):
         "obj.attributename"
         w_obj = self.popvalue()
diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py
--- a/pypy/interpreter/pyparser/parsestring.py
+++ b/pypy/interpreter/pyparser/parsestring.py
@@ -1,5 +1,6 @@
 from pypy.interpreter.error import OperationError
 from pypy.interpreter import unicodehelper
+from pypy.rlib.rstring import StringBuilder
 
 def parsestr(space, encoding, s, unicode_literals=False):
     # compiler.transformer.Transformer.decode_literal depends on what 
@@ -115,21 +116,23 @@
     the string is UTF-8 encoded and should be re-encoded in the
     specified encoding.
     """
-    lis = []
+    builder = StringBuilder(len(s))
     ps = 0
     end = len(s)
-    while ps < end:
-        if s[ps] != '\\':
-            # note that the C code has a label here.
-            # the logic is the same.
+    while 1:
+        ps2 = ps
+        while ps < end and s[ps] != '\\':
             if recode_encoding and ord(s[ps]) & 0x80:
                 w, ps = decode_utf8(space, s, ps, end, recode_encoding)
-                # Append bytes to output buffer.
-                lis.append(w)
+                builder.append(w)
+                ps2 = ps
             else:
-                lis.append(s[ps])
                 ps += 1
-            continue
+        if ps > ps2:
+            builder.append_slice(s, ps2, ps)
+        if ps == end:
+            break
+
         ps += 1
         if ps == end:
             raise_app_valueerror(space, 'Trailing \\ in string')
@@ -140,25 +143,25 @@
         if ch == '\n':
             pass
         elif ch == '\\':
-            lis.append('\\')
+            builder.append('\\')
         elif ch == "'":
-            lis.append("'")
+            builder.append("'")
         elif ch == '"':
-            lis.append('"')
+            builder.append('"')
         elif ch == 'b':
-            lis.append("\010")
+            builder.append("\010")
         elif ch == 'f':
-            lis.append('\014') # FF
+            builder.append('\014') # FF
         elif ch == 't':
-            lis.append('\t')
+            builder.append('\t')
         elif ch == 'n':
-            lis.append('\n')
+            builder.append('\n')
         elif ch == 'r':
-            lis.append('\r')
+            builder.append('\r')
         elif ch == 'v':
-            lis.append('\013') # VT
+            builder.append('\013') # VT
         elif ch == 'a':
-            lis.append('\007') # BEL, not classic C
+            builder.append('\007') # BEL, not classic C
         elif ch in '01234567':
             # Look for up to two more octal digits
             span = ps
@@ -168,13 +171,13 @@
             # emulate a strange wrap-around behavior of CPython:
             # \400 is the same as \000 because 0400 == 256
             num = int(octal, 8) & 0xFF
-            lis.append(chr(num))
+            builder.append(chr(num))
             ps = span
         elif ch == 'x':
             if ps+2 <= end and isxdigit(s[ps]) and isxdigit(s[ps + 1]):
                 hexa = s[ps : ps + 2]
                 num = int(hexa, 16)
-                lis.append(chr(num))
+                builder.append(chr(num))
                 ps += 2
             else:
                 raise_app_valueerror(space, 'invalid \\x escape')
@@ -184,13 +187,13 @@
             # this was not an escape, so the backslash
             # has to be added, and we start over in
             # non-escape mode.
-            lis.append('\\')
+            builder.append('\\')
             ps -= 1
             assert ps >= 0
             continue
             # an arbitry number of unescaped UTF-8 bytes may follow.
 
-    buf = ''.join(lis)
+    buf = builder.build()
     return buf
 
 
diff --git a/pypy/interpreter/streamutil.py b/pypy/interpreter/streamutil.py
new file mode 100644
--- /dev/null
+++ b/pypy/interpreter/streamutil.py
@@ -0,0 +1,17 @@
+from pypy.rlib.streamio import StreamError
+from pypy.interpreter.error import OperationError, wrap_oserror2
+
+def wrap_streamerror(space, e, w_filename=None):
+    if isinstance(e, StreamError):
+        return OperationError(space.w_ValueError,
+                              space.wrap(e.message))
+    elif isinstance(e, OSError):
+        return wrap_oserror_as_ioerror(space, e, w_filename)
+    else:
+        # should not happen: wrap_streamerror() is only called when
+        # StreamErrors = (OSError, StreamError) are raised
+        return OperationError(space.w_IOError, space.w_None)
+
+def wrap_oserror_as_ioerror(space, e, w_filename=None):
+    return wrap_oserror2(space, e, w_filename,
+                         w_exception_class=space.w_IOError)
diff --git a/pypy/interpreter/test/test_objspace.py b/pypy/interpreter/test/test_objspace.py
--- a/pypy/interpreter/test/test_objspace.py
+++ b/pypy/interpreter/test/test_objspace.py
@@ -322,3 +322,14 @@
             space.ALL_BUILTIN_MODULES.pop()
             del space._builtinmodule_list
             mods = space.get_builtinmodule_to_install()
+
+    def test_dont_reload_builtin_mods_on_startup(self):
+        from pypy.tool.option import make_config, make_objspace
+        config = make_config(None)
+        space = make_objspace(config)
+        w_executable = space.wrap('executable')
+        assert space.str_w(space.getattr(space.sys, w_executable)) == 'py.py'
+        space.setattr(space.sys, w_executable, space.wrap('foobar'))
+        assert space.str_w(space.getattr(space.sys, w_executable)) == 'foobar'
+        space.startup()
+        assert space.str_w(space.getattr(space.sys, w_executable)) == 'foobar'
diff --git a/pypy/interpreter/test/test_typedef.py b/pypy/interpreter/test/test_typedef.py
--- a/pypy/interpreter/test/test_typedef.py
+++ b/pypy/interpreter/test/test_typedef.py
@@ -304,6 +304,42 @@
         assert_method(w_o1, "c", True)
         assert_method(w_o2, "c", False)
 
+    def test_total_ordering(self):
+        class W_SomeType(Wrappable):
+            def __init__(self, space, x):
+                self.space = space
+                self.x = x
+
+            def descr__lt(self, w_other):
+                assert isinstance(w_other, W_SomeType)
+                return self.space.wrap(self.x < w_other.x)
+
+            def descr__eq(self, w_other):
+                assert isinstance(w_other, W_SomeType)
+                return self.space.wrap(self.x == w_other.x)
+
+        W_SomeType.typedef = typedef.TypeDef(
+            'some_type',
+            __total_ordering__ = 'auto',
+            __lt__ = interp2app(W_SomeType.descr__lt),
+            __eq__ = interp2app(W_SomeType.descr__eq),
+            )
+        space = self.space
+        w_b = space.wrap(W_SomeType(space, 2))
+        w_c = space.wrap(W_SomeType(space, 2))
+        w_a = space.wrap(W_SomeType(space, 1))
+        # explicitly defined
+        assert space.is_true(space.lt(w_a, w_b))
+        assert not space.is_true(space.eq(w_a, w_b))
+        assert space.is_true(space.eq(w_b, w_c))
+        # automatically defined
+        assert space.is_true(space.le(w_a, w_b))
+        assert space.is_true(space.le(w_b, w_c))
+        assert space.is_true(space.gt(w_b, w_a))
+        assert space.is_true(space.ge(w_b, w_a))
+        assert space.is_true(space.ge(w_b, w_c))
+        assert space.is_true(space.ne(w_a, w_b))
+        assert not space.is_true(space.ne(w_b, w_c))
 
 class AppTestTypeDef:
 
diff --git a/pypy/interpreter/test/test_zpy.py b/pypy/interpreter/test/test_zpy.py
--- a/pypy/interpreter/test/test_zpy.py
+++ b/pypy/interpreter/test/test_zpy.py
@@ -17,14 +17,14 @@
 def test_executable():
     """Ensures sys.executable points to the py.py script"""
     # TODO : watch out for spaces/special chars in pypypath
-    output = run(sys.executable, pypypath,
+    output = run(sys.executable, pypypath, '-S',
                  "-c", "import sys;print sys.executable")
     assert output.splitlines()[-1] == pypypath
 
 def test_special_names():
     """Test the __name__ and __file__ special global names"""
     cmd = "print __name__; print '__file__' in globals()"
-    output = run(sys.executable, pypypath, '-c', cmd)
+    output = run(sys.executable, pypypath, '-S', '-c', cmd)
     assert output.splitlines()[-2] == '__main__'
     assert output.splitlines()[-1] == 'False'
 
@@ -33,24 +33,24 @@
     tmpfile.write("print __name__; print __file__\n")
     tmpfile.close()
 
-    output = run(sys.executable, pypypath, tmpfilepath)
+    output = run(sys.executable, pypypath, '-S', tmpfilepath)
     assert output.splitlines()[-2] == '__main__'
     assert output.splitlines()[-1] == str(tmpfilepath)
 
 def test_argv_command():
     """Some tests on argv"""
     # test 1 : no arguments
-    output = run(sys.executable, pypypath,
+    output = run(sys.executable, pypypath, '-S',
                  "-c", "import sys;print sys.argv")
     assert output.splitlines()[-1] == str(['-c'])
 
     # test 2 : some arguments after
-    output = run(sys.executable, pypypath,
+    output = run(sys.executable, pypypath, '-S',
                  "-c", "import sys;print sys.argv", "hello")
     assert output.splitlines()[-1] == str(['-c','hello'])
     
     # test 3 : additionnal pypy parameters
-    output = run(sys.executable, pypypath,
+    output = run(sys.executable, pypypath, '-S',
                  "-O", "-c", "import sys;print sys.argv", "hello")
     assert output.splitlines()[-1] == str(['-c','hello'])
 
@@ -65,15 +65,15 @@
     tmpfile.close()
 
     # test 1 : no arguments
-    output = run(sys.executable, pypypath, tmpfilepath)
+    output = run(sys.executable, pypypath, '-S', tmpfilepath)
     assert output.splitlines()[-1] == str([tmpfilepath])
     
     # test 2 : some arguments after
-    output = run(sys.executable, pypypath, tmpfilepath, "hello")
+    output = run(sys.executable, pypypath, '-S', tmpfilepath, "hello")
     assert output.splitlines()[-1] == str([tmpfilepath,'hello'])
     
     # test 3 : additionnal pypy parameters
-    output = run(sys.executable, pypypath, "-O", tmpfilepath, "hello")
+    output = run(sys.executable, pypypath, '-S', "-O", tmpfilepath, "hello")
     assert output.splitlines()[-1] == str([tmpfilepath,'hello'])
     
 
@@ -95,7 +95,7 @@
     tmpfile.write(TB_NORMALIZATION_CHK)
     tmpfile.close()
 
-    popen = subprocess.Popen([sys.executable, str(pypypath), tmpfilepath],
+    popen = subprocess.Popen([sys.executable, str(pypypath), '-S', tmpfilepath],
                              stderr=subprocess.PIPE)
     _, stderr = popen.communicate()
     assert stderr.endswith('KeyError: <normalized>\n')
diff --git a/pypy/interpreter/typedef.py b/pypy/interpreter/typedef.py
--- a/pypy/interpreter/typedef.py
+++ b/pypy/interpreter/typedef.py
@@ -12,7 +12,7 @@
 from pypy.rlib.jit import promote
 
 class TypeDef:
-    def __init__(self, __name, __base=None, **rawdict):
+    def __init__(self, __name, __base=None, __total_ordering__=None, **rawdict):
         "NOT_RPYTHON: initialization-time only"
         self.name = __name
         if __base is None:
@@ -34,6 +34,9 @@
         # xxx used by faking
         self.fakedcpytype = None
         self.add_entries(**rawdict)
+        assert __total_ordering__ in (None, 'auto'), "Unknown value for __total_ordering"
+        if __total_ordering__ == 'auto':
+            self.auto_total_ordering()
     
     def add_entries(self, **rawdict):
         # xxx fix the names of the methods to match what app-level expects
@@ -41,7 +44,15 @@
             if isinstance(value, (interp2app, GetSetProperty)):
                 value.name = key
         self.rawdict.update(rawdict)
-    
+
+    def auto_total_ordering(self):
+        assert '__lt__' in self.rawdict, "__total_ordering='auto' requires __lt__"
+        assert '__eq__' in self.rawdict, "__total_ordering='auto' requires __eq__"
+        self.add_entries(__le__ = auto__le__,
+                         __gt__ = auto__gt__,
+                         __ge__ = auto__ge__,
+                         __ne__ = auto__ne__)
+
     def _freeze_(self):
         # hint for the annotator: track individual constant instances of TypeDef
         return True
@@ -50,6 +61,26 @@
         return "<%s name=%r>" % (self.__class__.__name__, self.name)
 
 
+# generic special cmp methods defined on top of __lt__ and __eq__, used by
+# automatic total ordering
+
+ at interp2app
+def auto__le__(space, w_self, w_other):
+    return space.not_(space.lt(w_other, w_self))
+
+ at interp2app
+def auto__gt__(space, w_self, w_other):
+    return space.lt(w_other, w_self)
+
+ at interp2app
+def auto__ge__(space, w_self, w_other):
+    return space.not_(space.lt(w_self, w_other))
+
+ at interp2app
+def auto__ne__(space, w_self, w_other):
+    return space.not_(space.eq(w_self, w_other))
+
+
 # ____________________________________________________________
 #  Hash support
 
diff --git a/pypy/jit/backend/llgraph/llimpl.py b/pypy/jit/backend/llgraph/llimpl.py
--- a/pypy/jit/backend/llgraph/llimpl.py
+++ b/pypy/jit/backend/llgraph/llimpl.py
@@ -171,7 +171,7 @@
     'unicodesetitem'  : (('ref', 'int', 'int'), 'int'),
     'cast_ptr_to_int' : (('ref',), 'int'),
     'cast_int_to_ptr' : (('int',), 'ref'),
-    'debug_merge_point': (('ref', 'int'), None),
+    'debug_merge_point': (('ref', 'int', 'int'), None),
     'force_token'     : ((), 'int'),
     'call_may_force'  : (('int', 'varargs'), 'intorptr'),
     'guard_not_forced': ((), None),
diff --git a/pypy/jit/backend/llsupport/gc.py b/pypy/jit/backend/llsupport/gc.py
--- a/pypy/jit/backend/llsupport/gc.py
+++ b/pypy/jit/backend/llsupport/gc.py
@@ -1,7 +1,6 @@
 import os
 from pypy.rlib import rgc
 from pypy.rlib.objectmodel import we_are_translated, specialize
-from pypy.rlib.debug import fatalerror
 from pypy.rlib.rarithmetic import ovfcheck
 from pypy.rpython.lltypesystem import lltype, llmemory, rffi, rclass, rstr
 from pypy.rpython.lltypesystem import llgroup
@@ -209,6 +208,7 @@
     This is the class supporting --gcrootfinder=asmgcc.
     """
     is_shadow_stack = False
+    is_64_bit = (WORD == 8)
 
     LOC_REG       = 0
     LOC_ESP_PLUS  = 1
@@ -337,17 +337,17 @@
             self._gcmap_deadentries += 1
             item += asmgcroot.arrayitemsize
 
-    def get_basic_shape(self, is_64_bit=False):
+    def get_basic_shape(self):
         # XXX: Should this code even really know about stack frame layout of
         # the JIT?
-        if is_64_bit:
-            return [chr(self.LOC_EBP_PLUS  | 8),
-                    chr(self.LOC_EBP_MINUS | 8),
-                    chr(self.LOC_EBP_MINUS | 16),
-                    chr(self.LOC_EBP_MINUS | 24),
-                    chr(self.LOC_EBP_MINUS | 32),
-                    chr(self.LOC_EBP_MINUS | 40),
-                    chr(self.LOC_EBP_PLUS  | 0),
+        if self.is_64_bit:
+            return [chr(self.LOC_EBP_PLUS  | 4),    # return addr: at   8(%rbp)
+                    chr(self.LOC_EBP_MINUS | 4),    # saved %rbx:  at  -8(%rbp)
+                    chr(self.LOC_EBP_MINUS | 8),    # saved %r12:  at -16(%rbp)
+                    chr(self.LOC_EBP_MINUS | 12),   # saved %r13:  at -24(%rbp)
+                    chr(self.LOC_EBP_MINUS | 16),   # saved %r14:  at -32(%rbp)
+                    chr(self.LOC_EBP_MINUS | 20),   # saved %r15:  at -40(%rbp)
+                    chr(self.LOC_EBP_PLUS  | 0),    # saved %rbp:  at    (%rbp)
                     chr(0)]
         else:
             return [chr(self.LOC_EBP_PLUS  | 4),    # return addr: at   4(%ebp)
@@ -367,7 +367,11 @@
         shape.append(chr(number | flag))
 
     def add_frame_offset(self, shape, offset):
-        assert (offset & 3) == 0
+        if self.is_64_bit:
+            assert (offset & 7) == 0
+            offset >>= 1
+        else:
+            assert (offset & 3) == 0
         if offset >= 0:
             num = self.LOC_EBP_PLUS | offset
         else:
@@ -519,7 +523,7 @@
     def initialize(self):
         pass
 
-    def get_basic_shape(self, is_64_bit=False):
+    def get_basic_shape(self):
         return []
 
     def add_frame_offset(self, shape, offset):
@@ -770,11 +774,19 @@
         self.generate_function('malloc_unicode', malloc_unicode,
                                [lltype.Signed])
 
-        # Rarely called: allocate a fixed-size amount of bytes, but
-        # not in the nursery, because it is too big.  Implemented like
-        # malloc_nursery_slowpath() above.
-        self.generate_function('malloc_fixedsize', malloc_nursery_slowpath,
-                               [lltype.Signed])
+        # Never called as far as I can tell, but there for completeness:
+        # allocate a fixed-size object, but not in the nursery, because
+        # it is too big.
+        def malloc_big_fixedsize(size, tid):
+            if self.DEBUG:
+                self._random_usage_of_xmm_registers()
+            type_id = llop.extract_ushort(llgroup.HALFWORD, tid)
+            check_typeid(type_id)
+            return llop1.do_malloc_fixedsize_clear(llmemory.GCREF,
+                                                   type_id, size,
+                                                   False, False, False)
+        self.generate_function('malloc_big_fixedsize', malloc_big_fixedsize,
+                               [lltype.Signed] * 2)
 
     def _bh_malloc(self, sizedescr):
         from pypy.rpython.memory.gctypelayout import check_typeid
diff --git a/pypy/jit/backend/llsupport/rewrite.py b/pypy/jit/backend/llsupport/rewrite.py
--- a/pypy/jit/backend/llsupport/rewrite.py
+++ b/pypy/jit/backend/llsupport/rewrite.py
@@ -96,8 +96,10 @@
     def handle_new_fixedsize(self, descr, op):
         assert isinstance(descr, SizeDescr)
         size = descr.size
-        self.gen_malloc_nursery(size, op.result)
-        self.gen_initialize_tid(op.result, descr.tid)
+        if self.gen_malloc_nursery(size, op.result):
+            self.gen_initialize_tid(op.result, descr.tid)
+        else:
+            self.gen_malloc_fixedsize(size, descr.tid, op.result)
 
     def handle_new_array(self, arraydescr, op):
         v_length = op.getarg(0)
@@ -112,8 +114,8 @@
                 pass    # total_size is still -1
         elif arraydescr.itemsize == 0:
             total_size = arraydescr.basesize
-        if 0 <= total_size <= 0xffffff:     # up to 16MB, arbitrarily
-            self.gen_malloc_nursery(total_size, op.result)
+        if (total_size >= 0 and
+                self.gen_malloc_nursery(total_size, op.result)):
             self.gen_initialize_tid(op.result, arraydescr.tid)
             self.gen_initialize_len(op.result, v_length, arraydescr.lendescr)
         elif self.gc_ll_descr.kind == 'boehm':
@@ -147,13 +149,22 @@
         # mark 'v_result' as freshly malloced
         self.recent_mallocs[v_result] = None
 
-    def gen_malloc_fixedsize(self, size, v_result):
-        """Generate a CALL_MALLOC_GC(malloc_fixedsize_fn, Const(size)).
-        Note that with the framework GC, this should be called very rarely.
+    def gen_malloc_fixedsize(self, size, typeid, v_result):
+        """Generate a CALL_MALLOC_GC(malloc_fixedsize_fn, ...).
+        Used on Boehm, and on the framework GC for large fixed-size
+        mallocs.  (For all I know this latter case never occurs in
+        practice, but better safe than sorry.)
         """
-        addr = self.gc_ll_descr.get_malloc_fn_addr('malloc_fixedsize')
-        self._gen_call_malloc_gc([ConstInt(addr), ConstInt(size)], v_result,
-                                 self.gc_ll_descr.malloc_fixedsize_descr)
+        if self.gc_ll_descr.fielddescr_tid is not None:  # framework GC
+            assert (size & (WORD-1)) == 0, "size not aligned?"
+            addr = self.gc_ll_descr.get_malloc_fn_addr('malloc_big_fixedsize')
+            args = [ConstInt(addr), ConstInt(size), ConstInt(typeid)]
+            descr = self.gc_ll_descr.malloc_big_fixedsize_descr
+        else:                                            # Boehm
+            addr = self.gc_ll_descr.get_malloc_fn_addr('malloc_fixedsize')
+            args = [ConstInt(addr), ConstInt(size)]
+            descr = self.gc_ll_descr.malloc_fixedsize_descr
+        self._gen_call_malloc_gc(args, v_result, descr)
 
     def gen_boehm_malloc_array(self, arraydescr, v_num_elem, v_result):
         """Generate a CALL_MALLOC_GC(malloc_array_fn, ...) for Boehm."""
@@ -211,8 +222,7 @@
         """
         size = self.round_up_for_allocation(size)
         if not self.gc_ll_descr.can_use_nursery_malloc(size):
-            self.gen_malloc_fixedsize(size, v_result)
-            return
+            return False
         #
         op = None
         if self._op_malloc_nursery is not None:
@@ -238,6 +248,7 @@
         self._previous_size = size
         self._v_last_malloced_nursery = v_result
         self.recent_mallocs[v_result] = None
+        return True
 
     def gen_initialize_tid(self, v_newgcobj, tid):
         if self.gc_ll_descr.fielddescr_tid is not None:
diff --git a/pypy/jit/backend/llsupport/test/test_gc.py b/pypy/jit/backend/llsupport/test/test_gc.py
--- a/pypy/jit/backend/llsupport/test/test_gc.py
+++ b/pypy/jit/backend/llsupport/test/test_gc.py
@@ -57,6 +57,7 @@
         def frame_pos(n):
             return -4*(4+n)
         gcrootmap = GcRootMap_asmgcc()
+        gcrootmap.is_64_bit = False
         num1 = frame_pos(-5)
         num1a = num1|2
         num2 = frame_pos(55)
diff --git a/pypy/jit/backend/llsupport/test/test_rewrite.py b/pypy/jit/backend/llsupport/test/test_rewrite.py
--- a/pypy/jit/backend/llsupport/test/test_rewrite.py
+++ b/pypy/jit/backend/llsupport/test/test_rewrite.py
@@ -119,12 +119,19 @@
             jump()
         """, """
             []
-            p0 = call_malloc_gc(ConstClass(malloc_fixedsize), \
-                                %(adescr.basesize + 10 * adescr.itemsize)d, \
-                                descr=malloc_fixedsize_descr)
-            setfield_gc(p0, 10, descr=alendescr)
+            p0 = call_malloc_gc(ConstClass(malloc_array),   \
+                                %(adescr.basesize)d,        \
+                                10,                         \
+                                %(adescr.itemsize)d,        \
+                                %(adescr.lendescr.offset)d, \
+                                descr=malloc_array_descr)
             jump()
         """)
+##      should ideally be:
+##            p0 = call_malloc_gc(ConstClass(malloc_fixedsize), \
+##                                %(adescr.basesize + 10 * adescr.itemsize)d, \
+##                                descr=malloc_fixedsize_descr)
+##            setfield_gc(p0, 10, descr=alendescr)
 
     def test_new_array_variable(self):
         self.check_rewrite("""
@@ -178,13 +185,20 @@
             jump()
         """, """
             [i1]
-            p0 = call_malloc_gc(ConstClass(malloc_fixedsize),   \
-                                %(unicodedescr.basesize +       \
-                                  10 * unicodedescr.itemsize)d, \
-                                descr=malloc_fixedsize_descr)
-            setfield_gc(p0, 10, descr=unicodelendescr)
+            p0 = call_malloc_gc(ConstClass(malloc_array),   \
+                                %(unicodedescr.basesize)d,  \
+                                10,                         \
+                                %(unicodedescr.itemsize)d,  \
+                                %(unicodelendescr.offset)d, \
+                                descr=malloc_array_descr)
             jump()
         """)
+##      should ideally be:
+##            p0 = call_malloc_gc(ConstClass(malloc_fixedsize),   \
+##                                %(unicodedescr.basesize +       \
+##                                  10 * unicodedescr.itemsize)d, \
+##                                descr=malloc_fixedsize_descr)
+##            setfield_gc(p0, 10, descr=unicodelendescr)
 
 
 class TestFramework(RewriteTests):
@@ -203,7 +217,7 @@
         #
         class FakeCPU(object):
             def sizeof(self, STRUCT):
-                descr = SizeDescrWithVTable(102)
+                descr = SizeDescrWithVTable(104)
                 descr.tid = 9315
                 return descr
         self.cpu = FakeCPU()
@@ -368,11 +382,9 @@
             jump()
         """, """
             []
-            p0 = call_malloc_gc(ConstClass(malloc_fixedsize), \
-                                %(bdescr.basesize + 104)d,    \
-                                descr=malloc_fixedsize_descr)
-            setfield_gc(p0, 8765, descr=tiddescr)
-            setfield_gc(p0, 103, descr=blendescr)
+            p0 = call_malloc_gc(ConstClass(malloc_array), 1,  \
+                                %(bdescr.tid)d, 103,          \
+                                descr=malloc_array_descr)
             jump()
         """)
 
@@ -435,9 +447,8 @@
             jump()
         """, """
             [p1]
-            p0 = call_malloc_gc(ConstClass(malloc_fixedsize), 104, \
-                                descr=malloc_fixedsize_descr)
-            setfield_gc(p0, 9315, descr=tiddescr)
+            p0 = call_malloc_gc(ConstClass(malloc_big_fixedsize), 104, 9315, \
+                                descr=malloc_big_fixedsize_descr)
             setfield_gc(p0, ConstClass(o_vtable), descr=vtable_descr)
             jump()
         """)
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -266,6 +266,38 @@
         res = self.cpu.get_latest_value_int(0)
         assert res == 20
 
+    def test_compile_big_bridge_out_of_small_loop(self):
+        i0 = BoxInt()
+        faildescr1 = BasicFailDescr(1)
+        looptoken = JitCellToken()
+        operations = [
+            ResOperation(rop.GUARD_FALSE, [i0], None, descr=faildescr1),
+            ResOperation(rop.FINISH, [], None, descr=BasicFailDescr(2)),
+            ]
+        inputargs = [i0]
+        operations[0].setfailargs([i0])
+        self.cpu.compile_loop(inputargs, operations, looptoken)
+
+        i1list = [BoxInt() for i in range(1000)]
+        bridge = []
+        iprev = i0
+        for i1 in i1list:
+            bridge.append(ResOperation(rop.INT_ADD, [iprev, ConstInt(1)], i1))
+            iprev = i1
+        bridge.append(ResOperation(rop.GUARD_FALSE, [i0], None,
+                                   descr=BasicFailDescr(3)))
+        bridge.append(ResOperation(rop.FINISH, [], None,
+                                   descr=BasicFailDescr(4)))
+        bridge[-2].setfailargs(i1list)
+
+        self.cpu.compile_bridge(faildescr1, [i0], bridge, looptoken)
+
+        fail = self.cpu.execute_token(looptoken, 1)
+        assert fail.identifier == 3
+        for i in range(1000):
+            res = self.cpu.get_latest_value_int(i)
+            assert res == 2 + i
+
     def test_get_latest_value_count(self):
         i0 = BoxInt()
         i1 = BoxInt()
@@ -572,7 +604,7 @@
                                          [funcbox, BoxInt(arg1), BoxInt(arg2)],
                                          'int', descr=calldescr)
             assert res.getint() == f(arg1, arg2)
-        
+
     def test_call_stack_alignment(self):
         # test stack alignment issues, notably for Mac OS/X.
         # also test the ordering of the arguments.
@@ -1458,7 +1490,8 @@
     def test_noops(self):
         c_box = self.alloc_string("hi there").constbox()
         c_nest = ConstInt(0)
-        self.execute_operation(rop.DEBUG_MERGE_POINT, [c_box, c_nest], 'void')
+        c_id = ConstInt(0)
+        self.execute_operation(rop.DEBUG_MERGE_POINT, [c_box, c_nest, c_id], 'void')
         self.execute_operation(rop.JIT_DEBUG, [c_box, c_nest, c_nest,
                                                c_nest, c_nest], 'void')
 
@@ -3029,7 +3062,7 @@
             ResOperation(rop.JUMP, [i2], None, descr=targettoken2),
             ]
         self.cpu.compile_bridge(faildescr, inputargs, operations, looptoken)
-        
+
         fail = self.cpu.execute_token(looptoken, 2)
         assert fail.identifier == 3
         res = self.cpu.get_latest_value_int(0)
@@ -3074,7 +3107,7 @@
             assert len(mc) == len(ops)
             for i in range(len(mc)):
                 assert mc[i].split("\t")[-1].startswith(ops[i])
-            
+
         data = ctypes.string_at(info.asmaddr, info.asmlen)
         mc = list(machine_code_dump(data, info.asmaddr, cpuname))
         lines = [line for line in mc if line.count('\t') == 2]
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -88,7 +88,6 @@
         self._debug = False
         self.debug_counter_descr = cpu.fielddescrof(DEBUG_COUNTER, 'i')
         self.fail_boxes_count = 0
-        self._current_depths_cache = (0, 0)
         self.datablockwrapper = None
         self.stack_check_slowpath = 0
         self.propagate_exception_path = 0
@@ -442,10 +441,8 @@
         looppos = self.mc.get_relative_pos()
         looptoken._x86_loop_code = looppos
         clt.frame_depth = -1     # temporarily
-        clt.param_depth = -1     # temporarily
-        frame_depth, param_depth = self._assemble(regalloc, operations)
+        frame_depth = self._assemble(regalloc, operations)
         clt.frame_depth = frame_depth
-        clt.param_depth = param_depth
         #
         size_excluding_failure_stuff = self.mc.get_relative_pos()
         self.write_pending_failure_recoveries()
@@ -459,8 +456,7 @@
             rawstart + size_excluding_failure_stuff,
             rawstart))
         debug_stop("jit-backend-addr")
-        self._patch_stackadjust(rawstart + stackadjustpos,
-                                frame_depth + param_depth)
+        self._patch_stackadjust(rawstart + stackadjustpos, frame_depth)
         self.patch_pending_failure_recoveries(rawstart)
         #
         ops_offset = self.mc.ops_offset
@@ -500,14 +496,13 @@
             assert ([loc.assembler() for loc in arglocs] ==
                     [loc.assembler() for loc in faildescr._x86_debug_faillocs])
         regalloc = RegAlloc(self, self.cpu.translate_support_code)
-        fail_depths = faildescr._x86_current_depths
         startpos = self.mc.get_relative_pos()
-        operations = regalloc.prepare_bridge(fail_depths, inputargs, arglocs,
+        operations = regalloc.prepare_bridge(inputargs, arglocs,
                                              operations,
                                              self.current_clt.allgcrefs)
 
         stackadjustpos = self._patchable_stackadjust()
-        frame_depth, param_depth = self._assemble(regalloc, operations)
+        frame_depth = self._assemble(regalloc, operations)
         codeendpos = self.mc.get_relative_pos()
         self.write_pending_failure_recoveries()
         fullsize = self.mc.get_relative_pos()
@@ -517,19 +512,16 @@
         debug_print("bridge out of Guard %d has address %x to %x" %
                     (descr_number, rawstart, rawstart + codeendpos))
         debug_stop("jit-backend-addr")
-        self._patch_stackadjust(rawstart + stackadjustpos,
-                                frame_depth + param_depth)
+        self._patch_stackadjust(rawstart + stackadjustpos, frame_depth)
         self.patch_pending_failure_recoveries(rawstart)
         if not we_are_translated():
             # for the benefit of tests
             faildescr._x86_bridge_frame_depth = frame_depth
-            faildescr._x86_bridge_param_depth = param_depth
         # patch the jump from original guard
         self.patch_jump_for_descr(faildescr, rawstart)
         ops_offset = self.mc.ops_offset
         self.fixup_target_tokens(rawstart)
         self.current_clt.frame_depth = max(self.current_clt.frame_depth, frame_depth)
-        self.current_clt.param_depth = max(self.current_clt.param_depth, param_depth)
         self.teardown()
         # oprofile support
         if self.cpu.profile_agent is not None:
@@ -700,15 +692,12 @@
         regalloc.walk_operations(operations)
         if we_are_translated() or self.cpu.dont_keepalive_stuff:
             self._regalloc = None   # else keep it around for debugging
-        frame_depth = regalloc.fm.get_frame_depth()
-        param_depth = regalloc.param_depth
+        frame_depth = regalloc.get_final_frame_depth()
         jump_target_descr = regalloc.jump_target_descr
         if jump_target_descr is not None:
             target_frame_depth = jump_target_descr._x86_clt.frame_depth
-            target_param_depth = jump_target_descr._x86_clt.param_depth
             frame_depth = max(frame_depth, target_frame_depth)
-            param_depth = max(param_depth, target_param_depth)
-        return frame_depth, param_depth
+        return frame_depth
 
     def _patchable_stackadjust(self):
         # stack adjustment LEA
@@ -892,10 +881,9 @@
         genop_math_list[oopspecindex](self, op, arglocs, resloc)
 
     def regalloc_perform_with_guard(self, op, guard_op, faillocs,
-                                    arglocs, resloc, current_depths):
+                                    arglocs, resloc):
         faildescr = guard_op.getdescr()
         assert isinstance(faildescr, AbstractFailDescr)
-        faildescr._x86_current_depths = current_depths
         failargs = guard_op.getfailargs()
         guard_opnum = guard_op.getopnum()
         guard_token = self.implement_guard_recovery(guard_opnum,
@@ -911,10 +899,9 @@
             # must be added by the genop_guard_list[]()
             assert guard_token is self.pending_guard_tokens[-1]
 
-    def regalloc_perform_guard(self, guard_op, faillocs, arglocs, resloc,
-                               current_depths):
+    def regalloc_perform_guard(self, guard_op, faillocs, arglocs, resloc):
         self.regalloc_perform_with_guard(None, guard_op, faillocs, arglocs,
-                                         resloc, current_depths)
+                                         resloc)
 
     def load_effective_addr(self, sizereg, baseofs, scale, result, frm=imm0):
         self.mc.LEA(result, addr_add(frm, sizereg, baseofs, scale))
@@ -1038,13 +1025,14 @@
                     self.mc.MOV(tmp, loc)
                     self.mc.MOV_sr(p, tmp.value)
             p += loc.get_width()
-        self._regalloc.reserve_param(p//WORD)
         # x is a location
         self.mc.CALL(x)
         self.mark_gc_roots(force_index)
         #
         if callconv != FFI_DEFAULT_ABI:
             self._fix_stdcall(callconv, p)
+        #
+        self._regalloc.needed_extra_stack_locations(p//WORD)
 
     def _fix_stdcall(self, callconv, p):
         from pypy.rlib.clibffi import FFI_STDCALL
@@ -1127,9 +1115,9 @@
             x = r10
         remap_frame_layout(self, src_locs, dst_locs, X86_64_SCRATCH_REG)
 
-        self._regalloc.reserve_param(len(pass_on_stack))
         self.mc.CALL(x)
         self.mark_gc_roots(force_index)
+        self._regalloc.needed_extra_stack_locations(len(pass_on_stack))
 
     def call(self, addr, args, res):
         force_index = self.write_new_force_index()
@@ -2136,7 +2124,6 @@
             if reg in save_registers:
                 self.mc.MOV_sr(p, reg.value)
                 p += WORD
-        self._regalloc.reserve_param(p//WORD)
         #
         if gcrootmap.is_shadow_stack:
             args = []
@@ -2192,6 +2179,7 @@
             if reg in save_registers:
                 self.mc.MOV_rs(reg.value, p)
                 p += WORD
+        self._regalloc.needed_extra_stack_locations(p//WORD)
 
     def call_reacquire_gil(self, gcrootmap, save_loc):
         # save the previous result (eax/xmm0) into the stack temporarily.
@@ -2199,7 +2187,6 @@
         # to save xmm0 in this case.
         if isinstance(save_loc, RegLoc) and not save_loc.is_xmm:
             self.mc.MOV_sr(WORD, save_loc.value)
-            self._regalloc.reserve_param(2)
         # call the reopenstack() function (also reacquiring the GIL)
         if gcrootmap.is_shadow_stack:
             args = []
@@ -2219,6 +2206,7 @@
         # restore the result from the stack
         if isinstance(save_loc, RegLoc) and not save_loc.is_xmm:
             self.mc.MOV_rs(save_loc.value, WORD)
+            self._regalloc.needed_extra_stack_locations(2)
 
     def genop_guard_call_assembler(self, op, guard_op, guard_token,
                                    arglocs, result_loc):
@@ -2495,11 +2483,6 @@
         # copy of heap(nursery_free_adr), so that the final MOV below is
         # a no-op.
 
-        # reserve room for the argument to the real malloc and the
-        # saved XMM regs (on 32 bit: 8 * 2 words; on 64 bit: 16 * 1
-        # word)
-        self._regalloc.reserve_param(1+16)
-
         gcrootmap = self.cpu.gc_ll_descr.gcrootmap
         shadow_stack = (gcrootmap is not None and gcrootmap.is_shadow_stack)
         if not shadow_stack:
@@ -2510,6 +2493,11 @@
         slowpath_addr2 = self.malloc_slowpath2
         self.mc.CALL(imm(slowpath_addr2))
 
+        # reserve room for the argument to the real malloc and the
+        # saved XMM regs (on 32 bit: 8 * 2 words; on 64 bit: 16 * 1
+        # word)
+        self._regalloc.needed_extra_stack_locations(1+16)
+
         offset = self.mc.get_relative_pos() - jmp_adr
         assert 0 < offset <= 127
         self.mc.overwrite(jmp_adr-1, chr(offset))
diff --git a/pypy/jit/backend/x86/codebuf.py b/pypy/jit/backend/x86/codebuf.py
--- a/pypy/jit/backend/x86/codebuf.py
+++ b/pypy/jit/backend/x86/codebuf.py
@@ -19,8 +19,8 @@
 
 
 class MachineCodeBlockWrapper(BlockBuilderMixin,
-                              codebuilder_cls,
-                              LocationCodeBuilder):
+                              LocationCodeBuilder,
+                              codebuilder_cls):
     def __init__(self):
         self.init_block_builder()
         # a list of relative positions; for each position p, the bytes
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -168,7 +168,7 @@
 
     def _prepare(self, inputargs, operations, allgcrefs):
         self.fm = X86FrameManager()
-        self.param_depth = 0
+        self.min_frame_depth = 0
         cpu = self.assembler.cpu
         operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations,
                                                        allgcrefs)
@@ -193,11 +193,9 @@
             self.min_bytes_before_label = 13
         return operations
 
-    def prepare_bridge(self, prev_depths, inputargs, arglocs, operations,
-                       allgcrefs):
+    def prepare_bridge(self, inputargs, arglocs, operations, allgcrefs):
         operations = self._prepare(inputargs, operations, allgcrefs)
         self._update_bindings(arglocs, inputargs)
-        self.param_depth = prev_depths[1]
         self.min_bytes_before_label = 0
         return operations
 
@@ -205,8 +203,15 @@
         self.min_bytes_before_label = max(self.min_bytes_before_label,
                                           at_least_position)
 
-    def reserve_param(self, n):
-        self.param_depth = max(self.param_depth, n)
+    def needed_extra_stack_locations(self, n):
+        # call *after* you needed extra stack locations: (%esp), (%esp+4)...
+        min_frame_depth = self.fm.get_frame_depth() + n
+        if min_frame_depth > self.min_frame_depth:
+            self.min_frame_depth = min_frame_depth
+
+    def get_final_frame_depth(self):
+        self.needed_extra_stack_locations(0)  # update min_frame_depth
+        return self.min_frame_depth
 
     def _set_initial_bindings(self, inputargs):
         if IS_X86_64:
@@ -376,25 +381,12 @@
     def locs_for_fail(self, guard_op):
         return [self.loc(v) for v in guard_op.getfailargs()]
 
-    def get_current_depth(self):
-        # return (self.fm.frame_depth, self.param_depth), but trying to share
-        # the resulting tuple among several calls
-        arg0 = self.fm.get_frame_depth()
-        arg1 = self.param_depth
-        result = self.assembler._current_depths_cache
-        if result[0] != arg0 or result[1] != arg1:
-            result = (arg0, arg1)
-            self.assembler._current_depths_cache = result
-        return result
-
     def perform_with_guard(self, op, guard_op, arglocs, result_loc):
         faillocs = self.locs_for_fail(guard_op)
         self.rm.position += 1
         self.xrm.position += 1
-        current_depths = self.get_current_depth()
         self.assembler.regalloc_perform_with_guard(op, guard_op, faillocs,
-                                                   arglocs, result_loc,
-                                                   current_depths)
+                                                   arglocs, result_loc)
         if op.result is not None:
             self.possibly_free_var(op.result)
         self.possibly_free_vars(guard_op.getfailargs())
@@ -407,10 +399,8 @@
                                                       arglocs))
             else:
                 self.assembler.dump('%s(%s)' % (guard_op, arglocs))
-        current_depths = self.get_current_depth()
         self.assembler.regalloc_perform_guard(guard_op, faillocs, arglocs,
-                                              result_loc,
-                                              current_depths)
+                                              result_loc)
         self.possibly_free_vars(guard_op.getfailargs())
 
     def PerformDiscard(self, op, arglocs):
@@ -1393,7 +1383,7 @@
         self.force_spill_var(op.getarg(0))
 
     def get_mark_gc_roots(self, gcrootmap, use_copy_area=False):
-        shape = gcrootmap.get_basic_shape(IS_X86_64)
+        shape = gcrootmap.get_basic_shape()
         for v, val in self.fm.bindings.items():
             if (isinstance(v, BoxPtr) and self.rm.stays_alive(v)):
                 assert isinstance(val, StackLoc)
diff --git a/pypy/jit/backend/x86/support.py b/pypy/jit/backend/x86/support.py
--- a/pypy/jit/backend/x86/support.py
+++ b/pypy/jit/backend/x86/support.py
@@ -1,6 +1,7 @@
 import sys
 from pypy.rpython.lltypesystem import lltype, rffi, llmemory
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
+from pypy.jit.backend.x86.arch import WORD
 
 
 def values_array(TP, size):
@@ -37,8 +38,13 @@
 
 if sys.platform == 'win32':
     ensure_sse2_floats = lambda : None
+    # XXX check for SSE2 on win32 too
 else:
+    if WORD == 4:
+        extra = ['-DPYPY_X86_CHECK_SSE2']
+    else:
+        extra = []
     ensure_sse2_floats = rffi.llexternal_use_eci(ExternalCompilationInfo(
         compile_extra = ['-msse2', '-mfpmath=sse',
-                         '-DPYPY_CPU_HAS_STANDARD_PRECISION'],
+                         '-DPYPY_CPU_HAS_STANDARD_PRECISION'] + extra,
         ))
diff --git a/pypy/jit/backend/x86/test/test_gc_integration.py b/pypy/jit/backend/x86/test/test_gc_integration.py
--- a/pypy/jit/backend/x86/test/test_gc_integration.py
+++ b/pypy/jit/backend/x86/test/test_gc_integration.py
@@ -28,7 +28,7 @@
 
 class MockGcRootMap(object):
     is_shadow_stack = False
-    def get_basic_shape(self, is_64_bit):
+    def get_basic_shape(self):
         return ['shape']
     def add_frame_offset(self, shape, offset):
         shape.append(offset)
@@ -184,6 +184,8 @@
         self.addrs[1] = self.addrs[0] + 64
         self.calls = []
         def malloc_slowpath(size):
+            if self.gcrootmap is not None:   # hook
+                self.gcrootmap.hook_malloc_slowpath()
             self.calls.append(size)
             # reset the nursery
             nadr = rffi.cast(lltype.Signed, self.nursery)
@@ -257,3 +259,218 @@
         assert gc_ll_descr.addrs[0] == nurs_adr + 24
         # this should call slow path once
         assert gc_ll_descr.calls == [24]
+
+    def test_save_regs_around_malloc(self):
+        S1 = lltype.GcStruct('S1')
+        S2 = lltype.GcStruct('S2', ('s0', lltype.Ptr(S1)),
+                                   ('s1', lltype.Ptr(S1)),
+                                   ('s2', lltype.Ptr(S1)),
+                                   ('s3', lltype.Ptr(S1)),
+                                   ('s4', lltype.Ptr(S1)),
+                                   ('s5', lltype.Ptr(S1)),
+                                   ('s6', lltype.Ptr(S1)),
+                                   ('s7', lltype.Ptr(S1)),
+                                   ('s8', lltype.Ptr(S1)),
+                                   ('s9', lltype.Ptr(S1)),
+                                   ('s10', lltype.Ptr(S1)),
+                                   ('s11', lltype.Ptr(S1)),
+                                   ('s12', lltype.Ptr(S1)),
+                                   ('s13', lltype.Ptr(S1)),
+                                   ('s14', lltype.Ptr(S1)),
+                                   ('s15', lltype.Ptr(S1)))
+        cpu = self.cpu
+        self.namespace = self.namespace.copy()
+        for i in range(16):
+            self.namespace['ds%i' % i] = cpu.fielddescrof(S2, 's%d' % i)
+        ops = '''
+        [p0]
+        p1 = getfield_gc(p0, descr=ds0)
+        p2 = getfield_gc(p0, descr=ds1)
+        p3 = getfield_gc(p0, descr=ds2)
+        p4 = getfield_gc(p0, descr=ds3)
+        p5 = getfield_gc(p0, descr=ds4)
+        p6 = getfield_gc(p0, descr=ds5)
+        p7 = getfield_gc(p0, descr=ds6)
+        p8 = getfield_gc(p0, descr=ds7)
+        p9 = getfield_gc(p0, descr=ds8)
+        p10 = getfield_gc(p0, descr=ds9)
+        p11 = getfield_gc(p0, descr=ds10)
+        p12 = getfield_gc(p0, descr=ds11)
+        p13 = getfield_gc(p0, descr=ds12)
+        p14 = getfield_gc(p0, descr=ds13)
+        p15 = getfield_gc(p0, descr=ds14)
+        p16 = getfield_gc(p0, descr=ds15)
+        #
+        # now all registers are in use
+        p17 = call_malloc_nursery(40)
+        p18 = call_malloc_nursery(40)     # overflow
+        #
+        finish(p1, p2, p3, p4, p5, p6, p7, p8,         \
+               p9, p10, p11, p12, p13, p14, p15, p16)
+        '''
+        s2 = lltype.malloc(S2)
+        for i in range(16):
+            setattr(s2, 's%d' % i, lltype.malloc(S1))
+        s2ref = lltype.cast_opaque_ptr(llmemory.GCREF, s2)
+        #
+        self.interpret(ops, [s2ref])
+        gc_ll_descr = cpu.gc_ll_descr
+        gc_ll_descr.check_nothing_in_nursery()
+        assert gc_ll_descr.calls == [40]
+        # check the returned pointers
+        for i in range(16):
+            s1ref = self.cpu.get_latest_value_ref(i)
+            s1 = lltype.cast_opaque_ptr(lltype.Ptr(S1), s1ref)
+            assert s1 == getattr(s2, 's%d' % i)
+
+
+class MockShadowStackRootMap(MockGcRootMap):
+    is_shadow_stack = True
+    MARKER_FRAME = 88       # this marker follows the frame addr
+    S1 = lltype.GcStruct('S1')
+
+    def __init__(self):
+        self.addrs = lltype.malloc(rffi.CArray(lltype.Signed), 20,
+                                   flavor='raw')
+        # root_stack_top
+        self.addrs[0] = rffi.cast(lltype.Signed, self.addrs) + 3*WORD
+        # random stuff
+        self.addrs[1] = 123456
+        self.addrs[2] = 654321
+        self.check_initial_and_final_state()
+        self.callshapes = {}
+        self.should_see = []
+
+    def check_initial_and_final_state(self):
+        assert self.addrs[0] == rffi.cast(lltype.Signed, self.addrs) + 3*WORD
+        assert self.addrs[1] == 123456
+        assert self.addrs[2] == 654321
+
+    def get_root_stack_top_addr(self):
+        return rffi.cast(lltype.Signed, self.addrs)
+
+    def compress_callshape(self, shape, datablockwrapper):
+        assert shape[0] == 'shape'
+        return ['compressed'] + shape[1:]
+
+    def write_callshape(self, mark, force_index):
+        assert mark[0] == 'compressed'
+        assert force_index not in self.callshapes
+        assert force_index == 42 + len(self.callshapes)
+        self.callshapes[force_index] = mark
+
+    def hook_malloc_slowpath(self):
+        num_entries = self.addrs[0] - rffi.cast(lltype.Signed, self.addrs)
+        assert num_entries == 5*WORD    # 3 initially, plus 2 by the asm frame
+        assert self.addrs[1] == 123456  # unchanged
+        assert self.addrs[2] == 654321  # unchanged
+        frame_addr = self.addrs[3]                   # pushed by the asm frame
+        assert self.addrs[4] == self.MARKER_FRAME    # pushed by the asm frame
+        #
+        from pypy.jit.backend.x86.arch import FORCE_INDEX_OFS
+        addr = rffi.cast(rffi.CArrayPtr(lltype.Signed),
+                         frame_addr + FORCE_INDEX_OFS)
+        force_index = addr[0]
+        assert force_index == 43    # in this test: the 2nd call_malloc_nursery
+        #
+        # The callshapes[43] saved above should list addresses both in the
+        # COPY_AREA and in the "normal" stack, where all the 16 values p1-p16
+        # of test_save_regs_at_correct_place should have been stored.  Here
+        # we replace them with new addresses, to emulate a moving GC.
+        shape = self.callshapes[force_index]
+        assert len(shape[1:]) == len(self.should_see)
+        new_objects = [None] * len(self.should_see)
+        for ofs in shape[1:]:
+            assert isinstance(ofs, int)    # not a register at all here
+            addr = rffi.cast(rffi.CArrayPtr(lltype.Signed), frame_addr + ofs)
+            contains = addr[0]
+            for j in range(len(self.should_see)):
+                obj = self.should_see[j]
+                if contains == rffi.cast(lltype.Signed, obj):
+                    assert new_objects[j] is None   # duplicate?
+                    break
+            else:
+                assert 0   # the value read from the stack looks random?
+            new_objects[j] = lltype.malloc(self.S1)
+            addr[0] = rffi.cast(lltype.Signed, new_objects[j])
+        self.should_see[:] = new_objects
+
+
+class TestMallocShadowStack(BaseTestRegalloc):
+
+    def setup_method(self, method):
+        cpu = CPU(None, None)
+        cpu.gc_ll_descr = GCDescrFastpathMalloc()
+        cpu.gc_ll_descr.gcrootmap = MockShadowStackRootMap()
+        cpu.setup_once()
+        for i in range(42):
+            cpu.reserve_some_free_fail_descr_number()
+        self.cpu = cpu
+
+    def test_save_regs_at_correct_place(self):
+        cpu = self.cpu
+        gc_ll_descr = cpu.gc_ll_descr
+        S1 = gc_ll_descr.gcrootmap.S1
+        S2 = lltype.GcStruct('S2', ('s0', lltype.Ptr(S1)),
+                                   ('s1', lltype.Ptr(S1)),
+                                   ('s2', lltype.Ptr(S1)),
+                                   ('s3', lltype.Ptr(S1)),
+                                   ('s4', lltype.Ptr(S1)),
+                                   ('s5', lltype.Ptr(S1)),
+                                   ('s6', lltype.Ptr(S1)),
+                                   ('s7', lltype.Ptr(S1)),
+                                   ('s8', lltype.Ptr(S1)),
+                                   ('s9', lltype.Ptr(S1)),
+                                   ('s10', lltype.Ptr(S1)),
+                                   ('s11', lltype.Ptr(S1)),
+                                   ('s12', lltype.Ptr(S1)),
+                                   ('s13', lltype.Ptr(S1)),
+                                   ('s14', lltype.Ptr(S1)),
+                                   ('s15', lltype.Ptr(S1)))
+        self.namespace = self.namespace.copy()
+        for i in range(16):
+            self.namespace['ds%i' % i] = cpu.fielddescrof(S2, 's%d' % i)
+        ops = '''
+        [p0]
+        p1 = getfield_gc(p0, descr=ds0)
+        p2 = getfield_gc(p0, descr=ds1)
+        p3 = getfield_gc(p0, descr=ds2)
+        p4 = getfield_gc(p0, descr=ds3)
+        p5 = getfield_gc(p0, descr=ds4)
+        p6 = getfield_gc(p0, descr=ds5)
+        p7 = getfield_gc(p0, descr=ds6)
+        p8 = getfield_gc(p0, descr=ds7)
+        p9 = getfield_gc(p0, descr=ds8)
+        p10 = getfield_gc(p0, descr=ds9)
+        p11 = getfield_gc(p0, descr=ds10)
+        p12 = getfield_gc(p0, descr=ds11)
+        p13 = getfield_gc(p0, descr=ds12)
+        p14 = getfield_gc(p0, descr=ds13)
+        p15 = getfield_gc(p0, descr=ds14)
+        p16 = getfield_gc(p0, descr=ds15)
+        #
+        # now all registers are in use
+        p17 = call_malloc_nursery(40)
+        p18 = call_malloc_nursery(40)     # overflow
+        #
+        finish(p1, p2, p3, p4, p5, p6, p7, p8,         \
+               p9, p10, p11, p12, p13, p14, p15, p16)
+        '''
+        s2 = lltype.malloc(S2)
+        for i in range(16):
+            s1 = lltype.malloc(S1)
+            setattr(s2, 's%d' % i, s1)
+            gc_ll_descr.gcrootmap.should_see.append(s1)
+        s2ref = lltype.cast_opaque_ptr(llmemory.GCREF, s2)
+        #
+        self.interpret(ops, [s2ref])
+        gc_ll_descr.check_nothing_in_nursery()
+        assert gc_ll_descr.calls == [40]
+        gc_ll_descr.gcrootmap.check_initial_and_final_state()
+        # check the returned pointers
+        for i in range(16):
+            s1ref = self.cpu.get_latest_value_ref(i)
+            s1 = lltype.cast_opaque_ptr(lltype.Ptr(S1), s1ref)
+            for j in range(16):
+                assert s1 != getattr(s2, 's%d' % j)
+            assert s1 == gc_ll_descr.gcrootmap.should_see[i]
diff --git a/pypy/jit/backend/x86/test/test_recompilation.py b/pypy/jit/backend/x86/test/test_recompilation.py
--- a/pypy/jit/backend/x86/test/test_recompilation.py
+++ b/pypy/jit/backend/x86/test/test_recompilation.py
@@ -34,7 +34,6 @@
         '''
         loop = self.interpret(ops, [0])
         previous = loop._jitcelltoken.compiled_loop_token.frame_depth
-        assert loop._jitcelltoken.compiled_loop_token.param_depth == 0
         assert self.getint(0) == 20
         ops = '''
         [i1]
@@ -51,7 +50,6 @@
         bridge = self.attach_bridge(ops, loop, -2)
         descr = loop.operations[3].getdescr()
         new = descr._x86_bridge_frame_depth
-        assert descr._x86_bridge_param_depth == 0
         # the force_spill() forces the stack to grow
         assert new > previous
         fail = self.run(loop, 0)
@@ -116,10 +114,8 @@
         loop_frame_depth = loop._jitcelltoken.compiled_loop_token.frame_depth
         bridge = self.attach_bridge(ops, loop, 6)
         guard_op = loop.operations[6]
-        assert loop._jitcelltoken.compiled_loop_token.param_depth == 0
         # the force_spill() forces the stack to grow
         assert guard_op.getdescr()._x86_bridge_frame_depth > loop_frame_depth
-        assert guard_op.getdescr()._x86_bridge_param_depth == 0
         self.run(loop, 0, 0, 0, 0, 0, 0)
         assert self.getint(0) == 1
         assert self.getint(1) == 20
diff --git a/pypy/jit/backend/x86/test/test_regalloc.py b/pypy/jit/backend/x86/test/test_regalloc.py
--- a/pypy/jit/backend/x86/test/test_regalloc.py
+++ b/pypy/jit/backend/x86/test/test_regalloc.py
@@ -606,23 +606,37 @@
         assert self.getints(9) == [0, 1, 1, 1, 1, 1, 1, 1, 1]
 
 class TestRegAllocCallAndStackDepth(BaseTestRegalloc):
-    def expected_param_depth(self, num_args):
+    def expected_frame_depth(self, num_call_args, num_pushed_input_args=0):
         # Assumes the arguments are all non-float
         if IS_X86_32:
-            return num_args
+            extra_esp = num_call_args
+            return extra_esp
         elif IS_X86_64:
-            return max(num_args - 6, 0)
+            # 'num_pushed_input_args' is for X86_64 only
+            extra_esp = max(num_call_args - 6, 0)
+            return num_pushed_input_args + extra_esp
 
     def test_one_call(self):
         ops = '''
-        [i0, i1, i2, i3, i4, i5, i6, i7, i8, i9]
+        [i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i9b]
         i10 = call(ConstClass(f1ptr), i0, descr=f1_calldescr)
-        finish(i10, i1, i2, i3, i4, i5, i6, i7, i8, i9)
+        finish(i10, i1, i2, i3, i4, i5, i6, i7, i8, i9, i9b)
         '''
-        loop = self.interpret(ops, [4, 7, 9, 9 ,9, 9, 9, 9, 9, 9])
-        assert self.getints(10) == [5, 7, 9, 9, 9, 9, 9, 9, 9, 9]
+        loop = self.interpret(ops, [4, 7, 9, 9 ,9, 9, 9, 9, 9, 9, 8])
+        assert self.getints(11) == [5, 7, 9, 9, 9, 9, 9, 9, 9, 9, 8]
         clt = loop._jitcelltoken.compiled_loop_token
-        assert clt.param_depth == self.expected_param_depth(1)
+        assert clt.frame_depth == self.expected_frame_depth(1, 5)
+
+    def test_one_call_reverse(self):
+        ops = '''
+        [i1, i2, i3, i4, i5, i6, i7, i8, i9, i9b, i0]
+        i10 = call(ConstClass(f1ptr), i0, descr=f1_calldescr)
+        finish(i10, i1, i2, i3, i4, i5, i6, i7, i8, i9, i9b)
+        '''
+        loop = self.interpret(ops, [7, 9, 9 ,9, 9, 9, 9, 9, 9, 8, 4])
+        assert self.getints(11) == [5, 7, 9, 9, 9, 9, 9, 9, 9, 9, 8]
+        clt = loop._jitcelltoken.compiled_loop_token
+        assert clt.frame_depth == self.expected_frame_depth(1, 6)
 
     def test_two_calls(self):
         ops = '''
@@ -634,7 +648,7 @@
         loop = self.interpret(ops, [4, 7, 9, 9 ,9, 9, 9, 9, 9, 9])
         assert self.getints(10) == [5*7, 7, 9, 9, 9, 9, 9, 9, 9, 9]
         clt = loop._jitcelltoken.compiled_loop_token
-        assert clt.param_depth == self.expected_param_depth(2)
+        assert clt.frame_depth == self.expected_frame_depth(2, 5)
 
     def test_call_many_arguments(self):
         # NB: The first and last arguments in the call are constants. This
@@ -648,25 +662,31 @@
         loop = self.interpret(ops, [2, 3, 4, 5, 6, 7, 8, 9])
         assert self.getint(0) == 55
         clt = loop._jitcelltoken.compiled_loop_token
-        assert clt.param_depth == self.expected_param_depth(10)
+        assert clt.frame_depth == self.expected_frame_depth(10)
 
     def test_bridge_calls_1(self):
         ops = '''
         [i0, i1]
         i2 = call(ConstClass(f1ptr), i0, descr=f1_calldescr)
-        guard_value(i2, 0, descr=fdescr1) [i2, i1]
+        guard_value(i2, 0, descr=fdescr1) [i2, i0, i1]
         finish(i1)
         '''
         loop = self.interpret(ops, [4, 7])
         assert self.getint(0) == 5
+        clt = loop._jitcelltoken.compiled_loop_token
+        orgdepth = clt.frame_depth
+        assert orgdepth == self.expected_frame_depth(1, 2)
+
         ops = '''
-        [i2, i1]
+        [i2, i0, i1]
         i3 = call(ConstClass(f2ptr), i2, i1, descr=f2_calldescr)        
-        finish(i3, descr=fdescr2)        
+        finish(i3, i0, descr=fdescr2)
         '''
         bridge = self.attach_bridge(ops, loop, -2)
 
-        assert loop.operations[-2].getdescr()._x86_bridge_param_depth == self.expected_param_depth(2)
+        assert clt.frame_depth == max(orgdepth, self.expected_frame_depth(2, 2))
+        assert loop.operations[-2].getdescr()._x86_bridge_frame_depth == \
+            self.expected_frame_depth(2, 2)
 
         self.run(loop, 4, 7)
         assert self.getint(0) == 5*7
@@ -676,10 +696,14 @@
         [i0, i1]
         i2 = call(ConstClass(f2ptr), i0, i1, descr=f2_calldescr)
         guard_value(i2, 0, descr=fdescr1) [i2]
-        finish(i1)
+        finish(i2)
         '''
         loop = self.interpret(ops, [4, 7])
         assert self.getint(0) == 4*7
+        clt = loop._jitcelltoken.compiled_loop_token
+        orgdepth = clt.frame_depth
+        assert orgdepth == self.expected_frame_depth(2)
+
         ops = '''
         [i2]
         i3 = call(ConstClass(f1ptr), i2, descr=f1_calldescr)        
@@ -687,7 +711,9 @@
         '''
         bridge = self.attach_bridge(ops, loop, -2)
 
-        assert loop.operations[-2].getdescr()._x86_bridge_param_depth == self.expected_param_depth(2)
+        assert clt.frame_depth == max(orgdepth, self.expected_frame_depth(1))
+        assert loop.operations[-2].getdescr()._x86_bridge_frame_depth == \
+            self.expected_frame_depth(1)
 
         self.run(loop, 4, 7)
         assert self.getint(0) == 29
diff --git a/pypy/jit/backend/x86/test/test_runner.py b/pypy/jit/backend/x86/test/test_runner.py
--- a/pypy/jit/backend/x86/test/test_runner.py
+++ b/pypy/jit/backend/x86/test/test_runner.py
@@ -371,7 +371,7 @@
 
         operations = [
             ResOperation(rop.LABEL, [i0], None, descr=targettoken),
-            ResOperation(rop.DEBUG_MERGE_POINT, [FakeString("hello"), 0], None),
+            ResOperation(rop.DEBUG_MERGE_POINT, [FakeString("hello"), 0, 0], None),
             ResOperation(rop.INT_ADD, [i0, ConstInt(1)], i1),
             ResOperation(rop.INT_LE, [i1, ConstInt(9)], i2),
             ResOperation(rop.GUARD_TRUE, [i2], None, descr=faildescr1),
@@ -390,7 +390,7 @@
         bridge = [
             ResOperation(rop.INT_LE, [i1b, ConstInt(19)], i3),
             ResOperation(rop.GUARD_TRUE, [i3], None, descr=faildescr2),
-            ResOperation(rop.DEBUG_MERGE_POINT, [FakeString("bye"), 0], None),
+            ResOperation(rop.DEBUG_MERGE_POINT, [FakeString("bye"), 0, 0], None),
             ResOperation(rop.JUMP, [i1b], None, descr=targettoken),
         ]
         bridge[1].setfailargs([i1b])
@@ -531,12 +531,12 @@
         loop = """
         [i0]
         label(i0, descr=preambletoken)
-        debug_merge_point('xyz', 0)
+        debug_merge_point('xyz', 0, 0)
         i1 = int_add(i0, 1)
         i2 = int_ge(i1, 10)
         guard_false(i2) []
         label(i1, descr=targettoken)
-        debug_merge_point('xyz', 0)
+        debug_merge_point('xyz', 0, 0)
         i11 = int_add(i1, 1)
         i12 = int_ge(i11, 10)
         guard_false(i12) []
@@ -569,7 +569,7 @@
         loop = """
         [i0]
         label(i0, descr=targettoken)
-        debug_merge_point('xyz', 0)
+        debug_merge_point('xyz', 0, 0)
         i1 = int_add(i0, 1)
         i2 = int_ge(i1, 10)
         guard_false(i2) []
diff --git a/pypy/jit/backend/x86/test/test_ztranslation.py b/pypy/jit/backend/x86/test/test_ztranslation.py
--- a/pypy/jit/backend/x86/test/test_ztranslation.py
+++ b/pypy/jit/backend/x86/test/test_ztranslation.py
@@ -52,6 +52,7 @@
             set_param(jitdriver, "trace_eagerness", 2)
             total = 0
             frame = Frame(i)
+            j = float(j)
             while frame.i > 3:
                 jitdriver.can_enter_jit(frame=frame, total=total, j=j)
                 jitdriver.jit_merge_point(frame=frame, total=total, j=j)
diff --git a/pypy/jit/codewriter/jtransform.py b/pypy/jit/codewriter/jtransform.py
--- a/pypy/jit/codewriter/jtransform.py
+++ b/pypy/jit/codewriter/jtransform.py
@@ -365,7 +365,7 @@
     def handle_builtin_call(self, op):
         oopspec_name, args = support.decode_builtin_call(op)
         # dispatch to various implementations depending on the oopspec_name
-        if oopspec_name.startswith('list.') or oopspec_name == 'newlist':
+        if oopspec_name.startswith('list.') or oopspec_name.startswith('newlist'):
             prepare = self._handle_list_call
         elif oopspec_name.startswith('stroruni.'):
             prepare = self._handle_stroruni_call
@@ -1494,6 +1494,14 @@
                                arraydescr, v_length],
                               op.result)
 
+    def do_resizable_newlist_hint(self, op, args, arraydescr, lengthdescr,
+                                  itemsdescr, structdescr):
+        v_hint = self._get_initial_newlist_length(op, args)
+        return SpaceOperation('newlist_hint',
+                              [structdescr, lengthdescr, itemsdescr,
+                               arraydescr, v_hint],
+                              op.result)
+
     def do_resizable_list_getitem(self, op, args, arraydescr, lengthdescr,
                                   itemsdescr, structdescr):
         v_index, extraop = self._prepare_list_getset(op, lengthdescr, args,
diff --git a/pypy/jit/codewriter/support.py b/pypy/jit/codewriter/support.py
--- a/pypy/jit/codewriter/support.py
+++ b/pypy/jit/codewriter/support.py
@@ -144,6 +144,10 @@
 _ll_1_newlist.need_result_type = True
 _ll_2_newlist.need_result_type = True
 
+def _ll_1_newlist_hint(LIST, hint):
+    return LIST.ll_newlist_hint(hint)
+_ll_1_newlist_hint.need_result_type = True
+
 def _ll_1_list_len(l):
     return l.ll_length()
 def _ll_2_list_getitem(l, index):
diff --git a/pypy/jit/metainterp/blackhole.py b/pypy/jit/metainterp/blackhole.py
--- a/pypy/jit/metainterp/blackhole.py
+++ b/pypy/jit/metainterp/blackhole.py
@@ -982,6 +982,15 @@
         cpu.bh_setfield_gc_r(result, itemsdescr, items)
         return result
 
+    @arguments("cpu", "d", "d", "d", "d", "i", returns="r")
+    def bhimpl_newlist_hint(cpu, structdescr, lengthdescr, itemsdescr,
+                            arraydescr, lengthhint):
+        result = cpu.bh_new(structdescr)
+        cpu.bh_setfield_gc_i(result, lengthdescr, 0)
+        items = cpu.bh_new_array(arraydescr, lengthhint)
+        cpu.bh_setfield_gc_r(result, itemsdescr, items)
+        return result
+
     @arguments("cpu", "r", "d", "d", "i", returns="i")
     def bhimpl_getlistitem_gc_i(cpu, lst, itemsdescr, arraydescr, index):
         items = cpu.bh_getfield_gc_r(lst, itemsdescr)
@@ -1379,7 +1388,8 @@
         elif opnum == rop.GUARD_NO_OVERFLOW:
             # Produced by int_xxx_ovf().  The pc is just after the opcode.
             # We get here because it did not used to overflow, but now it does.
-            return get_llexception(self.cpu, OverflowError())
+            if not dont_change_position:
+                return get_llexception(self.cpu, OverflowError())
         #
         elif opnum == rop.GUARD_OVERFLOW:
             # Produced by int_xxx_ovf().  The pc is just after the opcode.
diff --git a/pypy/jit/metainterp/compile.py b/pypy/jit/metainterp/compile.py
--- a/pypy/jit/metainterp/compile.py
+++ b/pypy/jit/metainterp/compile.py
@@ -289,8 +289,21 @@
             assert isinstance(token, TargetToken)
             assert token.original_jitcell_token is None
             token.original_jitcell_token = trace.original_jitcell_token
-            
-    
+
+
+def do_compile_loop(metainterp_sd, inputargs, operations, looptoken,
+                    log=True, name=''):
+    metainterp_sd.logger_ops.log_loop(inputargs, operations, -2,
+                                      'compiling', name=name)
+    return metainterp_sd.cpu.compile_loop(inputargs, operations, looptoken,
+                                          log=log, name=name)
+
+def do_compile_bridge(metainterp_sd, faildescr, inputargs, operations,
+                      original_loop_token, log=True):
+    metainterp_sd.logger_ops.log_bridge(inputargs, operations, -2)
+    return metainterp_sd.cpu.compile_bridge(faildescr, inputargs, operations,
+                                            original_loop_token, log=log)
+
 def send_loop_to_backend(greenkey, jitdriver_sd, metainterp_sd, loop, type):
     vinfo = jitdriver_sd.virtualizable_info
     if vinfo is not None:
@@ -319,9 +332,9 @@
     metainterp_sd.profiler.start_backend()
     debug_start("jit-backend")
     try:
-        asminfo = metainterp_sd.cpu.compile_loop(loop.inputargs, operations,
-                                                  original_jitcell_token,
-                                                  name=loopname)
+        asminfo = do_compile_loop(metainterp_sd, loop.inputargs,
+                                  operations, original_jitcell_token,
+                                  name=loopname)
     finally:
         debug_stop("jit-backend")
     metainterp_sd.profiler.end_backend()
@@ -333,7 +346,6 @@
         metainterp_sd.stats.compiled()
     metainterp_sd.log("compiled new " + type)
     #
-    loopname = jitdriver_sd.warmstate.get_location_str(greenkey)
     if asminfo is not None:
         ops_offset = asminfo.ops_offset
     else:
@@ -365,9 +377,9 @@
     metainterp_sd.profiler.start_backend()
     debug_start("jit-backend")
     try:
-        asminfo = metainterp_sd.cpu.compile_bridge(faildescr, inputargs,
-                                                   operations,
-                                                   original_loop_token)
+        asminfo = do_compile_bridge(metainterp_sd, faildescr, inputargs,
+                                    operations,
+                                    original_loop_token)
     finally:
         debug_stop("jit-backend")
     metainterp_sd.profiler.end_backend()
diff --git a/pypy/jit/metainterp/graphpage.py b/pypy/jit/metainterp/graphpage.py
--- a/pypy/jit/metainterp/graphpage.py
+++ b/pypy/jit/metainterp/graphpage.py
@@ -169,9 +169,9 @@
             if op.getopnum() == rop.DEBUG_MERGE_POINT:
                 jd_sd = self.metainterp_sd.jitdrivers_sd[op.getarg(0).getint()]
                 if jd_sd._get_printable_location_ptr:
-                    s = jd_sd.warmstate.get_location_str(op.getarglist()[2:])
+                    s = jd_sd.warmstate.get_location_str(op.getarglist()[3:])
                     s = s.replace(',', '.') # we use comma for argument splitting
-                    op_repr = "debug_merge_point(%d, '%s')" % (op.getarg(1).getint(), s)
+                    op_repr = "debug_merge_point(%d, %d, '%s')" % (op.getarg(1).getint(), op.getarg(2).getint(), s)
             lines.append(op_repr)
             if is_interesting_guard(op):
                 tgt = op.getdescr()._debug_suboperations[0]
diff --git a/pypy/jit/metainterp/logger.py b/pypy/jit/metainterp/logger.py
--- a/pypy/jit/metainterp/logger.py
+++ b/pypy/jit/metainterp/logger.py
@@ -18,6 +18,10 @@
             debug_start("jit-log-noopt-loop")
             logops = self._log_operations(inputargs, operations, ops_offset)
             debug_stop("jit-log-noopt-loop")
+        elif number == -2:
+            debug_start("jit-log-compiling-loop")
+            logops = self._log_operations(inputargs, operations, ops_offset)
+            debug_stop("jit-log-compiling-loop")
         else:
             debug_start("jit-log-opt-loop")
             debug_print("# Loop", number, '(%s)' % name , ":", type,
@@ -31,6 +35,10 @@
             debug_start("jit-log-noopt-bridge")
             logops = self._log_operations(inputargs, operations, ops_offset)
             debug_stop("jit-log-noopt-bridge")
+        elif number == -2:
+            debug_start("jit-log-compiling-bridge")
+            logops = self._log_operations(inputargs, operations, ops_offset)
+            debug_stop("jit-log-compiling-bridge")
         else:
             debug_start("jit-log-opt-bridge")
             debug_print("# bridge out of Guard", number,
@@ -102,9 +110,9 @@
     def repr_of_resop(self, op, ops_offset=None):
         if op.getopnum() == rop.DEBUG_MERGE_POINT:
             jd_sd = self.metainterp_sd.jitdrivers_sd[op.getarg(0).getint()]
-            s = jd_sd.warmstate.get_location_str(op.getarglist()[2:])
+            s = jd_sd.warmstate.get_location_str(op.getarglist()[3:])
             s = s.replace(',', '.') # we use comma for argument splitting
-            return "debug_merge_point(%d, '%s')" % (op.getarg(1).getint(), s)
+            return "debug_merge_point(%d, %d, '%s')" % (op.getarg(1).getint(), op.getarg(2).getint(), s)
         if ops_offset is None:
             offset = -1
         else:
@@ -141,7 +149,7 @@
             if target_token.exported_state:
                 for op in target_token.exported_state.inputarg_setup_ops:
                     debug_print('    ' + self.repr_of_resop(op))
-        
+
     def _log_operations(self, inputargs, operations, ops_offset):
         if not have_debug_prints():
             return
diff --git a/pypy/jit/metainterp/optimizeopt/__init__.py b/pypy/jit/metainterp/optimizeopt/__init__.py
--- a/pypy/jit/metainterp/optimizeopt/__init__.py
+++ b/pypy/jit/metainterp/optimizeopt/__init__.py
@@ -9,7 +9,7 @@
 from pypy.jit.metainterp.optimizeopt.simplify import OptSimplify
 from pypy.jit.metainterp.optimizeopt.pure import OptPure
 from pypy.jit.metainterp.optimizeopt.earlyforce import OptEarlyForce
-from pypy.rlib.jit import PARAMETERS
+from pypy.rlib.jit import PARAMETERS, ENABLE_ALL_OPTS
 from pypy.rlib.unroll import unrolling_iterable
 from pypy.rlib.debug import debug_start, debug_stop, debug_print
 
@@ -30,6 +30,9 @@
 ALL_OPTS_LIST = [name for name, _ in ALL_OPTS]
 ALL_OPTS_NAMES = ':'.join([name for name, _ in ALL_OPTS])
 
+assert ENABLE_ALL_OPTS == ALL_OPTS_NAMES, (
+    'please fix rlib/jit.py to say ENABLE_ALL_OPTS = %r' % (ALL_OPTS_NAMES,))
+
 def build_opt_chain(metainterp_sd, enable_opts):
     config = metainterp_sd.config
     optimizations = []
diff --git a/pypy/jit/metainterp/optimizeopt/optimizer.py b/pypy/jit/metainterp/optimizeopt/optimizer.py
--- a/pypy/jit/metainterp/optimizeopt/optimizer.py
+++ b/pypy/jit/metainterp/optimizeopt/optimizer.py
@@ -567,7 +567,7 @@
         assert isinstance(descr, compile.ResumeGuardDescr)
         modifier = resume.ResumeDataVirtualAdder(descr, self.resumedata_memo)
         try:
-            newboxes = modifier.finish(self.values, self.pendingfields)
+            newboxes = modifier.finish(self, self.pendingfields)
             if len(newboxes) > self.metainterp_sd.options.failargs_limit:
                 raise resume.TagOverflow
         except resume.TagOverflow:
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_multilabel.py b/pypy/jit/metainterp/optimizeopt/test/test_multilabel.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_multilabel.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_multilabel.py
@@ -398,6 +398,40 @@
         with raises(InvalidLoop):
             self.optimize_loop(ops, ops)
 
+    def test_issue1045(self):
+        ops = """
+        [i55]
+        i73 = int_mod(i55, 2)
+        i75 = int_rshift(i73, 63)
+        i76 = int_and(2, i75)
+        i77 = int_add(i73, i76)
+        i81 = int_eq(i77, 1)
+        i0 = int_ge(i55, 1)
+        guard_true(i0) []
+        label(i55)
+        i3 = int_mod(i55, 2)
+        i5 = int_rshift(i3, 63)
+        i6 = int_and(2, i5)
+        i7 = int_add(i3, i6)
+        i8 = int_eq(i7, 1)
+        escape(i8)
+        jump(i55)
+        """
+        expected = """
+        [i55]
+        i73 = int_mod(i55, 2)
+        i75 = int_rshift(i73, 63)
+        i76 = int_and(2, i75)
+        i77 = int_add(i73, i76)
+        i81 = int_eq(i77, 1)
+        i0 = int_ge(i55, 1)
+        guard_true(i0) []
+        label(i55, i81)
+        escape(i81)
+        jump(i55, i81)
+        """
+        self.optimize_loop(ops, expected)
+        
 class OptRenameStrlen(Optimization):
     def propagate_forward(self, op):
         dispatch_opt(self, op)
@@ -423,7 +457,7 @@
         metainterp_sd = FakeMetaInterpStaticData(self.cpu)
         optimize_unroll(metainterp_sd, loop, [OptRenameStrlen(), OptPure()], True)
 
-    def test_optimizer_renaming_boxes(self):
+    def test_optimizer_renaming_boxes1(self):
         ops = """
         [p1]
         i1 = strlen(p1)
@@ -457,7 +491,6 @@
         jump(p1, i11)
         """
         self.optimize_loop(ops, expected)
-
         
 
 class TestLLtype(OptimizeoptTestMultiLabel, LLtypeMixin):
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
@@ -5031,6 +5031,42 @@
         """
         self.optimize_loop(ops, expected)
 
+    def test_str_copy_virtual(self):
+        ops = """
+        [i0]
+        p0 = newstr(8)
+        strsetitem(p0, 0, i0)
+        strsetitem(p0, 1, i0)
+        strsetitem(p0, 2, i0)
+        strsetitem(p0, 3, i0)
+        strsetitem(p0, 4, i0)
+        strsetitem(p0, 5, i0)
+        strsetitem(p0, 6, i0)
+        strsetitem(p0, 7, i0)
+        p1 = newstr(12)
+        copystrcontent(p0, p1, 0, 0, 8)
+        strsetitem(p1, 8, 3)
+        strsetitem(p1, 9, 0)
+        strsetitem(p1, 10, 0)
+        strsetitem(p1, 11, 0)
+        finish(p1)
+        """
+        expected = """
+        [i0]
+        p1 = newstr(12)
+        strsetitem(p1, 0, i0)
+        strsetitem(p1, 1, i0)
+        strsetitem(p1, 2, i0)
+        strsetitem(p1, 3, i0)
+        strsetitem(p1, 4, i0)
+        strsetitem(p1, 5, i0)
+        strsetitem(p1, 6, i0)
+        strsetitem(p1, 7, i0)
+        strsetitem(p1, 8, 3)
+        finish(p1)
+        """
+        self.optimize_strunicode_loop(ops, expected)
+
 
 class TestLLtype(BaseTestOptimizeBasic, LLtypeMixin):
     pass
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
@@ -7760,6 +7760,59 @@
         """
         self.optimize_loop(ops, expected)
         
+    def test_constant_failargs(self):
+        ops = """
+        [p1, i2, i3]
+        setfield_gc(p1, ConstPtr(myptr), descr=nextdescr)
+        p16 = getfield_gc(p1, descr=nextdescr)
+        guard_true(i2) [p16, i3]
+        jump(p1, i3, i2)
+        """
+        preamble = """
+        [p1, i2, i3]
+        setfield_gc(p1, ConstPtr(myptr), descr=nextdescr)
+        guard_true(i2) [i3]
+        jump(p1, i3)
+        """
+        expected = """
+        [p1, i3]
+        guard_true(i3) []
+        jump(p1, 1)
+        """
+        self.optimize_loop(ops, expected, preamble)
+
+    def test_issue1048(self):
+        ops = """
+        [p1, i2, i3]
+        p16 = getfield_gc(p1, descr=nextdescr)
+        guard_true(i2) [p16]
+        setfield_gc(p1, ConstPtr(myptr), descr=nextdescr)
+        jump(p1, i3, i2)
+        """
+        expected = """
+        [p1, i3]
+        guard_true(i3) []
+        jump(p1, 1)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_issue1048_ok(self):
+        ops = """
+        [p1, i2, i3]
+        p16 = getfield_gc(p1, descr=nextdescr)
+        call(p16, descr=nonwritedescr)
+        guard_true(i2) [p16]
+        setfield_gc(p1, ConstPtr(myptr), descr=nextdescr)
+        jump(p1, i3, i2)
+        """
+        expected = """
+        [p1, i3]
+        call(ConstPtr(myptr), descr=nonwritedescr)
+        guard_true(i3) []
+        jump(p1, 1)
+        """
+        self.optimize_loop(ops, expected)
+
 class TestLLtype(OptimizeOptTest, LLtypeMixin):
     pass
 
diff --git a/pypy/jit/metainterp/optimizeopt/unroll.py b/pypy/jit/metainterp/optimizeopt/unroll.py
--- a/pypy/jit/metainterp/optimizeopt/unroll.py
+++ b/pypy/jit/metainterp/optimizeopt/unroll.py
@@ -9,7 +9,6 @@
 from pypy.jit.metainterp.inliner import Inliner
 from pypy.jit.metainterp.resoperation import rop, ResOperation
 from pypy.jit.metainterp.resume import Snapshot
-from pypy.rlib.debug import debug_print
 import sys, os
 
 # FIXME: Introduce some VirtualOptimizer super class instead
@@ -121,9 +120,9 @@
                 limit = self.optimizer.metainterp_sd.warmrunnerdesc.memory_manager.retrace_limit
                 if cell_token.retraced_count < limit:
                     cell_token.retraced_count += 1
-                    debug_print('Retracing (%d/%d)' % (cell_token.retraced_count, limit))
+                    #debug_print('Retracing (%d/%d)' % (cell_token.retraced_count, limit))
                 else:
-                    debug_print("Retrace count reached, jumping to preamble")
+                    #debug_print("Retrace count reached, jumping to preamble")
                     assert cell_token.target_tokens[0].virtual_state is None
                     jumpop.setdescr(cell_token.target_tokens[0])
                     self.optimizer.send_extra_operation(jumpop)
@@ -260,7 +259,7 @@
             if op and op.result:
                 preamble_value = exported_state.exported_values[op.result]
                 value = self.optimizer.getvalue(op.result)
-                if not value.is_virtual():
+                if not value.is_virtual() and not value.is_constant():
                     imp = ValueImporter(self, preamble_value, op)
                     self.optimizer.importable_values[value] = imp
                 newvalue = self.optimizer.getvalue(op.result)
@@ -268,12 +267,14 @@
                 # note that emitting here SAME_AS should not happen, but
                 # in case it does, we would prefer to be suboptimal in asm
                 # to a fatal RPython exception.
-                if newresult is not op.result and not newvalue.is_constant():
+                if newresult is not op.result and \
+                   not self.short_boxes.has_producer(newresult) and \
+                   not newvalue.is_constant():
                     op = ResOperation(rop.SAME_AS, [op.result], newresult)
                     self.optimizer._newoperations.append(op)
-                    if self.optimizer.loop.logops:
-                        debug_print('  Falling back to add extra: ' +
-                                    self.optimizer.loop.logops.repr_of_resop(op))
+                    #if self.optimizer.loop.logops:
+                    #    debug_print('  Falling back to add extra: ' +
+                    #                self.optimizer.loop.logops.repr_of_resop(op))
                     
         self.optimizer.flush()
         self.optimizer.emitting_dissabled = False
@@ -339,8 +340,8 @@
             if i == len(newoperations):
                 while j < len(jumpargs):
                     a = jumpargs[j]
-                    if self.optimizer.loop.logops:
-                        debug_print('J:  ' + self.optimizer.loop.logops.repr_of_arg(a))
+                    #if self.optimizer.loop.logops:
+                    #    debug_print('J:  ' + self.optimizer.loop.logops.repr_of_arg(a))
                     self.import_box(a, inputargs, short_jumpargs, jumpargs)
                     j += 1
             else:
@@ -351,11 +352,11 @@
                 if op.is_guard():
                     args = args + op.getfailargs()
 
-                if self.optimizer.loop.logops:
-                    debug_print('OP: ' + self.optimizer.loop.logops.repr_of_resop(op))
+                #if self.optimizer.loop.logops:
+                #    debug_print('OP: ' + self.optimizer.loop.logops.repr_of_resop(op))
                 for a in args:
-                    if self.optimizer.loop.logops:
-                        debug_print('A:  ' + self.optimizer.loop.logops.repr_of_arg(a))
+                    #if self.optimizer.loop.logops:
+                    #    debug_print('A:  ' + self.optimizer.loop.logops.repr_of_arg(a))
                     self.import_box(a, inputargs, short_jumpargs, jumpargs)
                 i += 1
             newoperations = self.optimizer.get_newoperations()
@@ -368,18 +369,18 @@
         # that is compatible with the virtual state at the start of the loop
         modifier = VirtualStateAdder(self.optimizer)
         final_virtual_state = modifier.get_virtual_state(original_jumpargs)
-        debug_start('jit-log-virtualstate')
-        virtual_state.debug_print('Closed loop with ')
+        #debug_start('jit-log-virtualstate')
+        #virtual_state.debug_print('Closed loop with ')
         bad = {}
         if not virtual_state.generalization_of(final_virtual_state, bad):
             # We ended up with a virtual state that is not compatible
             # and we are thus unable to jump to the start of the loop
-            final_virtual_state.debug_print("Bad virtual state at end of loop, ",
-                                            bad)
-            debug_stop('jit-log-virtualstate')
+            #final_virtual_state.debug_print("Bad virtual state at end of loop, ",
+            #                                bad)
+            #debug_stop('jit-log-virtualstate')
             raise InvalidLoop
             
-        debug_stop('jit-log-virtualstate')
+        #debug_stop('jit-log-virtualstate')
 
         maxguards = self.optimizer.metainterp_sd.warmrunnerdesc.memory_manager.max_retrace_guards
         if self.optimizer.emitted_guards > maxguards:
@@ -442,9 +443,9 @@
                 self.ensure_short_op_emitted(self.short_boxes.producer(a), optimizer,
                                              seen)
 
-        if self.optimizer.loop.logops:
-            debug_print('  Emitting short op: ' +
-                        self.optimizer.loop.logops.repr_of_resop(op))
+        #if self.optimizer.loop.logops:
+        #    debug_print('  Emitting short op: ' +
+        #                self.optimizer.loop.logops.repr_of_resop(op))
 
         optimizer.send_extra_operation(op)
         seen[op.result] = True
@@ -525,8 +526,8 @@
         args = jumpop.getarglist()
         modifier = VirtualStateAdder(self.optimizer)
         virtual_state = modifier.get_virtual_state(args)
-        debug_start('jit-log-virtualstate')
-        virtual_state.debug_print("Looking for ")
+        #debug_start('jit-log-virtualstate')
+        #virtual_state.debug_print("Looking for ")
 
         for target in cell_token.target_tokens:
             if not target.virtual_state:
@@ -535,10 +536,10 @@
             extra_guards = []
 
             bad = {}
-            debugmsg = 'Did not match '
+            #debugmsg = 'Did not match '
             if target.virtual_state.generalization_of(virtual_state, bad):
                 ok = True
-                debugmsg = 'Matched '
+                #debugmsg = 'Matched '
             else:
                 try:
                     cpu = self.optimizer.cpu
@@ -547,13 +548,13 @@
                                                          extra_guards)
 
                     ok = True
-                    debugmsg = 'Guarded to match '
+                    #debugmsg = 'Guarded to match '
                 except InvalidLoop:
                     pass
-            target.virtual_state.debug_print(debugmsg, bad)
+            #target.virtual_state.debug_print(debugmsg, bad)
 
             if ok:
-                debug_stop('jit-log-virtualstate')
+                #debug_stop('jit-log-virtualstate')
 
                 values = [self.getvalue(arg)
                           for arg in jumpop.getarglist()]
@@ -574,13 +575,13 @@
                         newop = inliner.inline_op(shop)
                         self.optimizer.send_extra_operation(newop)
                 except InvalidLoop:
-                    debug_print("Inlining failed unexpectedly",
-                                "jumping to preamble instead")
+                    #debug_print("Inlining failed unexpectedly",
+                    #            "jumping to preamble instead")
                     assert cell_token.target_tokens[0].virtual_state is None
                     jumpop.setdescr(cell_token.target_tokens[0])
                     self.optimizer.send_extra_operation(jumpop)
                 return True
-        debug_stop('jit-log-virtualstate')
+        #debug_stop('jit-log-virtualstate')
         return False
 
 class ValueImporter(object):
diff --git a/pypy/jit/metainterp/optimizeopt/virtualstate.py b/pypy/jit/metainterp/optimizeopt/virtualstate.py
--- a/pypy/jit/metainterp/optimizeopt/virtualstate.py
+++ b/pypy/jit/metainterp/optimizeopt/virtualstate.py
@@ -681,13 +681,14 @@
             self.synthetic[op] = True
 
     def debug_print(self, logops):
-        debug_start('jit-short-boxes')
-        for box, op in self.short_boxes.items():
-            if op:
-                debug_print(logops.repr_of_arg(box) + ': ' + logops.repr_of_resop(op))
-            else:
-                debug_print(logops.repr_of_arg(box) + ': None')
-        debug_stop('jit-short-boxes')
+        if 0:
+            debug_start('jit-short-boxes')
+            for box, op in self.short_boxes.items():
+                if op:
+                    debug_print(logops.repr_of_arg(box) + ': ' + logops.repr_of_resop(op))
+                else:
+                    debug_print(logops.repr_of_arg(box) + ': None')
+            debug_stop('jit-short-boxes')
 
     def operations(self):
         if not we_are_translated(): # For tests
diff --git a/pypy/jit/metainterp/optimizeopt/vstring.py b/pypy/jit/metainterp/optimizeopt/vstring.py
--- a/pypy/jit/metainterp/optimizeopt/vstring.py
+++ b/pypy/jit/metainterp/optimizeopt/vstring.py
@@ -505,14 +505,23 @@
 
         if length.is_constant() and length.box.getint() == 0:
             return
-        copy_str_content(self,
-            src.force_box(self),
-            dst.force_box(self),
-            srcstart.force_box(self),
-            dststart.force_box(self),
-            length.force_box(self),
-            mode, need_next_offset=False
-        )
+        elif (src.is_virtual() and dst.is_virtual() and srcstart.is_constant() and
+            dststart.is_constant() and length.is_constant()):
+
+            src_start = srcstart.force_box(self).getint()
+            dst_start = dststart.force_box(self).getint()
+            for index in range(length.force_box(self).getint()):
+                vresult = self.strgetitem(src, optimizer.ConstantValue(ConstInt(index + src_start)), mode)
+                dst.setitem(index + dst_start, vresult)
+        else:
+            copy_str_content(self,
+                src.force_box(self),
+                dst.force_box(self),
+                srcstart.force_box(self),
+                dststart.force_box(self),
+                length.force_box(self),
+                mode, need_next_offset=False
+            )
 
     def optimize_CALL(self, op):
         # dispatch based on 'oopspecindex' to a method that handles
diff --git a/pypy/jit/metainterp/pyjitpl.py b/pypy/jit/metainterp/pyjitpl.py
--- a/pypy/jit/metainterp/pyjitpl.py
+++ b/pypy/jit/metainterp/pyjitpl.py
@@ -509,6 +509,15 @@
         self._opimpl_setfield_gc_any(sbox, itemsdescr, abox)
         return sbox
 
+    @arguments("descr", "descr", "descr", "descr", "box")
+    def opimpl_newlist_hint(self, structdescr, lengthdescr, itemsdescr,
+                            arraydescr, sizehintbox):
+        sbox = self.opimpl_new(structdescr)
+        self._opimpl_setfield_gc_any(sbox, lengthdescr, history.CONST_FALSE)
+        abox = self.opimpl_new_array(arraydescr, sizehintbox)
+        self._opimpl_setfield_gc_any(sbox, itemsdescr, abox)
+        return sbox
+
     @arguments("box", "descr", "descr", "box")
     def _opimpl_getlistitem_gc_any(self, listbox, itemsdescr, arraydescr,
                                    indexbox):
@@ -974,9 +983,11 @@
         any_operation = len(self.metainterp.history.operations) > 0
         jitdriver_sd = self.metainterp.staticdata.jitdrivers_sd[jdindex]
         self.verify_green_args(jitdriver_sd, greenboxes)
-        self.debug_merge_point(jitdriver_sd, jdindex, self.metainterp.portal_call_depth,
+        self.debug_merge_point(jitdriver_sd, jdindex,
+                               self.metainterp.portal_call_depth,
+                               self.metainterp.call_ids[-1],
                                greenboxes)
-        
+
         if self.metainterp.seen_loop_header_for_jdindex < 0:
             if not any_operation:
                 return
@@ -1028,11 +1039,11 @@
                                     assembler_call=True)
             raise ChangeFrame
 
-    def debug_merge_point(self, jitdriver_sd, jd_index, portal_call_depth, greenkey):
+    def debug_merge_point(self, jitdriver_sd, jd_index, portal_call_depth, current_call_id, greenkey):
         # debugging: produce a DEBUG_MERGE_POINT operation
         loc = jitdriver_sd.warmstate.get_location_str(greenkey)
         debug_print(loc)
-        args = [ConstInt(jd_index), ConstInt(portal_call_depth)] + greenkey
+        args = [ConstInt(jd_index), ConstInt(portal_call_depth), ConstInt(current_call_id)] + greenkey
         self.metainterp.history.record(rop.DEBUG_MERGE_POINT, args, None)
 
     @arguments("box", "label")
@@ -1574,11 +1585,14 @@
         self.call_pure_results = args_dict_box()
         self.heapcache = HeapCache()
 
+        self.call_ids = []
+        self.current_call_id = 0
+
     def retrace_needed(self, trace):
         self.partial_trace = trace
         self.retracing_from = len(self.history.operations) - 1
         self.heapcache.reset()
-        
+
 
     def perform_call(self, jitcode, boxes, greenkey=None):
         # causes the metainterp to enter the given subfunction
@@ -1592,6 +1606,8 @@
     def newframe(self, jitcode, greenkey=None):
         if jitcode.is_portal:
             self.portal_call_depth += 1
+            self.call_ids.append(self.current_call_id)
+            self.current_call_id += 1
         if greenkey is not None and self.is_main_jitcode(jitcode):
             self.portal_trace_positions.append(
                     (greenkey, len(self.history.operations)))
@@ -1608,6 +1624,7 @@
         jitcode = frame.jitcode
         if jitcode.is_portal:
             self.portal_call_depth -= 1
+            self.call_ids.pop()
         if frame.greenkey is not None and self.is_main_jitcode(jitcode):
             self.portal_trace_positions.append(
                     (None, len(self.history.operations)))
@@ -1976,7 +1993,7 @@
                 # Found!  Compile it as a loop.
                 # raises in case it works -- which is the common case
                 if self.partial_trace:
-                    if  start != self.retracing_from: 
+                    if  start != self.retracing_from:
                         raise SwitchToBlackhole(ABORT_BAD_LOOP) # For now
                 self.compile_loop(original_boxes, live_arg_boxes, start, resumedescr)
                 # creation of the loop was cancelled!
@@ -2064,11 +2081,12 @@
             pass # XXX we want to do something special in resume descr,
                  # but not now
         elif opnum == rop.GUARD_NO_OVERFLOW:   # an overflow now detected
-            self.execute_raised(OverflowError(), constant=True)
-            try:
-                self.finishframe_exception()
-            except ChangeFrame:
-                pass
+            if not dont_change_position:
+                self.execute_raised(OverflowError(), constant=True)
+                try:
+                    self.finishframe_exception()
+                except ChangeFrame:
+                    pass
         elif opnum == rop.GUARD_OVERFLOW:      # no longer overflowing
             self.clear_exception()
         else:
@@ -2084,7 +2102,7 @@
             if not token.target_tokens:
                 return None
         return token
-        
+
     def compile_loop(self, original_boxes, live_arg_boxes, start, resume_at_jump_descr):
         num_green_args = self.jitdriver_sd.num_green_args
         greenkey = original_boxes[:num_green_args]
@@ -2349,7 +2367,7 @@
             # warmstate.py.
             virtualizable_box = self.virtualizable_boxes[-1]
             virtualizable = vinfo.unwrap_virtualizable_box(virtualizable_box)
-            assert not vinfo.gettoken(virtualizable)
+            assert not vinfo.is_token_nonnull_gcref(virtualizable)
             # fill the virtualizable with the local boxes
             self.synchronize_virtualizable()
         #
diff --git a/pypy/jit/metainterp/resume.py b/pypy/jit/metainterp/resume.py
--- a/pypy/jit/metainterp/resume.py
+++ b/pypy/jit/metainterp/resume.py
@@ -182,23 +182,22 @@
 
     # env numbering
 
-    def number(self, values, snapshot):
+    def number(self, optimizer, snapshot):
         if snapshot is None:
             return lltype.nullptr(NUMBERING), {}, 0
         if snapshot in self.numberings:
              numb, liveboxes, v = self.numberings[snapshot]
              return numb, liveboxes.copy(), v
 
-        numb1, liveboxes, v = self.number(values, snapshot.prev)
+        numb1, liveboxes, v = self.number(optimizer, snapshot.prev)
         n = len(liveboxes)-v
         boxes = snapshot.boxes
         length = len(boxes)
         numb = lltype.malloc(NUMBERING, length)
         for i in range(length):
             box = boxes[i]
-            value = values.get(box, None)
-            if value is not None:
-                box = value.get_key_box()
+            value = optimizer.getvalue(box)
+            box = value.get_key_box()
 
             if isinstance(box, Const):
                 tagged = self.getconst(box)
@@ -318,14 +317,14 @@
         _, tagbits = untag(tagged)
         return tagbits == TAGVIRTUAL
 
-    def finish(self, values, pending_setfields=[]):
+    def finish(self, optimizer, pending_setfields=[]):
         # compute the numbering
         storage = self.storage
         # make sure that nobody attached resume data to this guard yet
         assert not storage.rd_numb
         snapshot = storage.rd_snapshot
         assert snapshot is not None # is that true?
-        numb, liveboxes_from_env, v = self.memo.number(values, snapshot)
+        numb, liveboxes_from_env, v = self.memo.number(optimizer, snapshot)
         self.liveboxes_from_env = liveboxes_from_env
         self.liveboxes = {}
         storage.rd_numb = numb
@@ -341,23 +340,23 @@
                 liveboxes[i] = box
             else:
                 assert tagbits == TAGVIRTUAL
-                value = values[box]
+                value = optimizer.getvalue(box)
                 value.get_args_for_fail(self)
 
         for _, box, fieldbox, _ in pending_setfields:
             self.register_box(box)
             self.register_box(fieldbox)
-            value = values[fieldbox]
+            value = optimizer.getvalue(fieldbox)
             value.get_args_for_fail(self)
 
-        self._number_virtuals(liveboxes, values, v)
+        self._number_virtuals(liveboxes, optimizer, v)
         self._add_pending_fields(pending_setfields)
 
         storage.rd_consts = self.memo.consts
         dump_storage(storage, liveboxes)
         return liveboxes[:]
 
-    def _number_virtuals(self, liveboxes, values, num_env_virtuals):
+    def _number_virtuals(self, liveboxes, optimizer, num_env_virtuals):
         # !! 'liveboxes' is a list that is extend()ed in-place !!
         memo = self.memo
         new_liveboxes = [None] * memo.num_cached_boxes()
@@ -397,7 +396,7 @@
             memo.nvholes += length - len(vfieldboxes)
             for virtualbox, fieldboxes in vfieldboxes.iteritems():
                 num, _ = untag(self.liveboxes[virtualbox])
-                value = values[virtualbox]
+                value = optimizer.getvalue(virtualbox)
                 fieldnums = [self._gettagged(box)
                              for box in fieldboxes]
                 vinfo = value.make_virtual_info(self, fieldnums)
@@ -1102,14 +1101,14 @@
         virtualizable = self.decode_ref(numb.nums[index])
         if self.resume_after_guard_not_forced == 1:
             # in the middle of handle_async_forcing()
-            assert vinfo.gettoken(virtualizable)
-            vinfo.settoken(virtualizable, vinfo.TOKEN_NONE)
+            assert vinfo.is_token_nonnull_gcref(virtualizable)
+            vinfo.reset_token_gcref(virtualizable)
         else:
             # just jumped away from assembler (case 4 in the comment in
             # virtualizable.py) into tracing (case 2); check that vable_token
             # is and stays 0.  Note the call to reset_vable_token() in
             # warmstate.py.
-            assert not vinfo.gettoken(virtualizable)
+            assert not vinfo.is_token_nonnull_gcref(virtualizable)
         return vinfo.write_from_resume_data_partial(virtualizable, self, numb)
 
     def load_value_of_type(self, TYPE, tagged):
diff --git a/pypy/jit/metainterp/test/test_ajit.py b/pypy/jit/metainterp/test/test_ajit.py
--- a/pypy/jit/metainterp/test/test_ajit.py
+++ b/pypy/jit/metainterp/test/test_ajit.py
@@ -144,7 +144,7 @@
                            'int_mul': 1, 'guard_true': 2, 'int_sub': 2})
 
 
-    def test_loop_invariant_mul_ovf(self):
+    def test_loop_invariant_mul_ovf1(self):
         myjitdriver = JitDriver(greens = [], reds = ['y', 'res', 'x'])
         def f(x, y):
             res = 0
@@ -235,6 +235,65 @@
                            'guard_true': 4, 'int_sub': 4, 'jump': 3,
                            'int_mul': 3, 'int_add': 4})
 
+    def test_loop_invariant_mul_ovf2(self):
+        myjitdriver = JitDriver(greens = [], reds = ['y', 'res', 'x'])
+        def f(x, y):
+            res = 0
+            while y > 0:
+                myjitdriver.can_enter_jit(x=x, y=y, res=res)
+                myjitdriver.jit_merge_point(x=x, y=y, res=res)
+                b = y * 2
+                try:
+                    res += ovfcheck(x * x) + b
+                except OverflowError:
+                    res += 1
+                y -= 1
+            return res
+        res = self.meta_interp(f, [sys.maxint, 7])
+        assert res == f(sys.maxint, 7)
+        self.check_trace_count(1)
+        res = self.meta_interp(f, [6, 7])
+        assert res == 308
+
+    def test_loop_invariant_mul_bridge_ovf1(self):
+        myjitdriver = JitDriver(greens = [], reds = ['y', 'res', 'x1', 'x2'])
+        def f(x1, x2, y):
+            res = 0
+            while y > 0:
+                myjitdriver.can_enter_jit(x1=x1, x2=x2, y=y, res=res)
+                myjitdriver.jit_merge_point(x1=x1, x2=x2, y=y, res=res)
+                try:
+                    res += ovfcheck(x1 * x1)
+                except OverflowError:
+                    res += 1
+                if y<32 and (y>>2)&1==0:
+                    x1, x2 = x2, x1
+                y -= 1
+            return res
+        res = self.meta_interp(f, [6, sys.maxint, 48])
+        assert res == f(6, sys.maxint, 48)
+
+    def test_loop_invariant_mul_bridge_ovf2(self):
+        myjitdriver = JitDriver(greens = [], reds = ['y', 'res', 'x1', 'x2', 'n'])
+        def f(x1, x2, n, y):
+            res = 0
+            while y > 0:
+                myjitdriver.can_enter_jit(x1=x1, x2=x2, y=y, res=res, n=n)
+                myjitdriver.jit_merge_point(x1=x1, x2=x2, y=y, res=res, n=n)
+                try:
+                    res += ovfcheck(x1 * x1)
+                except OverflowError:
+                    res += 1
+                y -= 1
+                if y&4 == 0:
+                    x1, x2 = x2, x1
+            return res
+        res = self.meta_interp(f, [6, sys.maxint, 32, 48])
+        assert res == f(6, sys.maxint, 32, 48)
+        res = self.meta_interp(f, [sys.maxint, 6, 32, 48])
+        assert res == f(sys.maxint, 6, 32, 48)
+        
+
     def test_loop_invariant_intbox(self):
         myjitdriver = JitDriver(greens = [], reds = ['y', 'res', 'x'])
         class I:
@@ -2943,11 +3002,18 @@
         self.check_resops(arraylen_gc=3)
 
     def test_ulonglong_mod(self):
-        myjitdriver = JitDriver(greens = [], reds = ['n', 'sa', 'i'])
+        myjitdriver = JitDriver(greens = [], reds = ['n', 'a'])
+        class A:
+            pass
         def f(n):
             sa = i = rffi.cast(rffi.ULONGLONG, 1)
+            a = A()
             while i < rffi.cast(rffi.ULONGLONG, n):
-                myjitdriver.jit_merge_point(sa=sa, n=n, i=i)
+                a.sa = sa
+                a.i = i
+                myjitdriver.jit_merge_point(n=n, a=a)
+                sa = a.sa
+                i = a.i
                 sa += sa % i
                 i += 1
         res = self.meta_interp(f, [32])
diff --git a/pypy/jit/metainterp/test/test_compile.py b/pypy/jit/metainterp/test/test_compile.py
--- a/pypy/jit/metainterp/test/test_compile.py
+++ b/pypy/jit/metainterp/test/test_compile.py
@@ -14,7 +14,7 @@
     ts = typesystem.llhelper
     def __init__(self):
         self.seen = []
-    def compile_loop(self, inputargs, operations, token, name=''):
+    def compile_loop(self, inputargs, operations, token, log=True, name=''):
         self.seen.append((inputargs, operations, token))
 
 class FakeLogger(object):
diff --git a/pypy/jit/metainterp/test/test_list.py b/pypy/jit/metainterp/test/test_list.py
--- a/pypy/jit/metainterp/test/test_list.py
+++ b/pypy/jit/metainterp/test/test_list.py
@@ -1,4 +1,5 @@
 import py
+from pypy.rlib.objectmodel import newlist_hint
 from pypy.rlib.jit import JitDriver
 from pypy.jit.metainterp.test.support import LLJitMixin, OOJitMixin
 
@@ -228,6 +229,27 @@
         self.check_resops({'jump': 1, 'int_gt': 2, 'int_add': 2,
                            'guard_true': 2, 'int_sub': 2})
 
+    def test_newlist_hint(self):
+        def f(i):
+            l = newlist_hint(i)
+            return len(l)
+
+        r = self.interp_operations(f, [3])
+        assert r == 0
+
+    def test_newlist_hint_optimized(self):
+        driver = JitDriver(greens = [], reds = ['i'])
+
+        def f(i):
+            while i > 0:
+                driver.jit_merge_point(i=i)
+                l = newlist_hint(5)
+                l.append(1)
+                i -= l[0]
+
+        self.meta_interp(f, [10], listops=True)
+        self.check_resops(new_array=0, call=0)
+
 class TestOOtype(ListTests, OOJitMixin):
     pass
 
diff --git a/pypy/jit/metainterp/test/test_logger.py b/pypy/jit/metainterp/test/test_logger.py
--- a/pypy/jit/metainterp/test/test_logger.py
+++ b/pypy/jit/metainterp/test/test_logger.py
@@ -54,7 +54,7 @@
         class FakeJitDriver(object):
             class warmstate(object):
                 get_location_str = staticmethod(lambda args: "dupa")
-        
+
         class FakeMetaInterpSd:
             cpu = AbstractCPU()
             cpu.ts = self.ts
@@ -77,7 +77,7 @@
             equaloplists(loop.operations, oloop.operations)
             assert oloop.inputargs == loop.inputargs
         return logger, loop, oloop
-    
+
     def test_simple(self):
         inp = '''
         [i0, i1, i2, p3, p4, p5]
@@ -116,12 +116,13 @@
     def test_debug_merge_point(self):
         inp = '''
         []
-        debug_merge_point(0, 0)
+        debug_merge_point(0, 0, 0)
         '''
         _, loop, oloop = self.reparse(inp, check_equal=False)
         assert loop.operations[0].getarg(1).getint() == 0
-        assert oloop.operations[0].getarg(1)._get_str() == "dupa"
-        
+        assert loop.operations[0].getarg(2).getint() == 0
+        assert oloop.operations[0].getarg(2)._get_str() == "dupa"
+
     def test_floats(self):
         inp = '''
         [f0]
@@ -142,7 +143,7 @@
         output = logger.log_loop(loop)
         assert output.splitlines()[-1] == "jump(i0, descr=<Loop3>)"
         pure_parse(output)
-        
+
     def test_guard_descr(self):
         namespace = {'fdescr': BasicFailDescr()}
         inp = '''
@@ -154,7 +155,7 @@
         output = logger.log_loop(loop)
         assert output.splitlines()[-1] == "guard_true(i0, descr=<Guard0>) [i0]"
         pure_parse(output)
-        
+
         logger = Logger(self.make_metainterp_sd(), guard_number=False)
         output = logger.log_loop(loop)
         lastline = output.splitlines()[-1]
diff --git a/pypy/jit/metainterp/test/test_quasiimmut.py b/pypy/jit/metainterp/test/test_quasiimmut.py
--- a/pypy/jit/metainterp/test/test_quasiimmut.py
+++ b/pypy/jit/metainterp/test/test_quasiimmut.py
@@ -8,7 +8,7 @@
 from pypy.jit.metainterp.quasiimmut import get_current_qmut_instance
 from pypy.jit.metainterp.test.support import LLJitMixin
 from pypy.jit.codewriter.policy import StopAtXPolicy
-from pypy.rlib.jit import JitDriver, dont_look_inside
+from pypy.rlib.jit import JitDriver, dont_look_inside, unroll_safe
 
 
 def test_get_current_qmut_instance():
@@ -480,6 +480,32 @@
         assert res == 1
         self.check_jitcell_token_count(2)
 
+    def test_for_loop_array(self):
+        myjitdriver = JitDriver(greens=[], reds=["n", "i"])
+        class Foo(object):
+            _immutable_fields_ = ["x?[*]"]
+            def __init__(self, x):
+                self.x = x
+        f = Foo([1, 3, 5, 6])
+        @unroll_safe
+        def g(v):
+            for x in f.x:
+                if x & 1 == 0:
+                    v += 1
+            return v
+        def main(n):
+            i = 0
+            while i < n:
+                myjitdriver.jit_merge_point(n=n, i=i)
+                i = g(i)
+            return i
+        res = self.meta_interp(main, [10])
+        assert res == 10
+        self.check_resops({
+            "int_add": 2, "int_lt": 2, "jump": 1, "guard_true": 2,
+            "guard_not_invalidated": 2
+        })
+
 
 class TestLLtypeGreenFieldsTests(QuasiImmutTests, LLJitMixin):
     pass
diff --git a/pypy/jit/metainterp/test/test_resume.py b/pypy/jit/metainterp/test/test_resume.py
--- a/pypy/jit/metainterp/test/test_resume.py
+++ b/pypy/jit/metainterp/test/test_resume.py
@@ -18,6 +18,19 @@
     rd_virtuals = None
     rd_pendingfields = None
 
+
+class FakeOptimizer(object):
+    def __init__(self, values):
+        self.values = values
+        
+    def getvalue(self, box):
+        try:
+            value = self.values[box]
+        except KeyError:
+            value = self.values[box] = OptValue(box)
+        return value
+        
+
 def test_tag():
     assert tag(3, 1) == rffi.r_short(3<<2|1)
     assert tag(-3, 2) == rffi.r_short(-3<<2|2)
@@ -500,7 +513,7 @@
     capture_resumedata(fs, None, [], storage)
     memo = ResumeDataLoopMemo(FakeMetaInterpStaticData())
     modifier = ResumeDataVirtualAdder(storage, memo)
-    liveboxes = modifier.finish({})
+    liveboxes = modifier.finish(FakeOptimizer({}))
     metainterp = MyMetaInterp()
 
     b1t, b2t, b3t = [BoxInt(), BoxPtr(), BoxInt()]
@@ -524,7 +537,7 @@
     capture_resumedata(fs, [b4], [], storage)
     memo = ResumeDataLoopMemo(FakeMetaInterpStaticData())
     modifier = ResumeDataVirtualAdder(storage, memo)
-    liveboxes = modifier.finish({})
+    liveboxes = modifier.finish(FakeOptimizer({}))
     metainterp = MyMetaInterp()
 
     b1t, b2t, b3t, b4t = [BoxInt(), BoxPtr(), BoxInt(), BoxPtr()]
@@ -553,10 +566,10 @@
     
     memo = ResumeDataLoopMemo(FakeMetaInterpStaticData())
     modifier = ResumeDataVirtualAdder(storage, memo)
-    liveboxes = modifier.finish({})
+    liveboxes = modifier.finish(FakeOptimizer({}))
 
     modifier = ResumeDataVirtualAdder(storage2, memo)
-    liveboxes2 = modifier.finish({})
+    liveboxes2 = modifier.finish(FakeOptimizer({}))
 
     metainterp = MyMetaInterp()
 
@@ -617,7 +630,7 @@
     memo = ResumeDataLoopMemo(FakeMetaInterpStaticData())
     values = {b2: virtual_value(b2, b5, c4)}
     modifier = ResumeDataVirtualAdder(storage, memo)
-    liveboxes = modifier.finish(values)
+    liveboxes = modifier.finish(FakeOptimizer(values))
     assert len(storage.rd_virtuals) == 1
     assert storage.rd_virtuals[0].fieldnums == [tag(-1, TAGBOX),
                                                 tag(0, TAGCONST)]
@@ -628,7 +641,7 @@
     values = {b2: virtual_value(b2, b4, v6), b6: v6}
     memo.clear_box_virtual_numbers()
     modifier = ResumeDataVirtualAdder(storage2, memo)
-    liveboxes2 = modifier.finish(values)
+    liveboxes2 = modifier.finish(FakeOptimizer(values))
     assert len(storage2.rd_virtuals) == 2    
     assert storage2.rd_virtuals[0].fieldnums == [tag(len(liveboxes2)-1, TAGBOX),
                                                  tag(-1, TAGVIRTUAL)]
@@ -674,7 +687,7 @@
     memo = ResumeDataLoopMemo(FakeMetaInterpStaticData())
     values = {b2: virtual_value(b2, b5, c4)}
     modifier = ResumeDataVirtualAdder(storage, memo)
-    liveboxes = modifier.finish(values)
+    liveboxes = modifier.finish(FakeOptimizer(values))
     assert len(storage.rd_virtuals) == 1
     assert storage.rd_virtuals[0].fieldnums == [tag(-1, TAGBOX),
                                                 tag(0, TAGCONST)]
@@ -684,7 +697,7 @@
     capture_resumedata(fs, None, [], storage2)
     values[b4] = virtual_value(b4, b6, c4)
     modifier = ResumeDataVirtualAdder(storage2, memo)
-    liveboxes = modifier.finish(values)
+    liveboxes = modifier.finish(FakeOptimizer(values))
     assert len(storage2.rd_virtuals) == 2
     assert storage2.rd_virtuals[1].fieldnums == storage.rd_virtuals[0].fieldnums
     assert storage2.rd_virtuals[1] is storage.rd_virtuals[0]
@@ -703,7 +716,7 @@
     v1.setfield(LLtypeMixin.nextdescr, v2)
     values = {b1: v1, b2: v2}
     modifier = ResumeDataVirtualAdder(storage, memo)
-    liveboxes = modifier.finish(values)
+    liveboxes = modifier.finish(FakeOptimizer(values))
     assert liveboxes == [b3]
     assert len(storage.rd_virtuals) == 2
     assert storage.rd_virtuals[0].fieldnums == [tag(-1, TAGBOX),
@@ -776,7 +789,7 @@
 
     memo = ResumeDataLoopMemo(FakeMetaInterpStaticData())
 
-    numb, liveboxes, v = memo.number({}, snap1)
+    numb, liveboxes, v = memo.number(FakeOptimizer({}), snap1)
     assert v == 0
 
     assert liveboxes == {b1: tag(0, TAGBOX), b2: tag(1, TAGBOX),
@@ -788,7 +801,7 @@
                                     tag(0, TAGBOX), tag(2, TAGINT)]
     assert not numb.prev.prev
 
-    numb2, liveboxes2, v = memo.number({}, snap2)
+    numb2, liveboxes2, v = memo.number(FakeOptimizer({}), snap2)
     assert v == 0
     
     assert liveboxes2 == {b1: tag(0, TAGBOX), b2: tag(1, TAGBOX),
@@ -813,7 +826,8 @@
             return self.virt
 
     # renamed
-    numb3, liveboxes3, v = memo.number({b3: FakeValue(False, c4)}, snap3)
+    numb3, liveboxes3, v = memo.number(FakeOptimizer({b3: FakeValue(False, c4)}),
+                                       snap3)
     assert v == 0
     
     assert liveboxes3 == {b1: tag(0, TAGBOX), b2: tag(1, TAGBOX)}
@@ -825,7 +839,8 @@
     env4 = [c3, b4, b1, c3]
     snap4 = Snapshot(snap, env4)    
 
-    numb4, liveboxes4, v = memo.number({b4: FakeValue(True, b4)}, snap4)
+    numb4, liveboxes4, v = memo.number(FakeOptimizer({b4: FakeValue(True, b4)}),
+                                       snap4)
     assert v == 1
     
     assert liveboxes4 == {b1: tag(0, TAGBOX), b2: tag(1, TAGBOX),
@@ -837,8 +852,9 @@
     env5 = [b1, b4, b5]
     snap5 = Snapshot(snap4, env5)    
 
-    numb5, liveboxes5, v = memo.number({b4: FakeValue(True, b4),
-                                        b5: FakeValue(True, b5)}, snap5)
+    numb5, liveboxes5, v = memo.number(FakeOptimizer({b4: FakeValue(True, b4),
+                                                      b5: FakeValue(True, b5)}),
+                                       snap5)
     assert v == 2
     
     assert liveboxes5 == {b1: tag(0, TAGBOX), b2: tag(1, TAGBOX),
@@ -940,7 +956,7 @@
     storage = make_storage(b1s, b2s, b3s)
     memo = ResumeDataLoopMemo(FakeMetaInterpStaticData())    
     modifier = ResumeDataVirtualAdder(storage, memo)
-    liveboxes = modifier.finish({})
+    liveboxes = modifier.finish(FakeOptimizer({}))
     assert storage.rd_snapshot is None
     cpu = MyCPU([])
     reader = ResumeDataDirectReader(MyMetaInterp(cpu), storage)
@@ -954,14 +970,14 @@
     storage = make_storage(b1s, b2s, b3s)
     memo = ResumeDataLoopMemo(FakeMetaInterpStaticData())
     modifier = ResumeDataVirtualAdder(storage, memo)
-    modifier.finish({})
+    modifier.finish(FakeOptimizer({}))
     assert len(memo.consts) == 2
     assert storage.rd_consts is memo.consts
 
     b1s, b2s, b3s = [ConstInt(sys.maxint), ConstInt(2**17), ConstInt(-65)]
     storage2 = make_storage(b1s, b2s, b3s)
     modifier2 = ResumeDataVirtualAdder(storage2, memo)
-    modifier2.finish({})
+    modifier2.finish(FakeOptimizer({}))
     assert len(memo.consts) == 3    
     assert storage2.rd_consts is memo.consts
 
@@ -1022,7 +1038,7 @@
 
     val = FakeValue()
     values = {b1s: val, b2s: val}  
-    liveboxes = modifier.finish(values)
+    liveboxes = modifier.finish(FakeOptimizer(values))
     assert storage.rd_snapshot is None
     b1t, b3t = [BoxInt(11), BoxInt(33)]
     newboxes = _resume_remap(liveboxes, [b1_2, b3s], b1t, b3t)
@@ -1043,7 +1059,7 @@
     storage = make_storage(b1s, b2s, b3s)
     memo = ResumeDataLoopMemo(FakeMetaInterpStaticData())        
     modifier = ResumeDataVirtualAdder(storage, memo)
-    liveboxes = modifier.finish({})
+    liveboxes = modifier.finish(FakeOptimizer({}))
     b2t, b3t = [BoxPtr(demo55o), BoxInt(33)]
     newboxes = _resume_remap(liveboxes, [b2s, b3s], b2t, b3t)
     metainterp = MyMetaInterp()
@@ -1086,7 +1102,7 @@
     values = {b2s: v2, b4s: v4}
 
     liveboxes = []
-    modifier._number_virtuals(liveboxes, values, 0)
+    modifier._number_virtuals(liveboxes, FakeOptimizer(values), 0)
     storage.rd_consts = memo.consts[:]
     storage.rd_numb = None
     # resume
@@ -1156,7 +1172,7 @@
     modifier.register_virtual_fields(b2s, [b4s, c1s])
     liveboxes = []
     values = {b2s: v2}
-    modifier._number_virtuals(liveboxes, values, 0)
+    modifier._number_virtuals(liveboxes, FakeOptimizer(values), 0)
     dump_storage(storage, liveboxes)
     storage.rd_consts = memo.consts[:]
     storage.rd_numb = None
@@ -1203,7 +1219,7 @@
     v2.setfield(LLtypeMixin.bdescr, OptValue(b4s))
     modifier.register_virtual_fields(b2s, [c1s, b4s])
     liveboxes = []
-    modifier._number_virtuals(liveboxes, {b2s: v2}, 0)
+    modifier._number_virtuals(liveboxes, FakeOptimizer({b2s: v2}), 0)
     dump_storage(storage, liveboxes)
     storage.rd_consts = memo.consts[:]
     storage.rd_numb = None
@@ -1249,7 +1265,7 @@
 
     values = {b4s: v4, b2s: v2}
     liveboxes = []
-    modifier._number_virtuals(liveboxes, values, 0)
+    modifier._number_virtuals(liveboxes, FakeOptimizer(values), 0)
     assert liveboxes == [b2s, b4s] or liveboxes == [b4s, b2s]
     modifier._add_pending_fields([(LLtypeMixin.nextdescr, b2s, b4s, -1)])
     storage.rd_consts = memo.consts[:]
diff --git a/pypy/jit/metainterp/test/test_warmspot.py b/pypy/jit/metainterp/test/test_warmspot.py
--- a/pypy/jit/metainterp/test/test_warmspot.py
+++ b/pypy/jit/metainterp/test/test_warmspot.py
@@ -13,7 +13,7 @@
 
 
 class WarmspotTests(object):
-    
+
     def test_basic(self):
         mydriver = JitDriver(reds=['a'],
                              greens=['i'])
@@ -77,16 +77,16 @@
         self.meta_interp(f, [123, 10])
         assert len(get_stats().locations) >= 4
         for loc in get_stats().locations:
-            assert loc == (0, 123)
+            assert loc == (0, 0, 123)
 
     def test_set_param_enable_opts(self):
         from pypy.rpython.annlowlevel import llstr, hlstr
-        
+
         myjitdriver = JitDriver(greens = [], reds = ['n'])
         class A(object):
             def m(self, n):
                 return n-1
-            
+
         def g(n):
             while n > 0:
                 myjitdriver.can_enter_jit(n=n)
@@ -332,7 +332,7 @@
             ts = llhelper
             translate_support_code = False
             stats = "stats"
-            
+
             def get_fail_descr_number(self, d):
                 return -1
 
@@ -352,7 +352,7 @@
                 return "not callable"
 
         driver = JitDriver(reds = ['red'], greens = ['green'])
-        
+
         def f(green):
             red = 0
             while red < 10:
diff --git a/pypy/jit/metainterp/virtualizable.py b/pypy/jit/metainterp/virtualizable.py
--- a/pypy/jit/metainterp/virtualizable.py
+++ b/pypy/jit/metainterp/virtualizable.py
@@ -262,15 +262,15 @@
         force_now._dont_inline_ = True
         self.force_now = force_now
 
-        def gettoken(virtualizable):
+        def is_token_nonnull_gcref(virtualizable):
             virtualizable = cast_gcref_to_vtype(virtualizable)
-            return virtualizable.vable_token
-        self.gettoken = gettoken
+            return bool(virtualizable.vable_token)
+        self.is_token_nonnull_gcref = is_token_nonnull_gcref
 
-        def settoken(virtualizable, token):
+        def reset_token_gcref(virtualizable):
             virtualizable = cast_gcref_to_vtype(virtualizable)
-            virtualizable.vable_token = token
-        self.settoken = settoken
+            virtualizable.vable_token = VirtualizableInfo.TOKEN_NONE
+        self.reset_token_gcref = reset_token_gcref
 
     def _freeze_(self):
         return True
diff --git a/pypy/jit/metainterp/warmspot.py b/pypy/jit/metainterp/warmspot.py
--- a/pypy/jit/metainterp/warmspot.py
+++ b/pypy/jit/metainterp/warmspot.py
@@ -100,7 +100,7 @@
     if not kwds.get('translate_support_code', False):
         warmrunnerdesc.metainterp_sd.profiler.finish()
         warmrunnerdesc.metainterp_sd.cpu.finish_once()
-    print '~~~ return value:', res
+    print '~~~ return value:', repr(res)
     while repeat > 1:
         print '~' * 79
         res1 = interp.eval_graph(graph, args)
@@ -453,7 +453,7 @@
                     if sys.stdout == sys.__stdout__:
                         import pdb; pdb.post_mortem(tb)
                     raise e.__class__, e, tb
-                fatalerror('~~~ Crash in JIT! %s' % (e,), traceback=True)
+                fatalerror('~~~ Crash in JIT! %s' % (e,))
         crash_in_jit._dont_inline_ = True
 
         if self.translator.rtyper.type_system.name == 'lltypesystem':
diff --git a/pypy/jit/tl/tinyframe/tinyframe.py b/pypy/jit/tl/tinyframe/tinyframe.py
--- a/pypy/jit/tl/tinyframe/tinyframe.py
+++ b/pypy/jit/tl/tinyframe/tinyframe.py
@@ -210,7 +210,7 @@
     def repr(self):
         return "<function %s(%s)>" % (self.outer.repr(), self.inner.repr())
 
-driver = JitDriver(greens = ['code', 'i'], reds = ['self'],
+driver = JitDriver(greens = ['i', 'code'], reds = ['self'],
                    virtualizables = ['self'])
 
 class Frame(object):
diff --git a/pypy/jit/tool/test/test_oparser.py b/pypy/jit/tool/test/test_oparser.py
--- a/pypy/jit/tool/test/test_oparser.py
+++ b/pypy/jit/tool/test/test_oparser.py
@@ -146,16 +146,18 @@
     def test_debug_merge_point(self):
         x = '''
         []
-        debug_merge_point(0, "info")
-        debug_merge_point(0, 'info')
-        debug_merge_point(1, '<some ('other.')> info')
-        debug_merge_point(0, '(stuff) #1')
+        debug_merge_point(0, 0, "info")
+        debug_merge_point(0, 0, 'info')
+        debug_merge_point(1, 1, '<some ('other.')> info')
+        debug_merge_point(0, 0, '(stuff) #1')
         '''
         loop = self.parse(x)
-        assert loop.operations[0].getarg(1)._get_str() == 'info'
-        assert loop.operations[1].getarg(1)._get_str() == 'info'
-        assert loop.operations[2].getarg(1)._get_str() == "<some ('other.')> info"
-        assert loop.operations[3].getarg(1)._get_str() == "(stuff) #1"
+        assert loop.operations[0].getarg(2)._get_str() == 'info'
+        assert loop.operations[0].getarg(1).value == 0
+        assert loop.operations[1].getarg(2)._get_str() == 'info'
+        assert loop.operations[2].getarg(2)._get_str() == "<some ('other.')> info"
+        assert loop.operations[2].getarg(1).value == 1
+        assert loop.operations[3].getarg(2)._get_str() == "(stuff) #1"
 
 
     def test_descr_with_obj_print(self):
diff --git a/pypy/module/__pypy__/__init__.py b/pypy/module/__pypy__/__init__.py
--- a/pypy/module/__pypy__/__init__.py
+++ b/pypy/module/__pypy__/__init__.py
@@ -1,5 +1,5 @@
+import sys
 
-# Package initialisation
 from pypy.interpreter.mixedmodule import MixedModule
 from pypy.module.imp.importing import get_pyc_magic
 
@@ -12,6 +12,19 @@
         "UnicodeBuilder": "interp_builders.W_UnicodeBuilder",
     }
 
+class TimeModule(MixedModule):
+    appleveldefs = {}
+    interpleveldefs = {}
+    if sys.platform.startswith("linux"):
+        interpleveldefs["clock_gettime"] = "interp_time.clock_gettime"
+        interpleveldefs["clock_getres"] = "interp_time.clock_getres"
+        for name in [
+            "CLOCK_REALTIME", "CLOCK_MONOTONIC", "CLOCK_MONOTONIC_RAW",
+            "CLOCK_PROCESS_CPUTIME_ID", "CLOCK_THREAD_CPUTIME_ID"
+        ]:
+            interpleveldefs[name] = "space.wrap(interp_time.%s)" % name
+
+
 class Module(MixedModule):
     appleveldefs = {
     }
@@ -32,6 +45,7 @@
 
     submodules = {
         "builders": BuildersModule,
+        "time": TimeModule,
     }
 
     def setup_after_space_initialization(self):
diff --git a/pypy/module/__pypy__/interp_time.py b/pypy/module/__pypy__/interp_time.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/__pypy__/interp_time.py
@@ -0,0 +1,64 @@
+import sys
+
+from pypy.interpreter.error import exception_from_errno
+from pypy.interpreter.gateway import unwrap_spec
+from pypy.rpython.lltypesystem import rffi, lltype
+from pypy.rpython.tool import rffi_platform
+from pypy.translator.tool.cbuild import ExternalCompilationInfo
+
+
+class CConfig:
+    _compilation_info_ = ExternalCompilationInfo(
+        includes=["time.h"],
+        libraries=["rt"],
+    )
+
+    HAS_CLOCK_GETTIME = rffi_platform.Has('clock_gettime')
+
+    CLOCK_REALTIME = rffi_platform.DefinedConstantInteger("CLOCK_REALTIME")
+    CLOCK_MONOTONIC = rffi_platform.DefinedConstantInteger("CLOCK_MONOTONIC")
+    CLOCK_MONOTONIC_RAW = rffi_platform.DefinedConstantInteger("CLOCK_MONOTONIC_RAW")
+    CLOCK_PROCESS_CPUTIME_ID = rffi_platform.DefinedConstantInteger("CLOCK_PROCESS_CPUTIME_ID")
+    CLOCK_THREAD_CPUTIME_ID = rffi_platform.DefinedConstantInteger("CLOCK_THREAD_CPUTIME_ID")
+
+    TIMESPEC = rffi_platform.Struct("struct timespec", [
+        ("tv_sec", rffi.TIME_T),
+        ("tv_nsec", rffi.LONG),
+    ])
+
+cconfig = rffi_platform.configure(CConfig)
+
+HAS_CLOCK_GETTIME = cconfig["HAS_CLOCK_GETTIME"]
+
+CLOCK_REALTIME = cconfig["CLOCK_REALTIME"]
+CLOCK_MONOTONIC = cconfig["CLOCK_MONOTONIC"]
+CLOCK_MONOTONIC_RAW = cconfig["CLOCK_MONOTONIC_RAW"]
+CLOCK_PROCESS_CPUTIME_ID = cconfig["CLOCK_PROCESS_CPUTIME_ID"]
+CLOCK_THREAD_CPUTIME_ID = cconfig["CLOCK_THREAD_CPUTIME_ID"]
+
+TIMESPEC = cconfig["TIMESPEC"]
+
+c_clock_gettime = rffi.llexternal("clock_gettime",
+    [lltype.Signed, lltype.Ptr(TIMESPEC)], rffi.INT,
+    compilation_info=CConfig._compilation_info_, threadsafe=False
+)
+c_clock_getres = rffi.llexternal("clock_getres",
+    [lltype.Signed, lltype.Ptr(TIMESPEC)], rffi.INT,
+    compilation_info=CConfig._compilation_info_, threadsafe=False
+)
+
+ at unwrap_spec(clk_id="c_int")
+def clock_gettime(space, clk_id):
+    with lltype.scoped_alloc(TIMESPEC) as tp:
+        ret = c_clock_gettime(clk_id, tp)
+        if ret != 0:
+            raise exception_from_errno(space, space.w_IOError)
+        return space.wrap(tp.c_tv_sec + tp.c_tv_nsec * 1e-9)
+
+ at unwrap_spec(clk_id="c_int")
+def clock_getres(space, clk_id):
+    with lltype.scoped_alloc(TIMESPEC) as tp:
+        ret = c_clock_getres(clk_id, tp)
+        if ret != 0:
+            raise exception_from_errno(space, space.w_IOError)
+        return space.wrap(tp.c_tv_sec + tp.c_tv_nsec * 1e-9)
diff --git a/pypy/module/__pypy__/test/test_time.py b/pypy/module/__pypy__/test/test_time.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/__pypy__/test/test_time.py
@@ -0,0 +1,26 @@
+import py
+
+from pypy.module.__pypy__.interp_time import HAS_CLOCK_GETTIME
+
+
+class AppTestTime(object):
+    def setup_class(cls):
+        if not HAS_CLOCK_GETTIME:
+            py.test.skip("need time.clock_gettime")
+
+    def test_clock_realtime(self):
+        from __pypy__ import time
+        res = time.clock_gettime(time.CLOCK_REALTIME)
+        assert isinstance(res, float)
+
+    def test_clock_monotonic(self):
+        from __pypy__ import time
+        a = time.clock_gettime(time.CLOCK_MONOTONIC)
+        b = time.clock_gettime(time.CLOCK_MONOTONIC)
+        assert a <= b
+
+    def test_clock_getres(self):
+        from __pypy__ import time
+        res = time.clock_getres(time.CLOCK_REALTIME)
+        assert res > 0.0
+        assert res <= 1.0
diff --git a/pypy/module/_demo/test/test_sieve.py b/pypy/module/_demo/test/test_sieve.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_demo/test/test_sieve.py
@@ -0,0 +1,12 @@
+from pypy.conftest import gettestobjspace
+
+
+class AppTestSieve:
+    def setup_class(cls):
+        cls.space = gettestobjspace(usemodules=('_demo',))
+
+    def test_sieve(self):
+        import _demo
+        lst = _demo.sieve(100)
+        assert lst == [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41,
+                       43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97]
diff --git a/pypy/module/_ffi/test/test__ffi.py b/pypy/module/_ffi/test/test__ffi.py
--- a/pypy/module/_ffi/test/test__ffi.py
+++ b/pypy/module/_ffi/test/test__ffi.py
@@ -100,7 +100,10 @@
         from _ffi import CDLL, types
         libm = CDLL(self.libm_name)
         pow_addr = libm.getaddressindll('pow')
-        assert pow_addr == self.pow_addr & (sys.maxint*2-1)
+        fff = sys.maxint*2-1
+        if sys.platform == 'win32':
+            fff = sys.maxint*2+1
+        assert pow_addr == self.pow_addr & fff
 
     def test_func_fromaddr(self):
         import sys
diff --git a/pypy/module/_file/interp_file.py b/pypy/module/_file/interp_file.py
--- a/pypy/module/_file/interp_file.py
+++ b/pypy/module/_file/interp_file.py
@@ -5,14 +5,13 @@
 from pypy.rlib import streamio
 from pypy.rlib.rarithmetic import r_longlong
 from pypy.rlib.rstring import StringBuilder
-from pypy.module._file.interp_stream import (W_AbstractStream, StreamErrors,
-    wrap_streamerror, wrap_oserror_as_ioerror)
+from pypy.module._file.interp_stream import W_AbstractStream, StreamErrors
 from pypy.module.posix.interp_posix import dispatch_filename
 from pypy.interpreter.error import OperationError, operationerrfmt
 from pypy.interpreter.typedef import (TypeDef, GetSetProperty,
     interp_attrproperty, make_weakref_descr, interp_attrproperty_w)
 from pypy.interpreter.gateway import interp2app, unwrap_spec
-
+from pypy.interpreter.streamutil import wrap_streamerror, wrap_oserror_as_ioerror
 
 class W_File(W_AbstractStream):
     """An interp-level file object.  This implements the same interface than
diff --git a/pypy/module/_file/interp_stream.py b/pypy/module/_file/interp_stream.py
--- a/pypy/module/_file/interp_stream.py
+++ b/pypy/module/_file/interp_stream.py
@@ -2,27 +2,13 @@
 from pypy.rlib import streamio
 from pypy.rlib.streamio import StreamErrors
 
-from pypy.interpreter.error import OperationError, wrap_oserror2
+from pypy.interpreter.error import OperationError
 from pypy.interpreter.baseobjspace import ObjSpace, Wrappable
 from pypy.interpreter.typedef import TypeDef
 from pypy.interpreter.gateway import interp2app
+from pypy.interpreter.streamutil import wrap_streamerror, wrap_oserror_as_ioerror
 
 
-def wrap_streamerror(space, e, w_filename=None):
-    if isinstance(e, streamio.StreamError):
-        return OperationError(space.w_ValueError,
-                              space.wrap(e.message))
-    elif isinstance(e, OSError):
-        return wrap_oserror_as_ioerror(space, e, w_filename)
-    else:
-        # should not happen: wrap_streamerror() is only called when
-        # StreamErrors = (OSError, StreamError) are raised
-        return OperationError(space.w_IOError, space.w_None)
-
-def wrap_oserror_as_ioerror(space, e, w_filename=None):
-    return wrap_oserror2(space, e, w_filename,
-                         w_exception_class=space.w_IOError)
-
 class W_AbstractStream(Wrappable):
     """Base class for interp-level objects that expose streams to app-level"""
     slock = None
diff --git a/pypy/module/_io/__init__.py b/pypy/module/_io/__init__.py
--- a/pypy/module/_io/__init__.py
+++ b/pypy/module/_io/__init__.py
@@ -28,6 +28,7 @@
         }
 
     def init(self, space):
+        MixedModule.init(self, space)
         w_UnsupportedOperation = space.call_function(
             space.w_type,
             space.wrap('UnsupportedOperation'),
@@ -35,3 +36,9 @@
             space.newdict())
         space.setattr(self, space.wrap('UnsupportedOperation'),
                       w_UnsupportedOperation)
+
+    def shutdown(self, space):
+        # at shutdown, flush all open streams.  Ignore I/O errors.
+        from pypy.module._io.interp_iobase import get_autoflushher
+        get_autoflushher(space).flush_all(space)
+
diff --git a/pypy/module/_io/interp_iobase.py b/pypy/module/_io/interp_iobase.py
--- a/pypy/module/_io/interp_iobase.py
+++ b/pypy/module/_io/interp_iobase.py
@@ -5,6 +5,8 @@
 from pypy.interpreter.gateway import interp2app
 from pypy.interpreter.error import OperationError, operationerrfmt
 from pypy.rlib.rstring import StringBuilder
+from pypy.rlib import rweakref
+
 
 DEFAULT_BUFFER_SIZE = 8192
 
@@ -43,6 +45,8 @@
         self.space = space
         self.w_dict = space.newdict()
         self.__IOBase_closed = False
+        self.streamholder = None # needed by AutoFlusher
+        get_autoflushher(space).add(self)
 
     def getdict(self, space):
         return self.w_dict
@@ -98,6 +102,7 @@
             space.call_method(self, "flush")
         finally:
             self.__IOBase_closed = True
+            get_autoflushher(space).remove(self)
 
     def flush_w(self, space):
         if self._CLOSED():
@@ -303,3 +308,60 @@
     read = interp2app(W_RawIOBase.read_w),
     readall = interp2app(W_RawIOBase.readall_w),
 )
+
+
+# ------------------------------------------------------------
+# functions to make sure that all streams are flushed on exit
+# ------------------------------------------------------------
+
+class StreamHolder(object):
+
+    def __init__(self, w_iobase):
+        self.w_iobase_ref = rweakref.ref(w_iobase)
+        w_iobase.autoflusher = self
+
+    def autoflush(self, space):
+        w_iobase = self.w_iobase_ref()
+        if w_iobase is not None:
+            try:
+                space.call_method(w_iobase, 'flush')
+            except OperationError, e:
+                # if it's an IOError or ValueError, ignore it (ValueError is
+                # raised if by chance we are trying to flush a file which has
+                # already been closed)
+                if not (e.match(space, space.w_IOError) or
+                        e.match(space, space.w_ValueError)):
+                    raise
+        
+
+class AutoFlusher(object):
+    
+    def __init__(self, space):
+        self.streams = {}
+
+    def add(self, w_iobase):
+        assert w_iobase.streamholder is None
+        holder = StreamHolder(w_iobase)
+        w_iobase.streamholder = holder
+        self.streams[holder] = None
+
+    def remove(self, w_iobase):
+        holder = w_iobase.streamholder
+        if holder is not None:
+            del self.streams[holder]
+
+    def flush_all(self, space):
+        while self.streams:
+            for streamholder in self.streams.keys():
+                try:
+                    del self.streams[streamholder]
+                except KeyError:
+                    pass    # key was removed in the meantime
+                else:
+                    streamholder.autoflush(space)
+
+
+def get_autoflushher(space):
+    return space.fromcache(AutoFlusher)
+
+
diff --git a/pypy/module/_io/test/test_fileio.py b/pypy/module/_io/test/test_fileio.py
--- a/pypy/module/_io/test/test_fileio.py
+++ b/pypy/module/_io/test/test_fileio.py
@@ -160,3 +160,42 @@
         f.close()
         assert repr(f) == "<_io.FileIO [closed]>"
 
+def test_flush_at_exit():
+    from pypy import conftest
+    from pypy.tool.option import make_config, make_objspace
+    from pypy.tool.udir import udir
+
+    tmpfile = udir.join('test_flush_at_exit')
+    config = make_config(conftest.option)
+    space = make_objspace(config)
+    space.appexec([space.wrap(str(tmpfile))], """(tmpfile):
+        import io
+        f = io.open(tmpfile, 'w', encoding='ascii')
+        f.write('42')
+        # no flush() and no close()
+        import sys; sys._keepalivesomewhereobscure = f
+    """)
+    space.finish()
+    assert tmpfile.read() == '42'
+
+def test_flush_at_exit_IOError_and_ValueError():
+    from pypy import conftest
+    from pypy.tool.option import make_config, make_objspace
+
+    config = make_config(conftest.option)
+    space = make_objspace(config)
+    space.appexec([], """():
+        import io
+        class MyStream(io.IOBase):
+            def flush(self):
+                raise IOError
+
+        class MyStream2(io.IOBase):
+            def flush(self):
+                raise ValueError
+
+        s = MyStream()
+        s2 = MyStream2()
+        import sys; sys._keepalivesomewhereobscure = s
+    """)
+    space.finish() # the IOError has been ignored
diff --git a/pypy/module/_lsprof/interp_lsprof.py b/pypy/module/_lsprof/interp_lsprof.py
--- a/pypy/module/_lsprof/interp_lsprof.py
+++ b/pypy/module/_lsprof/interp_lsprof.py
@@ -22,7 +22,7 @@
 eci = ExternalCompilationInfo(
     separate_module_files=[srcdir.join('profiling.c')],
     export_symbols=['pypy_setup_profiling', 'pypy_teardown_profiling'])
-                                                     
+
 c_setup_profiling = rffi.llexternal('pypy_setup_profiling',
                                   [], lltype.Void,
                                   compilation_info = eci)
@@ -228,7 +228,7 @@
         if w_self.builtins:
             key = create_spec(space, w_arg)
             w_self._enter_builtin_call(key)
-    elif event == 'c_return':
+    elif event == 'c_return' or event == 'c_exception':
         if w_self.builtins:
             key = create_spec(space, w_arg)
             w_self._enter_builtin_return(key)
@@ -237,7 +237,7 @@
         pass
 
 class W_Profiler(Wrappable):
-    
+
     def __init__(self, space, w_callable, time_unit, subcalls, builtins):
         self.subcalls = subcalls
         self.builtins = builtins
diff --git a/pypy/module/_lsprof/test/test_cprofile.py b/pypy/module/_lsprof/test/test_cprofile.py
--- a/pypy/module/_lsprof/test/test_cprofile.py
+++ b/pypy/module/_lsprof/test/test_cprofile.py
@@ -117,6 +117,20 @@
             assert 0.9 < subentry.totaltime < 2.9
             #assert 0.9 < subentry.inlinetime < 2.9
 
+    def test_builtin_exception(self):
+        import math
+        import _lsprof
+
+        prof = _lsprof.Profiler()
+        prof.enable()
+        try:
+            math.sqrt("a")
+        except TypeError:
+            pass
+        prof.disable()
+        stats = prof.getstats()
+        assert len(stats) == 2
+
     def test_use_cprofile(self):
         import sys, os
         # XXX this is evil trickery to walk around the fact that we don't
diff --git a/pypy/module/_md5/test/test_md5.py b/pypy/module/_md5/test/test_md5.py
--- a/pypy/module/_md5/test/test_md5.py
+++ b/pypy/module/_md5/test/test_md5.py
@@ -28,7 +28,7 @@
         assert self.md5.digest_size == 16
         #assert self.md5.digestsize == 16        -- not on CPython
         assert self.md5.md5().digest_size == 16
-        if sys.version >= (2, 5):
+        if sys.version_info >= (2, 5):
             assert self.md5.blocksize == 1
             assert self.md5.md5().digestsize == 16
 
diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py
--- a/pypy/module/array/interp_array.py
+++ b/pypy/module/array/interp_array.py
@@ -11,6 +11,7 @@
 from pypy.objspace.std.register_all import register_all
 from pypy.rlib.rarithmetic import ovfcheck
 from pypy.rlib.unroll import unrolling_iterable
+from pypy.rlib.objectmodel import specialize
 from pypy.rpython.lltypesystem import lltype, rffi
 
 
@@ -159,13 +160,15 @@
 
 
 def make_array(mytype):
+    W_ArrayBase = globals()['W_ArrayBase']
+
     class W_Array(W_ArrayBase):
         itemsize = mytype.bytes
         typecode = mytype.typecode
 
         @staticmethod
         def register(typeorder):
-            typeorder[W_Array] = []
+            typeorder[W_Array] = [(W_ArrayBase, None)]
 
         def __init__(self, space):
             self.space = space
@@ -583,13 +586,29 @@
             raise OperationError(space.w_ValueError, space.wrap(msg))
 
     # Compare methods
-    def cmp__Array_ANY(space, self, other):
-        if isinstance(other, W_ArrayBase):
-            w_lst1 = array_tolist__Array(space, self)
-            w_lst2 = space.call_method(other, 'tolist')
-            return space.cmp(w_lst1, w_lst2)
-        else:
-            return space.w_NotImplemented
+    @specialize.arg(3)
+    def _cmp_impl(space, self, other, space_fn):
+        w_lst1 = array_tolist__Array(space, self)
+        w_lst2 = space.call_method(other, 'tolist')
+        return space_fn(w_lst1, w_lst2)
+
+    def eq__Array_ArrayBase(space, self, other):
+        return _cmp_impl(space, self, other, space.eq)
+
+    def ne__Array_ArrayBase(space, self, other):
+        return _cmp_impl(space, self, other, space.ne)
+
+    def lt__Array_ArrayBase(space, self, other):
+        return _cmp_impl(space, self, other, space.lt)
+
+    def le__Array_ArrayBase(space, self, other):
+        return _cmp_impl(space, self, other, space.le)
+
+    def gt__Array_ArrayBase(space, self, other):
+        return _cmp_impl(space, self, other, space.gt)
+
+    def ge__Array_ArrayBase(space, self, other):
+        return _cmp_impl(space, self, other, space.ge)
 
     # Misc methods
 
diff --git a/pypy/module/array/test/test_array.py b/pypy/module/array/test/test_array.py
--- a/pypy/module/array/test/test_array.py
+++ b/pypy/module/array/test/test_array.py
@@ -536,12 +536,6 @@
                 assert (a >= c) is False
                 assert (c >= a) is True
 
-                assert cmp(a, a) == 0
-                assert cmp(a, b) == 0
-                assert cmp(a, c) <  0
-                assert cmp(b, a) == 0
-                assert cmp(c, a) >  0
-
     def test_reduce(self):
         import pickle
         a = self.array('i', [1, 2, 3])
@@ -851,8 +845,11 @@
         cls.maxint = sys.maxint
 
 class AppTestArray(BaseArrayTests):
+    OPTIONS = {}
+
     def setup_class(cls):
-        cls.space = gettestobjspace(usemodules=('array', 'struct', '_rawffi'))
+        cls.space = gettestobjspace(usemodules=('array', 'struct', '_rawffi'),
+                                    **cls.OPTIONS)
         cls.w_array = cls.space.appexec([], """():
             import array
             return array.array
@@ -874,3 +871,7 @@
         a = self.array('b', range(4))
         a[::-1] = a
         assert a == self.array('b', [3, 2, 1, 0])
+
+
+class AppTestArrayBuiltinShortcut(AppTestArray):
+    OPTIONS = {'objspace.std.builtinshortcut': True}
diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py
--- a/pypy/module/cpyext/api.py
+++ b/pypy/module/cpyext/api.py
@@ -352,6 +352,9 @@
     'PyObject_AsReadBuffer', 'PyObject_AsWriteBuffer', 'PyObject_CheckReadBuffer',
 
     'PyOS_getsig', 'PyOS_setsig',
+    'PyThread_create_key', 'PyThread_delete_key', 'PyThread_set_key_value',
+    'PyThread_get_key_value', 'PyThread_delete_key_value',
+    'PyThread_ReInitTLS',
 
     'PyStructSequence_InitType', 'PyStructSequence_New',
 ]
@@ -385,6 +388,7 @@
         "Tuple": "space.w_tuple",
         "List": "space.w_list",
         "Set": "space.w_set",
+        "FrozenSet": "space.w_frozenset",
         "Int": "space.w_int",
         "Bool": "space.w_bool",
         "Float": "space.w_float",
@@ -406,7 +410,7 @@
         }.items():
         GLOBALS['Py%s_Type#' % (cpyname, )] = ('PyTypeObject*', pypyexpr)
 
-    for cpyname in 'Method List Int Long Dict Tuple Class'.split():
+    for cpyname in 'Method List Long Dict Tuple Class'.split():
         FORWARD_DECLS.append('typedef struct { PyObject_HEAD } '
                              'Py%sObject' % (cpyname, ))
 build_exported_objects()
@@ -616,6 +620,10 @@
         lambda space: init_pycobject(),
         lambda space: init_capsule(),
     ])
+    from pypy.module.posix.interp_posix import add_fork_hook
+    reinit_tls = rffi.llexternal('PyThread_ReInitTLS', [], lltype.Void,
+                                 compilation_info=eci)    
+    add_fork_hook('child', reinit_tls)
 
 def init_function(func):
     INIT_FUNCTIONS.append(func)
@@ -925,6 +933,7 @@
                                source_dir / "structseq.c",
                                source_dir / "capsule.c",
                                source_dir / "pysignals.c",
+                               source_dir / "thread.c",
                                ],
         separate_module_sources=separate_module_sources,
         export_symbols=export_symbols_eci,
diff --git a/pypy/module/cpyext/dictobject.py b/pypy/module/cpyext/dictobject.py
--- a/pypy/module/cpyext/dictobject.py
+++ b/pypy/module/cpyext/dictobject.py
@@ -184,8 +184,10 @@
         w_item = space.call_method(w_iter, "next")
         w_key, w_value = space.fixedview(w_item, 2)
         state = space.fromcache(RefcountState)
-        pkey[0]   = state.make_borrowed(w_dict, w_key)
-        pvalue[0] = state.make_borrowed(w_dict, w_value)
+        if pkey:
+            pkey[0]   = state.make_borrowed(w_dict, w_key)
+        if pvalue:
+            pvalue[0] = state.make_borrowed(w_dict, w_value)
         ppos[0] += 1
     except OperationError, e:
         if not e.match(space, space.w_StopIteration):
diff --git a/pypy/module/cpyext/eval.py b/pypy/module/cpyext/eval.py
--- a/pypy/module/cpyext/eval.py
+++ b/pypy/module/cpyext/eval.py
@@ -1,16 +1,24 @@
 from pypy.interpreter.error import OperationError
+from pypy.interpreter.astcompiler import consts
 from pypy.rpython.lltypesystem import rffi, lltype
 from pypy.module.cpyext.api import (
     cpython_api, CANNOT_FAIL, CONST_STRING, FILEP, fread, feof, Py_ssize_tP,
     cpython_struct)
 from pypy.module.cpyext.pyobject import PyObject, borrow_from
 from pypy.module.cpyext.pyerrors import PyErr_SetFromErrno
+from pypy.module.cpyext.funcobject import PyCodeObject
 from pypy.module.__builtin__ import compiling
 
 PyCompilerFlags = cpython_struct(
-    "PyCompilerFlags", ())
+    "PyCompilerFlags", (("cf_flags", rffi.INT),))
 PyCompilerFlagsPtr = lltype.Ptr(PyCompilerFlags)
 
+PyCF_MASK = (consts.CO_FUTURE_DIVISION | 
+             consts.CO_FUTURE_ABSOLUTE_IMPORT |
+             consts.CO_FUTURE_WITH_STATEMENT |
+             consts.CO_FUTURE_PRINT_FUNCTION |
+             consts.CO_FUTURE_UNICODE_LITERALS)
+
 @cpython_api([PyObject, PyObject, PyObject], PyObject)
 def PyEval_CallObjectWithKeywords(space, w_obj, w_arg, w_kwds):
     return space.call(w_obj, w_arg, w_kwds)
@@ -48,6 +56,17 @@
         return None
     return borrow_from(None, caller.w_globals)
 
+ at cpython_api([PyCodeObject, PyObject, PyObject], PyObject)
+def PyEval_EvalCode(space, w_code, w_globals, w_locals):
+    """This is a simplified interface to PyEval_EvalCodeEx(), with just
+    the code object, and the dictionaries of global and local variables.
+    The other arguments are set to NULL."""
+    if w_globals is None:
+        w_globals = space.w_None
+    if w_locals is None:
+        w_locals = space.w_None
+    return compiling.eval(space, w_code, w_globals, w_locals)
+
 @cpython_api([PyObject, PyObject], PyObject)
 def PyObject_CallObject(space, w_obj, w_arg):
     """
@@ -74,7 +93,7 @@
 Py_file_input = 257
 Py_eval_input = 258
 
-def compile_string(space, source, filename, start):
+def compile_string(space, source, filename, start, flags=0):
     w_source = space.wrap(source)
     start = rffi.cast(lltype.Signed, start)
     if start == Py_file_input:
@@ -86,7 +105,7 @@
     else:
         raise OperationError(space.w_ValueError, space.wrap(
             "invalid mode parameter for compilation"))
-    return compiling.compile(space, w_source, filename, mode)
+    return compiling.compile(space, w_source, filename, mode, flags)
 
 def run_string(space, source, filename, start, w_globals, w_locals):
     w_code = compile_string(space, source, filename, start)
@@ -109,6 +128,24 @@
     filename = "<string>"
     return run_string(space, source, filename, start, w_globals, w_locals)
 
+ at cpython_api([rffi.CCHARP, rffi.INT_real, PyObject, PyObject,
+              PyCompilerFlagsPtr], PyObject)
+def PyRun_StringFlags(space, source, start, w_globals, w_locals, flagsptr):
+    """Execute Python source code from str in the context specified by the
+    dictionaries globals and locals with the compiler flags specified by
+    flags.  The parameter start specifies the start token that should be used to
+    parse the source code.
+
+    Returns the result of executing the code as a Python object, or NULL if an
+    exception was raised."""
+    source = rffi.charp2str(source)
+    if flagsptr:
+        flags = rffi.cast(lltype.Signed, flagsptr.c_cf_flags)
+    else:
+        flags = 0
+    w_code = compile_string(space, source, "<string>", start, flags)
+    return compiling.eval(space, w_code, w_globals, w_locals)
+
 @cpython_api([FILEP, CONST_STRING, rffi.INT_real, PyObject, PyObject], PyObject)
 def PyRun_File(space, fp, filename, start, w_globals, w_locals):
     """This is a simplified interface to PyRun_FileExFlags() below, leaving
@@ -150,7 +187,7 @@
 
 @cpython_api([rffi.CCHARP, rffi.CCHARP, rffi.INT_real, PyCompilerFlagsPtr],
              PyObject)
-def Py_CompileStringFlags(space, source, filename, start, flags):
+def Py_CompileStringFlags(space, source, filename, start, flagsptr):
     """Parse and compile the Python source code in str, returning the
     resulting code object.  The start token is given by start; this
     can be used to constrain the code which can be compiled and should
@@ -160,7 +197,30 @@
     returns NULL if the code cannot be parsed or compiled."""
     source = rffi.charp2str(source)
     filename = rffi.charp2str(filename)
-    if flags:
-        raise OperationError(space.w_NotImplementedError, space.wrap(
-                "cpyext Py_CompileStringFlags does not accept flags"))
-    return compile_string(space, source, filename, start)
+    if flagsptr:
+        flags = rffi.cast(lltype.Signed, flagsptr.c_cf_flags)
+    else:
+        flags = 0
+    return compile_string(space, source, filename, start, flags)
+
+ at cpython_api([PyCompilerFlagsPtr], rffi.INT_real, error=CANNOT_FAIL)
+def PyEval_MergeCompilerFlags(space, cf):
+    """This function changes the flags of the current evaluation
+    frame, and returns true on success, false on failure."""
+    flags = rffi.cast(lltype.Signed, cf.c_cf_flags)
+    result = flags != 0
+    current_frame = space.getexecutioncontext().gettopframe_nohidden()
+    if current_frame:
+        codeflags = current_frame.pycode.co_flags
+        compilerflags = codeflags & PyCF_MASK
+        if compilerflags:
+            result = 1
+            flags |= compilerflags
+        # No future keyword at the moment
+        # if codeflags & CO_GENERATOR_ALLOWED:
+        #     result = 1
+        #     flags |= CO_GENERATOR_ALLOWED
+    cf.c_cf_flags = rffi.cast(rffi.INT, flags)
+    return result
+
+        
diff --git a/pypy/module/cpyext/funcobject.py b/pypy/module/cpyext/funcobject.py
--- a/pypy/module/cpyext/funcobject.py
+++ b/pypy/module/cpyext/funcobject.py
@@ -1,6 +1,6 @@
 from pypy.rpython.lltypesystem import rffi, lltype
 from pypy.module.cpyext.api import (
-    PyObjectFields, generic_cpy_call, CONST_STRING,
+    PyObjectFields, generic_cpy_call, CONST_STRING, CANNOT_FAIL,
     cpython_api, bootstrap_function, cpython_struct, build_type_checkers)
 from pypy.module.cpyext.pyobject import (
     PyObject, make_ref, from_ref, Py_DecRef, make_typedescr, borrow_from)
@@ -48,6 +48,7 @@
 
 PyFunction_Check, PyFunction_CheckExact = build_type_checkers("Function", Function)
 PyMethod_Check, PyMethod_CheckExact = build_type_checkers("Method", Method)
+PyCode_Check, PyCode_CheckExact = build_type_checkers("Code", PyCode)
 
 def function_attach(space, py_obj, w_obj):
     py_func = rffi.cast(PyFunctionObject, py_obj)
@@ -167,3 +168,9 @@
                              freevars=[],
                              cellvars=[]))
 
+ at cpython_api([PyObject], rffi.INT_real, error=CANNOT_FAIL)
+def PyCode_GetNumFree(space, w_co):
+    """Return the number of free variables in co."""
+    co = space.interp_w(PyCode, w_co)
+    return len(co.co_freevars)
+
diff --git a/pypy/module/cpyext/include/Python.h b/pypy/module/cpyext/include/Python.h
--- a/pypy/module/cpyext/include/Python.h
+++ b/pypy/module/cpyext/include/Python.h
@@ -113,6 +113,7 @@
 #include "compile.h"
 #include "frameobject.h"
 #include "eval.h"
+#include "pymath.h"
 #include "pymem.h"
 #include "pycobject.h"
 #include "pycapsule.h"
diff --git a/pypy/module/cpyext/include/code.h b/pypy/module/cpyext/include/code.h
--- a/pypy/module/cpyext/include/code.h
+++ b/pypy/module/cpyext/include/code.h
@@ -13,13 +13,19 @@
 
 /* Masks for co_flags above */
 /* These values are also in funcobject.py */
-#define CO_OPTIMIZED	0x0001
-#define CO_NEWLOCALS	0x0002
-#define CO_VARARGS	0x0004
-#define CO_VARKEYWORDS	0x0008
+#define CO_OPTIMIZED    0x0001
+#define CO_NEWLOCALS    0x0002
+#define CO_VARARGS      0x0004
+#define CO_VARKEYWORDS  0x0008
 #define CO_NESTED       0x0010
 #define CO_GENERATOR    0x0020
 
+#define CO_FUTURE_DIVISION         0x02000
+#define CO_FUTURE_ABSOLUTE_IMPORT  0x04000
+#define CO_FUTURE_WITH_STATEMENT   0x08000
+#define CO_FUTURE_PRINT_FUNCTION   0x10000
+#define CO_FUTURE_UNICODE_LITERALS 0x20000
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/pypy/module/cpyext/include/intobject.h b/pypy/module/cpyext/include/intobject.h
--- a/pypy/module/cpyext/include/intobject.h
+++ b/pypy/module/cpyext/include/intobject.h
@@ -7,6 +7,11 @@
 extern "C" {
 #endif
 
+typedef struct {
+    PyObject_HEAD
+    long ob_ival;
+} PyIntObject;
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/pypy/module/cpyext/include/object.h b/pypy/module/cpyext/include/object.h
--- a/pypy/module/cpyext/include/object.h
+++ b/pypy/module/cpyext/include/object.h
@@ -56,6 +56,8 @@
 #define Py_TYPE(ob)		(((PyObject*)(ob))->ob_type)
 #define Py_SIZE(ob)		(((PyVarObject*)(ob))->ob_size)
 
+#define _Py_ForgetReference(ob) /* nothing */
+
 #define Py_None (&_Py_NoneStruct)
 
 /*
diff --git a/pypy/module/cpyext/include/pymath.h b/pypy/module/cpyext/include/pymath.h
new file mode 100644
--- /dev/null
+++ b/pypy/module/cpyext/include/pymath.h
@@ -0,0 +1,20 @@
+#ifndef Py_PYMATH_H
+#define Py_PYMATH_H
+
+/**************************************************************************
+Symbols and macros to supply platform-independent interfaces to mathematical
+functions and constants
+**************************************************************************/
+
+/* HUGE_VAL is supposed to expand to a positive double infinity.  Python
+ * uses Py_HUGE_VAL instead because some platforms are broken in this
+ * respect.  We used to embed code in pyport.h to try to worm around that,
+ * but different platforms are broken in conflicting ways.  If you're on
+ * a platform where HUGE_VAL is defined incorrectly, fiddle your Python
+ * config to #define Py_HUGE_VAL to something that works on your platform.
+ */
+#ifndef Py_HUGE_VAL
+#define Py_HUGE_VAL HUGE_VAL
+#endif
+
+#endif /* Py_PYMATH_H */
diff --git a/pypy/module/cpyext/include/pythonrun.h b/pypy/module/cpyext/include/pythonrun.h
--- a/pypy/module/cpyext/include/pythonrun.h
+++ b/pypy/module/cpyext/include/pythonrun.h
@@ -19,6 +19,14 @@
     int cf_flags;  /* bitmask of CO_xxx flags relevant to future */
 } PyCompilerFlags;
 
+#define PyCF_MASK (CO_FUTURE_DIVISION | CO_FUTURE_ABSOLUTE_IMPORT | \
+                   CO_FUTURE_WITH_STATEMENT | CO_FUTURE_PRINT_FUNCTION | \
+                   CO_FUTURE_UNICODE_LITERALS)
+#define PyCF_MASK_OBSOLETE (CO_NESTED)
+#define PyCF_SOURCE_IS_UTF8  0x0100
+#define PyCF_DONT_IMPLY_DEDENT 0x0200
+#define PyCF_ONLY_AST 0x0400
+
 #define Py_CompileString(str, filename, start) Py_CompileStringFlags(str, filename, start, NULL)
 
 #ifdef __cplusplus
diff --git a/pypy/module/cpyext/include/pythread.h b/pypy/module/cpyext/include/pythread.h
--- a/pypy/module/cpyext/include/pythread.h
+++ b/pypy/module/cpyext/include/pythread.h
@@ -3,8 +3,26 @@
 
 #define WITH_THREAD
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 typedef void *PyThread_type_lock;
 #define WAIT_LOCK	1
 #define NOWAIT_LOCK	0
 
+/* Thread Local Storage (TLS) API */
+PyAPI_FUNC(int) PyThread_create_key(void);
+PyAPI_FUNC(void) PyThread_delete_key(int);
+PyAPI_FUNC(int) PyThread_set_key_value(int, void *);
+PyAPI_FUNC(void *) PyThread_get_key_value(int);
+PyAPI_FUNC(void) PyThread_delete_key_value(int key);
+
+/* Cleanup after a fork */
+PyAPI_FUNC(void) PyThread_ReInitTLS(void);
+
+#ifdef __cplusplus
+}
 #endif
+
+#endif
diff --git a/pypy/module/cpyext/intobject.py b/pypy/module/cpyext/intobject.py
--- a/pypy/module/cpyext/intobject.py
+++ b/pypy/module/cpyext/intobject.py
@@ -2,11 +2,37 @@
 from pypy.rpython.lltypesystem import rffi, lltype
 from pypy.interpreter.error import OperationError
 from pypy.module.cpyext.api import (
-    cpython_api, build_type_checkers, PyObject,
-    CONST_STRING, CANNOT_FAIL, Py_ssize_t)
+    cpython_api, cpython_struct, build_type_checkers, bootstrap_function,
+    PyObject, PyObjectFields, CONST_STRING, CANNOT_FAIL, Py_ssize_t)
+from pypy.module.cpyext.pyobject import (
+    make_typedescr, track_reference, RefcountState, from_ref)
 from pypy.rlib.rarithmetic import r_uint, intmask, LONG_TEST
+from pypy.objspace.std.intobject import W_IntObject
 import sys
 
+PyIntObjectStruct = lltype.ForwardReference()
+PyIntObject = lltype.Ptr(PyIntObjectStruct)
+PyIntObjectFields = PyObjectFields + \
+    (("ob_ival", rffi.LONG),)
+cpython_struct("PyIntObject", PyIntObjectFields, PyIntObjectStruct)
+
+ at bootstrap_function
+def init_intobject(space):
+    "Type description of PyIntObject"
+    make_typedescr(space.w_int.instancetypedef,
+                   basestruct=PyIntObject.TO,
+                   realize=int_realize)
+
+def int_realize(space, obj):
+    intval = rffi.cast(lltype.Signed, rffi.cast(PyIntObject, obj).c_ob_ival)
+    w_type = from_ref(space, rffi.cast(PyObject, obj.c_ob_type))
+    w_obj = space.allocate_instance(W_IntObject, w_type)
+    w_obj.__init__(intval)
+    track_reference(space, obj, w_obj)
+    state = space.fromcache(RefcountState)
+    state.set_lifeline(w_obj, obj)
+    return w_obj
+
 PyInt_Check, PyInt_CheckExact = build_type_checkers("Int")
 
 @cpython_api([], lltype.Signed, error=CANNOT_FAIL)
diff --git a/pypy/module/cpyext/object.py b/pypy/module/cpyext/object.py
--- a/pypy/module/cpyext/object.py
+++ b/pypy/module/cpyext/object.py
@@ -193,7 +193,7 @@
     if not obj:
         PyErr_NoMemory(space)
     obj.c_ob_type = type
-    _Py_NewReference(space, obj)
+    obj.c_ob_refcnt = 1
     return obj
 
 @cpython_api([PyVarObject, PyTypeObjectPtr, Py_ssize_t], PyObject)
diff --git a/pypy/module/cpyext/pyobject.py b/pypy/module/cpyext/pyobject.py
--- a/pypy/module/cpyext/pyobject.py
+++ b/pypy/module/cpyext/pyobject.py
@@ -17,6 +17,7 @@
 
 class BaseCpyTypedescr(object):
     basestruct = PyObject.TO
+    W_BaseObject = W_ObjectObject
 
     def get_dealloc(self, space):
         from pypy.module.cpyext.typeobject import subtype_dealloc
@@ -51,10 +52,14 @@
     def attach(self, space, pyobj, w_obj):
         pass
 
-    def realize(self, space, ref):
-        # For most types, a reference cannot exist without
-        # a real interpreter object
-        raise InvalidPointerException(str(ref))
+    def realize(self, space, obj):
+        w_type = from_ref(space, rffi.cast(PyObject, obj.c_ob_type))
+        w_obj = space.allocate_instance(self.W_BaseObject, w_type)
+        track_reference(space, obj, w_obj)
+        if w_type is not space.gettypefor(self.W_BaseObject):
+            state = space.fromcache(RefcountState)
+            state.set_lifeline(w_obj, obj)
+        return w_obj
 
 typedescr_cache = {}
 
@@ -369,13 +374,7 @@
     obj.c_ob_refcnt = 1
     w_type = from_ref(space, rffi.cast(PyObject, obj.c_ob_type))
     assert isinstance(w_type, W_TypeObject)
-    if w_type.is_cpytype():
-        w_obj = space.allocate_instance(W_ObjectObject, w_type)
-        track_reference(space, obj, w_obj)
-        state = space.fromcache(RefcountState)
-        state.set_lifeline(w_obj, obj)
-    else:
-        assert False, "Please add more cases in _Py_NewReference()"
+    get_typedescr(w_type.instancetypedef).realize(space, obj)
 
 def _Py_Dealloc(space, obj):
     from pypy.module.cpyext.api import generic_cpy_call_dont_decref
diff --git a/pypy/module/cpyext/src/thread.c b/pypy/module/cpyext/src/thread.c
new file mode 100644
--- /dev/null
+++ b/pypy/module/cpyext/src/thread.c
@@ -0,0 +1,313 @@
+#include <Python.h>
+#include "pythread.h"
+
+/* ------------------------------------------------------------------------
+Per-thread data ("key") support.
+
+Use PyThread_create_key() to create a new key.  This is typically shared
+across threads.
+
+Use PyThread_set_key_value(thekey, value) to associate void* value with
+thekey in the current thread.  Each thread has a distinct mapping of thekey
+to a void* value.  Caution:  if the current thread already has a mapping
+for thekey, value is ignored.
+
+Use PyThread_get_key_value(thekey) to retrieve the void* value associated
+with thekey in the current thread.  This returns NULL if no value is
+associated with thekey in the current thread.
+
+Use PyThread_delete_key_value(thekey) to forget the current thread's associated
+value for thekey.  PyThread_delete_key(thekey) forgets the values associated
+with thekey across *all* threads.
+
+While some of these functions have error-return values, none set any
+Python exception.
+
+None of the functions does memory management on behalf of the void* values.
+You need to allocate and deallocate them yourself.  If the void* values
+happen to be PyObject*, these functions don't do refcount operations on
+them either.
+
+The GIL does not need to be held when calling these functions; they supply
+their own locking.  This isn't true of PyThread_create_key(), though (see
+next paragraph).
+
+There's a hidden assumption that PyThread_create_key() will be called before
+any of the other functions are called.  There's also a hidden assumption
+that calls to PyThread_create_key() are serialized externally.
+------------------------------------------------------------------------ */
+
+#ifdef MS_WINDOWS
+#include <windows.h>
+
+/* use native Windows TLS functions */
+#define Py_HAVE_NATIVE_TLS
+
+int
+PyThread_create_key(void)
+{
+    return (int) TlsAlloc();
+}
+
+void
+PyThread_delete_key(int key)
+{
+    TlsFree(key);
+}
+
+/* We must be careful to emulate the strange semantics implemented in thread.c,
+ * where the value is only set if it hasn't been set before.
+ */
+int
+PyThread_set_key_value(int key, void *value)
+{
+    BOOL ok;
+    void *oldvalue;
+
+    assert(value != NULL);
+    oldvalue = TlsGetValue(key);
+    if (oldvalue != NULL)
+        /* ignore value if already set */
+        return 0;
+    ok = TlsSetValue(key, value);
+    if (!ok)
+        return -1;
+    return 0;
+}
+
+void *
+PyThread_get_key_value(int key)
+{
+    /* because TLS is used in the Py_END_ALLOW_THREAD macro,
+     * it is necessary to preserve the windows error state, because
+     * it is assumed to be preserved across the call to the macro.
+     * Ideally, the macro should be fixed, but it is simpler to
+     * do it here.
+     */
+    DWORD error = GetLastError();
+    void *result = TlsGetValue(key);
+    SetLastError(error);
+    return result;
+}
+
+void
+PyThread_delete_key_value(int key)
+{
+    /* NULL is used as "key missing", and it is also the default
+     * given by TlsGetValue() if nothing has been set yet.
+     */
+    TlsSetValue(key, NULL);
+}
+
+/* reinitialization of TLS is not necessary after fork when using
+ * the native TLS functions.  And forking isn't supported on Windows either.
+ */
+void
+PyThread_ReInitTLS(void)
+{}
+
+#else  /* MS_WINDOWS */
+
+/* A singly-linked list of struct key objects remembers all the key->value
+ * associations.  File static keyhead heads the list.  keymutex is used
+ * to enforce exclusion internally.
+ */
+struct key {
+    /* Next record in the list, or NULL if this is the last record. */
+    struct key *next;
+
+    /* The thread id, according to PyThread_get_thread_ident(). */
+    long id;
+
+    /* The key and its associated value. */
+    int key;
+    void *value;
+};
+
+static struct key *keyhead = NULL;
+static PyThread_type_lock keymutex = NULL;
+static int nkeys = 0;  /* PyThread_create_key() hands out nkeys+1 next */
+
+/* Internal helper.
+ * If the current thread has a mapping for key, the appropriate struct key*
+ * is returned.  NB:  value is ignored in this case!
+ * If there is no mapping for key in the current thread, then:
+ *     If value is NULL, NULL is returned.
+ *     Else a mapping of key to value is created for the current thread,
+ *     and a pointer to a new struct key* is returned; except that if
+ *     malloc() can't find room for a new struct key*, NULL is returned.
+ * So when value==NULL, this acts like a pure lookup routine, and when
+ * value!=NULL, this acts like dict.setdefault(), returning an existing
+ * mapping if one exists, else creating a new mapping.
+ *
+ * Caution:  this used to be too clever, trying to hold keymutex only
+ * around the "p->next = keyhead; keyhead = p" pair.  That allowed
+ * another thread to mutate the list, via key deletion, concurrent with
+ * find_key() crawling over the list.  Hilarity ensued.  For example, when
+ * the for-loop here does "p = p->next", p could end up pointing at a
+ * record that PyThread_delete_key_value() was concurrently free()'ing.
+ * That could lead to anything, from failing to find a key that exists, to
+ * segfaults.  Now we lock the whole routine.
+ */
+static struct key *
+find_key(int key, void *value)
+{
+    struct key *p, *prev_p;
+    long id = PyThread_get_thread_ident();
+
+    if (!keymutex)
+        return NULL;
+    PyThread_acquire_lock(keymutex, 1);
+    prev_p = NULL;
+    for (p = keyhead; p != NULL; p = p->next) {
+        if (p->id == id && p->key == key)
+            goto Done;
+        /* Sanity check.  These states should never happen but if
+         * they do we must abort.  Otherwise we'll end up spinning in
+         * in a tight loop with the lock held.  A similar check is done
+         * in pystate.c tstate_delete_common().  */
+        if (p == prev_p)
+            Py_FatalError("tls find_key: small circular list(!)");
+        prev_p = p;
+        if (p->next == keyhead)
+            Py_FatalError("tls find_key: circular list(!)");
+    }
+    if (value == NULL) {
+        assert(p == NULL);
+        goto Done;
+    }
+    p = (struct key *)malloc(sizeof(struct key));
+    if (p != NULL) {
+        p->id = id;
+        p->key = key;
+        p->value = value;
+        p->next = keyhead;
+        keyhead = p;
+    }
+ Done:
+    PyThread_release_lock(keymutex);
+    return p;
+}
+
+/* Return a new key.  This must be called before any other functions in
+ * this family, and callers must arrange to serialize calls to this
+ * function.  No violations are detected.
+ */
+int
+PyThread_create_key(void)
+{
+    /* All parts of this function are wrong if it's called by multiple
+     * threads simultaneously.
+     */
+    if (keymutex == NULL)
+        keymutex = PyThread_allocate_lock();
+    return ++nkeys;
+}
+
+/* Forget the associations for key across *all* threads. */
+void
+PyThread_delete_key(int key)
+{
+    struct key *p, **q;
+
+    PyThread_acquire_lock(keymutex, 1);
+    q = &keyhead;
+    while ((p = *q) != NULL) {
+        if (p->key == key) {
+            *q = p->next;
+            free((void *)p);
+            /* NB This does *not* free p->value! */
+        }
+        else
+            q = &p->next;
+    }
+    PyThread_release_lock(keymutex);
+}
+
+/* Confusing:  If the current thread has an association for key,
+ * value is ignored, and 0 is returned.  Else an attempt is made to create
+ * an association of key to value for the current thread.  0 is returned
+ * if that succeeds, but -1 is returned if there's not enough memory
+ * to create the association.  value must not be NULL.
+ */
+int
+PyThread_set_key_value(int key, void *value)
+{
+    struct key *p;
+
+    assert(value != NULL);
+    p = find_key(key, value);
+    if (p == NULL)
+        return -1;
+    else
+        return 0;
+}
+
+/* Retrieve the value associated with key in the current thread, or NULL
+ * if the current thread doesn't have an association for key.
+ */
+void *
+PyThread_get_key_value(int key)
+{
+    struct key *p = find_key(key, NULL);
+
+    if (p == NULL)
+        return NULL;
+    else
+        return p->value;
+}
+
+/* Forget the current thread's association for key, if any. */
+void
+PyThread_delete_key_value(int key)
+{
+    long id = PyThread_get_thread_ident();
+    struct key *p, **q;
+
+    PyThread_acquire_lock(keymutex, 1);
+    q = &keyhead;
+    while ((p = *q) != NULL) {
+        if (p->key == key && p->id == id) {
+            *q = p->next;
+            free((void *)p);
+            /* NB This does *not* free p->value! */
+            break;
+        }
+        else
+            q = &p->next;
+    }
+    PyThread_release_lock(keymutex);
+}
+
+/* Forget everything not associated with the current thread id.
+ * This function is called from PyOS_AfterFork().  It is necessary
+ * because other thread ids which were in use at the time of the fork
+ * may be reused for new threads created in the forked process.
+ */
+void
+PyThread_ReInitTLS(void)
+{
+    long id = PyThread_get_thread_ident();
+    struct key *p, **q;
+
+    if (!keymutex)
+        return;
+
+    /* As with interpreter_lock in PyEval_ReInitThreads()
+       we just create a new lock without freeing the old one */
+    keymutex = PyThread_allocate_lock();
+
+    /* Delete all keys which do not match the current thread id */
+    q = &keyhead;
+    while ((p = *q) != NULL) {
+        if (p->id != id) {
+            *q = p->next;
+            free((void *)p);
+            /* NB This does *not* free p->value! */
+        }
+        else
+            q = &p->next;
+    }
+}
+
+#endif  /* !MS_WINDOWS */
diff --git a/pypy/module/cpyext/stubs.py b/pypy/module/cpyext/stubs.py
--- a/pypy/module/cpyext/stubs.py
+++ b/pypy/module/cpyext/stubs.py
@@ -182,16 +182,6 @@
     used as the positional and keyword parameters to the object's constructor."""
     raise NotImplementedError
 
- at cpython_api([PyObject], rffi.INT_real, error=CANNOT_FAIL)
-def PyCode_Check(space, co):
-    """Return true if co is a code object"""
-    raise NotImplementedError
-
- at cpython_api([PyObject], rffi.INT_real, error=CANNOT_FAIL)
-def PyCode_GetNumFree(space, co):
-    """Return the number of free variables in co."""
-    raise NotImplementedError
-
 @cpython_api([PyObject], rffi.INT_real, error=-1)
 def PyCodec_Register(space, search_function):
     """Register a new codec search function.
@@ -1293,28 +1283,6 @@
     that haven't been explicitly destroyed at that point."""
     raise NotImplementedError
 
- at cpython_api([rffi.VOIDP], lltype.Void)
-def Py_AddPendingCall(space, func):
-    """Post a notification to the Python main thread.  If successful, func will
-    be called with the argument arg at the earliest convenience.  func will be
-    called having the global interpreter lock held and can thus use the full
-    Python API and can take any action such as setting object attributes to
-    signal IO completion.  It must return 0 on success, or -1 signalling an
-    exception.  The notification function won't be interrupted to perform another
-    asynchronous notification recursively, but it can still be interrupted to
-    switch threads if the global interpreter lock is released, for example, if it
-    calls back into Python code.
-
-    This function returns 0 on success in which case the notification has been
-    scheduled.  Otherwise, for example if the notification buffer is full, it
-    returns -1 without setting any exception.
-
-    This function can be called on any thread, be it a Python thread or some
-    other system thread.  If it is a Python thread, it doesn't matter if it holds
-    the global interpreter lock or not.
-    """
-    raise NotImplementedError
-
 @cpython_api([Py_tracefunc, PyObject], lltype.Void)
 def PyEval_SetProfile(space, func, obj):
     """Set the profiler function to func.  The obj parameter is passed to the
@@ -1875,26 +1843,6 @@
     """
     raise NotImplementedError
 
- at cpython_api([Py_UNICODE], rffi.INT_real, error=CANNOT_FAIL)
-def Py_UNICODE_ISTITLE(space, ch):
-    """Return 1 or 0 depending on whether ch is a titlecase character."""
-    raise NotImplementedError
-
- at cpython_api([Py_UNICODE], rffi.INT_real, error=CANNOT_FAIL)
-def Py_UNICODE_ISDIGIT(space, ch):
-    """Return 1 or 0 depending on whether ch is a digit character."""
-    raise NotImplementedError
-
- at cpython_api([Py_UNICODE], rffi.INT_real, error=CANNOT_FAIL)
-def Py_UNICODE_ISNUMERIC(space, ch):
-    """Return 1 or 0 depending on whether ch is a numeric character."""
-    raise NotImplementedError
-
- at cpython_api([Py_UNICODE], rffi.INT_real, error=CANNOT_FAIL)
-def Py_UNICODE_ISALPHA(space, ch):
-    """Return 1 or 0 depending on whether ch is an alphabetic character."""
-    raise NotImplementedError
-
 @cpython_api([rffi.CCHARP], PyObject)
 def PyUnicode_FromFormat(space, format):
     """Take a C printf()-style format string and a variable number of
@@ -2339,17 +2287,6 @@
     use the default error handling."""
     raise NotImplementedError
 
- at cpython_api([PyObject, PyObject, Py_ssize_t, Py_ssize_t, rffi.INT_real], rffi.INT_real, error=-1)
-def PyUnicode_Tailmatch(space, str, substr, start, end, direction):
-    """Return 1 if substr matches str*[*start:end] at the given tail end
-    (direction == -1 means to do a prefix match, direction == 1 a suffix match),
-    0 otherwise. Return -1 if an error occurred.
-
-    This function used an int type for start and end. This
-    might require changes in your code for properly supporting 64-bit
-    systems."""
-    raise NotImplementedError
-
 @cpython_api([PyObject, PyObject, Py_ssize_t, Py_ssize_t, rffi.INT_real], Py_ssize_t, error=-2)
 def PyUnicode_Find(space, str, substr, start, end, direction):
     """Return the first position of substr in str*[*start:end] using the given
@@ -2373,16 +2310,6 @@
     properly supporting 64-bit systems."""
     raise NotImplementedError
 
- at cpython_api([PyObject, PyObject, PyObject, Py_ssize_t], PyObject)
-def PyUnicode_Replace(space, str, substr, replstr, maxcount):
-    """Replace at most maxcount occurrences of substr in str with replstr and
-    return the resulting Unicode object. maxcount == -1 means replace all
-    occurrences.
-
-    This function used an int type for maxcount. This might
-    require changes in your code for properly supporting 64-bit systems."""
-    raise NotImplementedError
-
 @cpython_api([PyObject, PyObject, rffi.INT_real], PyObject)
 def PyUnicode_RichCompare(space, left, right, op):
     """Rich compare two unicode strings and return one of the following:
@@ -2556,17 +2483,6 @@
     source code is read from fp instead of an in-memory string."""
     raise NotImplementedError
 
- at cpython_api([rffi.CCHARP, rffi.INT_real, PyObject, PyObject, PyCompilerFlags], PyObject)
-def PyRun_StringFlags(space, str, start, globals, locals, flags):
-    """Execute Python source code from str in the context specified by the
-    dictionaries globals and locals with the compiler flags specified by
-    flags.  The parameter start specifies the start token that should be used to
-    parse the source code.
-
-    Returns the result of executing the code as a Python object, or NULL if an
-    exception was raised."""
-    raise NotImplementedError
-
 @cpython_api([FILE, rffi.CCHARP, rffi.INT_real, PyObject, PyObject, rffi.INT_real], PyObject)
 def PyRun_FileEx(space, fp, filename, start, globals, locals, closeit):
     """This is a simplified interface to PyRun_FileExFlags() below, leaving
@@ -2587,13 +2503,6 @@
     returns."""
     raise NotImplementedError
 
- at cpython_api([PyCodeObject, PyObject, PyObject], PyObject)
-def PyEval_EvalCode(space, co, globals, locals):
-    """This is a simplified interface to PyEval_EvalCodeEx(), with just
-    the code object, and the dictionaries of global and local variables.
-    The other arguments are set to NULL."""
-    raise NotImplementedError
-
 @cpython_api([PyCodeObject, PyObject, PyObject, PyObjectP, rffi.INT_real, PyObjectP, rffi.INT_real, PyObjectP, rffi.INT_real, PyObject], PyObject)
 def PyEval_EvalCodeEx(space, co, globals, locals, args, argcount, kws, kwcount, defs, defcount, closure):
     """Evaluate a precompiled code object, given a particular environment for its
@@ -2618,12 +2527,6 @@
     throw() methods of generator objects."""
     raise NotImplementedError
 
- at cpython_api([PyCompilerFlags], rffi.INT_real, error=CANNOT_FAIL)
-def PyEval_MergeCompilerFlags(space, cf):
-    """This function changes the flags of the current evaluation frame, and returns
-    true on success, false on failure."""
-    raise NotImplementedError
-
 @cpython_api([PyObject], rffi.INT_real, error=CANNOT_FAIL)
 def PyWeakref_Check(space, ob):
     """Return true if ob is either a reference or proxy object.
diff --git a/pypy/module/cpyext/stubsactive.py b/pypy/module/cpyext/stubsactive.py
--- a/pypy/module/cpyext/stubsactive.py
+++ b/pypy/module/cpyext/stubsactive.py
@@ -38,3 +38,31 @@
 def Py_MakePendingCalls(space):
     return 0
 
+pending_call = lltype.Ptr(lltype.FuncType([rffi.VOIDP], rffi.INT_real))
+ at cpython_api([pending_call, rffi.VOIDP], rffi.INT_real, error=-1)
+def Py_AddPendingCall(space, func, arg):
+    """Post a notification to the Python main thread.  If successful,
+    func will be called with the argument arg at the earliest
+    convenience.  func will be called having the global interpreter
+    lock held and can thus use the full Python API and can take any
+    action such as setting object attributes to signal IO completion.
+    It must return 0 on success, or -1 signalling an exception.  The
+    notification function won't be interrupted to perform another
+    asynchronous notification recursively, but it can still be
+    interrupted to switch threads if the global interpreter lock is
+    released, for example, if it calls back into Python code.
+
+    This function returns 0 on success in which case the notification
+    has been scheduled.  Otherwise, for example if the notification
+    buffer is full, it returns -1 without setting any exception.
+
+    This function can be called on any thread, be it a Python thread
+    or some other system thread.  If it is a Python thread, it doesn't
+    matter if it holds the global interpreter lock or not.
+    """
+    return -1
+
+thread_func = lltype.Ptr(lltype.FuncType([rffi.VOIDP], lltype.Void))
+ at cpython_api([thread_func, rffi.VOIDP], rffi.INT_real, error=-1)
+def PyThread_start_new_thread(space, func, arg):
+    return -1
diff --git a/pypy/module/cpyext/test/test_dictobject.py b/pypy/module/cpyext/test/test_dictobject.py
--- a/pypy/module/cpyext/test/test_dictobject.py
+++ b/pypy/module/cpyext/test/test_dictobject.py
@@ -112,6 +112,37 @@
         assert space.eq_w(space.len(w_copy), space.len(w_dict))
         assert space.eq_w(w_copy, w_dict)
 
+    def test_iterkeys(self, space, api):
+        w_dict = space.sys.getdict(space)
+        py_dict = make_ref(space, w_dict)
+
+        ppos = lltype.malloc(Py_ssize_tP.TO, 1, flavor='raw')
+        pkey = lltype.malloc(PyObjectP.TO, 1, flavor='raw')
+        pvalue = lltype.malloc(PyObjectP.TO, 1, flavor='raw')
+
+        keys_w = []
+        values_w = []
+        try:
+            ppos[0] = 0
+            while api.PyDict_Next(w_dict, ppos, pkey, None):
+                w_key = from_ref(space, pkey[0])
+                keys_w.append(w_key)
+            ppos[0] = 0
+            while api.PyDict_Next(w_dict, ppos, None, pvalue):
+                w_value = from_ref(space, pvalue[0])
+                values_w.append(w_value)
+        finally:
+            lltype.free(ppos, flavor='raw')
+            lltype.free(pkey, flavor='raw')
+            lltype.free(pvalue, flavor='raw')
+
+        api.Py_DecRef(py_dict) # release borrowed references
+
+        assert space.eq_w(space.newlist(keys_w),
+                          space.call_method(w_dict, "keys"))
+        assert space.eq_w(space.newlist(values_w),
+                          space.call_method(w_dict, "values"))
+
     def test_dictproxy(self, space, api):
         w_dict = space.sys.get('modules')
         w_proxy = api.PyDictProxy_New(w_dict)
diff --git a/pypy/module/cpyext/test/test_eval.py b/pypy/module/cpyext/test/test_eval.py
--- a/pypy/module/cpyext/test/test_eval.py
+++ b/pypy/module/cpyext/test/test_eval.py
@@ -2,9 +2,10 @@
 from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase
 from pypy.module.cpyext.test.test_api import BaseApiTest
 from pypy.module.cpyext.eval import (
-    Py_single_input, Py_file_input, Py_eval_input)
+    Py_single_input, Py_file_input, Py_eval_input, PyCompilerFlags)
 from pypy.module.cpyext.api import fopen, fclose, fileno, Py_ssize_tP
 from pypy.interpreter.gateway import interp2app
+from pypy.interpreter.astcompiler import consts
 from pypy.tool.udir import udir
 import sys, os
 
@@ -63,6 +64,22 @@
 
         assert space.int_w(w_res) == 10
 
+    def test_evalcode(self, space, api):
+        w_f = space.appexec([], """():
+            def f(*args):
+                assert isinstance(args, tuple)
+                return len(args) + 8
+            return f
+            """)
+
+        w_t = space.newtuple([space.wrap(1), space.wrap(2)])
+        w_globals = space.newdict()
+        w_locals = space.newdict()
+        space.setitem(w_locals, space.wrap("args"), w_t)
+        w_res = api.PyEval_EvalCode(w_f.code, w_globals, w_locals)
+
+        assert space.int_w(w_res) == 10
+
     def test_run_simple_string(self, space, api):
         def run(code):
             buf = rffi.str2charp(code)
@@ -96,6 +113,20 @@
         assert 42 * 43 == space.unwrap(
             api.PyObject_GetItem(w_globals, space.wrap("a")))
 
+    def test_run_string_flags(self, space, api):
+        flags = lltype.malloc(PyCompilerFlags, flavor='raw')
+        flags.c_cf_flags = rffi.cast(rffi.INT, consts.PyCF_SOURCE_IS_UTF8)
+        w_globals = space.newdict()
+        buf = rffi.str2charp("a = u'caf\xc3\xa9'")
+        try:
+            api.PyRun_StringFlags(buf, Py_single_input,
+                                  w_globals, w_globals, flags)
+        finally:
+            rffi.free_charp(buf)
+        w_a = space.getitem(w_globals, space.wrap("a"))
+        assert space.unwrap(w_a) == u'caf\xe9'
+        lltype.free(flags, flavor='raw')
+
     def test_run_file(self, space, api):
         filepath = udir / "cpyext_test_runfile.py"
         filepath.write("raise ZeroDivisionError")
@@ -256,3 +287,21 @@
         print dir(mod)
         print mod.__dict__
         assert mod.f(42) == 47
+
+    def test_merge_compiler_flags(self):
+        module = self.import_extension('foo', [
+            ("get_flags", "METH_NOARGS",
+             """
+                PyCompilerFlags flags;
+                flags.cf_flags = 0;
+                int result = PyEval_MergeCompilerFlags(&flags);
+                return Py_BuildValue("ii", result, flags.cf_flags);
+             """),
+            ])
+        assert module.get_flags() == (0, 0)
+
+        ns = {'module':module}
+        exec """from __future__ import division    \nif 1:
+                def nested_flags():
+                    return module.get_flags()""" in ns
+        assert ns['nested_flags']() == (1, 0x2000)  # CO_FUTURE_DIVISION
diff --git a/pypy/module/cpyext/test/test_funcobject.py b/pypy/module/cpyext/test/test_funcobject.py
--- a/pypy/module/cpyext/test/test_funcobject.py
+++ b/pypy/module/cpyext/test/test_funcobject.py
@@ -81,6 +81,14 @@
         rffi.free_charp(filename)
         rffi.free_charp(funcname)
 
+    def test_getnumfree(self, space, api):
+        w_function = space.appexec([], """():
+            a = 5
+            def method(x): return a, x
+            return method
+        """)
+        assert api.PyCode_GetNumFree(w_function.code) == 1
+
     def test_classmethod(self, space, api):
         w_function = space.appexec([], """():
             def method(x): return x
diff --git a/pypy/module/cpyext/test/test_intobject.py b/pypy/module/cpyext/test/test_intobject.py
--- a/pypy/module/cpyext/test/test_intobject.py
+++ b/pypy/module/cpyext/test/test_intobject.py
@@ -65,4 +65,97 @@
         values = module.values()
         types = [type(x) for x in values]
         assert types == [int, long, int, int]
-        
+
+    def test_int_subtype(self):
+        module = self.import_extension(
+            'foo', [
+            ("newEnum", "METH_VARARGS",
+             """
+                EnumObject *enumObj;
+                long intval;
+                PyObject *name;
+
+                if (!PyArg_ParseTuple(args, "Oi", &name, &intval))
+                    return NULL;
+
+                PyType_Ready(&Enum_Type);
+                enumObj = PyObject_New(EnumObject, &Enum_Type);
+                if (!enumObj) {
+                    return NULL;
+                }
+
+                enumObj->ob_ival = intval;
+                Py_INCREF(name);
+                enumObj->ob_name = name;
+
+                return (PyObject *)enumObj;
+             """),
+            ], 
+            prologue="""
+            typedef struct
+            {
+                PyObject_HEAD
+                long ob_ival;
+                PyObject* ob_name;
+            } EnumObject;
+
+            static void
+            enum_dealloc(EnumObject *op)
+            {
+                    Py_DECREF(op->ob_name);
+                    Py_TYPE(op)->tp_free((PyObject *)op);
+            }
+
+            static PyMemberDef enum_members[] = {
+                {"name", T_OBJECT, offsetof(EnumObject, ob_name), 0, NULL},
+                {NULL}  /* Sentinel */
+            };
+
+            PyTypeObject Enum_Type = {
+                PyObject_HEAD_INIT(0)
+                /*ob_size*/             0,
+                /*tp_name*/             "Enum",
+                /*tp_basicsize*/        sizeof(EnumObject),
+                /*tp_itemsize*/         0,
+                /*tp_dealloc*/          enum_dealloc,
+                /*tp_print*/            0,
+                /*tp_getattr*/          0,
+                /*tp_setattr*/          0,
+                /*tp_compare*/          0,
+                /*tp_repr*/             0,
+                /*tp_as_number*/        0,
+                /*tp_as_sequence*/      0,
+                /*tp_as_mapping*/       0,
+                /*tp_hash*/             0,
+                /*tp_call*/             0,
+                /*tp_str*/              0,
+                /*tp_getattro*/         0,
+                /*tp_setattro*/         0,
+                /*tp_as_buffer*/        0,
+                /*tp_flags*/            Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+                /*tp_doc*/              0,
+                /*tp_traverse*/         0,
+                /*tp_clear*/            0,
+                /*tp_richcompare*/      0,
+                /*tp_weaklistoffset*/   0,
+                /*tp_iter*/             0,
+                /*tp_iternext*/         0,
+                /*tp_methods*/          0,
+                /*tp_members*/          enum_members,
+                /*tp_getset*/           0,
+                /*tp_base*/             &PyInt_Type,
+                /*tp_dict*/             0,
+                /*tp_descr_get*/        0,
+                /*tp_descr_set*/        0,
+                /*tp_dictoffset*/       0,
+                /*tp_init*/             0,
+                /*tp_alloc*/            0,
+                /*tp_new*/              0
+            };
+            """)
+
+        a = module.newEnum("ULTIMATE_ANSWER", 42)
+        assert type(a).__name__ == "Enum"
+        assert isinstance(a, int)
+        assert a == int(a) == 42
+        assert a.name == "ULTIMATE_ANSWER"
diff --git a/pypy/module/cpyext/test/test_thread.py b/pypy/module/cpyext/test/test_thread.py
--- a/pypy/module/cpyext/test/test_thread.py
+++ b/pypy/module/cpyext/test/test_thread.py
@@ -5,6 +5,7 @@
 
 from pypy.module.thread.ll_thread import allocate_ll_lock
 from pypy.module.cpyext.test.test_api import BaseApiTest
+from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase
 
 
 class TestPyThread(BaseApiTest):
@@ -38,3 +39,51 @@
         api.PyThread_release_lock(lock)
         assert api.PyThread_acquire_lock(lock, 0) == 1
         api.PyThread_free_lock(lock)
+
+
+class AppTestThread(AppTestCpythonExtensionBase):
+    def test_tls(self):
+        module = self.import_extension('foo', [
+            ("create_key", "METH_NOARGS",
+             """
+                 return PyInt_FromLong(PyThread_create_key());
+             """),
+            ("test_key", "METH_O",
+             """
+                 int key = PyInt_AsLong(args);
+                 if (PyThread_get_key_value(key) != NULL) {
+                     PyErr_SetNone(PyExc_ValueError);
+                     return NULL;
+                 }
+                 if (PyThread_set_key_value(key, (void*)123) < 0) {
+                     PyErr_SetNone(PyExc_ValueError);
+                     return NULL;
+                 }
+                 if (PyThread_get_key_value(key) != (void*)123) {
+                     PyErr_SetNone(PyExc_ValueError);
+                     return NULL;
+                 }
+                 Py_RETURN_NONE;
+             """),
+            ])
+        key = module.create_key()
+        assert key > 0
+        # Test value in main thread.
+        module.test_key(key)
+        raises(ValueError, module.test_key, key)
+        # Same test, in another thread.
+        result = []
+        import thread, time
+        def in_thread():
+            try:
+                module.test_key(key)
+                raises(ValueError, module.test_key, key)
+            except Exception, e:
+                result.append(e)
+            else:
+                result.append(True)
+        thread.start_new_thread(in_thread, ())
+        while not result:
+            print "."
+            time.sleep(.5)
+        assert result == [True]
diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py
--- a/pypy/module/cpyext/test/test_unicodeobject.py
+++ b/pypy/module/cpyext/test/test_unicodeobject.py
@@ -204,8 +204,18 @@
             assert api.Py_UNICODE_ISSPACE(unichr(char))
         assert not api.Py_UNICODE_ISSPACE(u'a')
 
+        assert api.Py_UNICODE_ISALPHA(u'a')
+        assert not api.Py_UNICODE_ISALPHA(u'0')
+        assert api.Py_UNICODE_ISALNUM(u'a')
+        assert api.Py_UNICODE_ISALNUM(u'0')
+        assert not api.Py_UNICODE_ISALNUM(u'+')
+
         assert api.Py_UNICODE_ISDECIMAL(u'\u0660')
         assert not api.Py_UNICODE_ISDECIMAL(u'a')
+        assert api.Py_UNICODE_ISDIGIT(u'9')
+        assert not api.Py_UNICODE_ISDIGIT(u'@')
+        assert api.Py_UNICODE_ISNUMERIC(u'9')
+        assert not api.Py_UNICODE_ISNUMERIC(u'@')
 
         for char in [0x0a, 0x0d, 0x1c, 0x1d, 0x1e, 0x85, 0x2028, 0x2029]:
             assert api.Py_UNICODE_ISLINEBREAK(unichr(char))
@@ -216,6 +226,9 @@
         assert not api.Py_UNICODE_ISUPPER(u'a')
         assert not api.Py_UNICODE_ISLOWER(u'&#65533;')
         assert api.Py_UNICODE_ISUPPER(u'&#65533;')
+        assert not api.Py_UNICODE_ISTITLE(u'A')
+        assert api.Py_UNICODE_ISTITLE(
+            u'\N{LATIN CAPITAL LETTER L WITH SMALL LETTER J}')
 
     def test_TOLOWER(self, space, api):
         assert api.Py_UNICODE_TOLOWER(u'&#65533;') == u'&#65533;'
@@ -429,3 +442,18 @@
         w_char = api.PyUnicode_FromOrdinal(0xFFFF)
         assert space.unwrap(w_char) == u'\uFFFF'
 
+    def test_replace(self, space, api):
+        w_str = space.wrap(u"abababab")
+        w_substr = space.wrap(u"a")
+        w_replstr = space.wrap(u"z")
+        assert u"zbzbabab" == space.unwrap(
+            api.PyUnicode_Replace(w_str, w_substr, w_replstr, 2))
+        assert u"zbzbzbzb" == space.unwrap(
+            api.PyUnicode_Replace(w_str, w_substr, w_replstr, -1))
+
+    def test_tailmatch(self, space, api):
+        w_str = space.wrap(u"abcdef")
+        assert api.PyUnicode_Tailmatch(w_str, space.wrap("cde"), 2, 10, 1) == 1
+        assert api.PyUnicode_Tailmatch(w_str, space.wrap("cde"), 1, 5, -1) == 1
+        self.raises(space, api, TypeError,
+                    api.PyUnicode_Tailmatch, w_str, space.wrap(3), 2, 10, 1)
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -12,7 +12,7 @@
     make_typedescr, get_typedescr)
 from pypy.module.cpyext.stringobject import PyString_Check
 from pypy.module.sys.interp_encoding import setdefaultencoding
-from pypy.objspace.std import unicodeobject, unicodetype
+from pypy.objspace.std import unicodeobject, unicodetype, stringtype
 from pypy.rlib import runicode
 from pypy.tool.sourcetools import func_renamer
 import sys
@@ -89,6 +89,11 @@
     return unicodedb.isspace(ord(ch))
 
 @cpython_api([Py_UNICODE], rffi.INT_real, error=CANNOT_FAIL)
+def Py_UNICODE_ISALPHA(space, ch):
+    """Return 1 or 0 depending on whether ch is an alphabetic character."""
+    return unicodedb.isalpha(ord(ch))
+
+ at cpython_api([Py_UNICODE], rffi.INT_real, error=CANNOT_FAIL)
 def Py_UNICODE_ISALNUM(space, ch):
     """Return 1 or 0 depending on whether ch is an alphanumeric character."""
     return unicodedb.isalnum(ord(ch))
@@ -104,6 +109,16 @@
     return unicodedb.isdecimal(ord(ch))
 
 @cpython_api([Py_UNICODE], rffi.INT_real, error=CANNOT_FAIL)
+def Py_UNICODE_ISDIGIT(space, ch):
+    """Return 1 or 0 depending on whether ch is a digit character."""
+    return unicodedb.isdigit(ord(ch))
+
+ at cpython_api([Py_UNICODE], rffi.INT_real, error=CANNOT_FAIL)
+def Py_UNICODE_ISNUMERIC(space, ch):
+    """Return 1 or 0 depending on whether ch is a numeric character."""
+    return unicodedb.isnumeric(ord(ch))
+
+ at cpython_api([Py_UNICODE], rffi.INT_real, error=CANNOT_FAIL)
 def Py_UNICODE_ISLOWER(space, ch):
     """Return 1 or 0 depending on whether ch is a lowercase character."""
     return unicodedb.islower(ord(ch))
@@ -113,6 +128,11 @@
     """Return 1 or 0 depending on whether ch is an uppercase character."""
     return unicodedb.isupper(ord(ch))
 
+ at cpython_api([Py_UNICODE], rffi.INT_real, error=CANNOT_FAIL)
+def Py_UNICODE_ISTITLE(space, ch):
+    """Return 1 or 0 depending on whether ch is a titlecase character."""
+    return unicodedb.istitle(ord(ch))
+
 @cpython_api([Py_UNICODE], Py_UNICODE, error=CANNOT_FAIL)
 def Py_UNICODE_TOLOWER(space, ch):
     """Return the character ch converted to lower case."""
@@ -155,6 +175,11 @@
     except KeyError:
         return -1.0
 
+ at cpython_api([], Py_UNICODE, error=CANNOT_FAIL)
+def PyUnicode_GetMax(space):
+    """Get the maximum ordinal for a Unicode character."""
+    return runicode.UNICHR(runicode.MAXUNICODE)
+
 @cpython_api([PyObject], rffi.CCHARP, error=CANNOT_FAIL)
 def PyUnicode_AS_DATA(space, ref):
     """Return a pointer to the internal buffer of the object. o has to be a
@@ -548,6 +573,28 @@
 
 @cpython_api([PyObject, PyObject], PyObject)
 def PyUnicode_Join(space, w_sep, w_seq):
-    """Join a sequence of strings using the given separator and return the resulting
-    Unicode string."""
+    """Join a sequence of strings using the given separator and return
+    the resulting Unicode string."""
     return space.call_method(w_sep, 'join', w_seq)
+
+ at cpython_api([PyObject, PyObject, PyObject, Py_ssize_t], PyObject)
+def PyUnicode_Replace(space, w_str, w_substr, w_replstr, maxcount):
+    """Replace at most maxcount occurrences of substr in str with replstr and
+    return the resulting Unicode object. maxcount == -1 means replace all
+    occurrences."""
+    return space.call_method(w_str, "replace", w_substr, w_replstr,
+                             space.wrap(maxcount))
+
+ at cpython_api([PyObject, PyObject, Py_ssize_t, Py_ssize_t, rffi.INT_real],
+             rffi.INT_real, error=-1)
+def PyUnicode_Tailmatch(space, w_str, w_substr, start, end, direction):
+    """Return 1 if substr matches str[start:end] at the given tail end
+    (direction == -1 means to do a prefix match, direction == 1 a
+    suffix match), 0 otherwise. Return -1 if an error occurred."""
+    str = space.unicode_w(w_str)
+    substr = space.unicode_w(w_substr)
+    if rffi.cast(lltype.Signed, direction) >= 0:
+        return stringtype.stringstartswith(str, substr, start, end)
+    else:
+        return stringtype.stringendswith(str, substr, start, end)
+
diff --git a/pypy/module/imp/interp_imp.py b/pypy/module/imp/interp_imp.py
--- a/pypy/module/imp/interp_imp.py
+++ b/pypy/module/imp/interp_imp.py
@@ -1,10 +1,11 @@
 from pypy.module.imp import importing
 from pypy.module._file.interp_file import W_File
 from pypy.rlib import streamio
+from pypy.rlib.streamio import StreamErrors
 from pypy.interpreter.error import OperationError, operationerrfmt
 from pypy.interpreter.module import Module
 from pypy.interpreter.gateway import unwrap_spec
-from pypy.module._file.interp_stream import StreamErrors, wrap_streamerror
+from pypy.interpreter.streamutil import wrap_streamerror
 
 
 def get_suffixes(space):
diff --git a/pypy/module/imp/test/test_import.py b/pypy/module/imp/test/test_import.py
--- a/pypy/module/imp/test/test_import.py
+++ b/pypy/module/imp/test/test_import.py
@@ -357,7 +357,7 @@
 
     def test_cannot_write_pyc(self):
         import sys, os
-        p = os.path.join(sys.path[-1], 'readonly')
+        p = os.path.join(sys.path[0], 'readonly')
         try:
             os.chmod(p, 0555)
         except:
diff --git a/pypy/module/micronumpy/__init__.py b/pypy/module/micronumpy/__init__.py
--- a/pypy/module/micronumpy/__init__.py
+++ b/pypy/module/micronumpy/__init__.py
@@ -67,10 +67,12 @@
         ("arccos", "arccos"),
         ("arcsin", "arcsin"),
         ("arctan", "arctan"),
+        ("arccosh", "arccosh"),
         ("arcsinh", "arcsinh"),
         ("arctanh", "arctanh"),
         ("copysign", "copysign"),
         ("cos", "cos"),
+        ("cosh", "cosh"),
         ("divide", "divide"),
         ("true_divide", "true_divide"),
         ("equal", "equal"),
@@ -90,9 +92,11 @@
         ("reciprocal", "reciprocal"),
         ("sign", "sign"),
         ("sin", "sin"),
+        ("sinh", "sinh"),
         ("subtract", "subtract"),
         ('sqrt', 'sqrt'),
         ("tan", "tan"),
+        ("tanh", "tanh"),
         ('bitwise_and', 'bitwise_and'),
         ('bitwise_or', 'bitwise_or'),
         ('bitwise_xor', 'bitwise_xor'),
diff --git a/pypy/module/micronumpy/interp_boxes.py b/pypy/module/micronumpy/interp_boxes.py
--- a/pypy/module/micronumpy/interp_boxes.py
+++ b/pypy/module/micronumpy/interp_boxes.py
@@ -1,6 +1,6 @@
 from pypy.interpreter.baseobjspace import Wrappable
 from pypy.interpreter.error import operationerrfmt
-from pypy.interpreter.gateway import interp2app
+from pypy.interpreter.gateway import interp2app, unwrap_spec
 from pypy.interpreter.typedef import TypeDef
 from pypy.objspace.std.floattype import float_typedef
 from pypy.objspace.std.inttype import int_typedef
@@ -29,7 +29,6 @@
     def convert_to(self, dtype):
         return dtype.box(self.value)
 
-
 class W_GenericBox(Wrappable):
     _attrs_ = ()
 
@@ -39,10 +38,10 @@
         )
 
     def descr_str(self, space):
-        return self.descr_repr(space)
+        return space.wrap(self.get_dtype(space).itemtype.str_format(self))
 
-    def descr_repr(self, space):
-        return space.wrap(self.get_dtype(space).itemtype.str_format(self))
+    def descr_format(self, space, w_spec):
+        return space.format(self.item(space), w_spec)
 
     def descr_int(self, space):
         box = self.convert_to(W_LongBox.get_dtype(space))
@@ -187,6 +186,10 @@
     descr__new__, get_dtype = new_dtype_getter("float64")
 
 
+ at unwrap_spec(self=W_GenericBox)
+def descr_index(space, self):
+    return space.index(self.item(space))
+
 
 W_GenericBox.typedef = TypeDef("generic",
     __module__ = "numpypy",
@@ -194,7 +197,8 @@
     __new__ = interp2app(W_GenericBox.descr__new__.im_func),
 
     __str__ = interp2app(W_GenericBox.descr_str),
-    __repr__ = interp2app(W_GenericBox.descr_repr),
+    __repr__ = interp2app(W_GenericBox.descr_str),
+    __format__ = interp2app(W_GenericBox.descr_format),
     __int__ = interp2app(W_GenericBox.descr_int),
     __float__ = interp2app(W_GenericBox.descr_float),
     __nonzero__ = interp2app(W_GenericBox.descr_nonzero),
@@ -245,6 +249,8 @@
 W_BoolBox.typedef = TypeDef("bool_", W_GenericBox.typedef,
     __module__ = "numpypy",
     __new__ = interp2app(W_BoolBox.descr__new__.im_func),
+
+    __index__ = interp2app(descr_index),
 )
 
 W_NumberBox.typedef = TypeDef("number", W_GenericBox.typedef,
@@ -266,36 +272,43 @@
 W_Int8Box.typedef = TypeDef("int8", W_SignedIntegerBox.typedef,
     __module__ = "numpypy",
     __new__ = interp2app(W_Int8Box.descr__new__.im_func),
+    __index__ = interp2app(descr_index),
 )
 
 W_UInt8Box.typedef = TypeDef("uint8", W_UnsignedIntegerBox.typedef,
     __module__ = "numpypy",
     __new__ = interp2app(W_UInt8Box.descr__new__.im_func),
+    __index__ = interp2app(descr_index),
 )
 
 W_Int16Box.typedef = TypeDef("int16", W_SignedIntegerBox.typedef,
     __module__ = "numpypy",
     __new__ = interp2app(W_Int16Box.descr__new__.im_func),
+    __index__ = interp2app(descr_index),
 )
 
 W_UInt16Box.typedef = TypeDef("uint16", W_UnsignedIntegerBox.typedef,
     __module__ = "numpypy",
     __new__ = interp2app(W_UInt16Box.descr__new__.im_func),
+    __index__ = interp2app(descr_index),
 )
 
 W_Int32Box.typedef = TypeDef("int32", (W_SignedIntegerBox.typedef,) + MIXIN_32,
     __module__ = "numpypy",
     __new__ = interp2app(W_Int32Box.descr__new__.im_func),
+    __index__ = interp2app(descr_index),
 )
 
 W_UInt32Box.typedef = TypeDef("uint32", W_UnsignedIntegerBox.typedef,
     __module__ = "numpypy",
     __new__ = interp2app(W_UInt32Box.descr__new__.im_func),
+    __index__ = interp2app(descr_index),
 )
 
 W_Int64Box.typedef = TypeDef("int64", (W_SignedIntegerBox.typedef,) + MIXIN_64,
     __module__ = "numpypy",
     __new__ = interp2app(W_Int64Box.descr__new__.im_func),
+    __index__ = interp2app(descr_index),
 )
 
 if LONG_BIT == 32:
@@ -308,6 +321,7 @@
 W_UInt64Box.typedef = TypeDef("uint64", W_UnsignedIntegerBox.typedef,
     __module__ = "numpypy",
     __new__ = interp2app(W_UInt64Box.descr__new__.im_func),
+    __index__ = interp2app(descr_index),
 )
 
 W_InexactBox.typedef = TypeDef("inexact", W_NumberBox.typedef,
diff --git a/pypy/module/micronumpy/interp_numarray.py b/pypy/module/micronumpy/interp_numarray.py
--- a/pypy/module/micronumpy/interp_numarray.py
+++ b/pypy/module/micronumpy/interp_numarray.py
@@ -779,8 +779,6 @@
     """
     Intermediate class for performing binary operations.
     """
-    _immutable_fields_ = ['left', 'right']
-
     def __init__(self, ufunc, name, shape, calc_dtype, res_dtype, left, right):
         VirtualArray.__init__(self, name, shape, res_dtype)
         self.ufunc = ufunc
@@ -856,8 +854,6 @@
                                          self.right.create_sig(), done_func)
 
 class AxisReduce(Call2):
-    _immutable_fields_ = ['left', 'right']
-
     def __init__(self, ufunc, name, identity, shape, dtype, left, right, dim):
         Call2.__init__(self, ufunc, name, shape, dtype, dtype,
                        left, right)
diff --git a/pypy/module/micronumpy/interp_support.py b/pypy/module/micronumpy/interp_support.py
--- a/pypy/module/micronumpy/interp_support.py
+++ b/pypy/module/micronumpy/interp_support.py
@@ -3,7 +3,7 @@
 from pypy.rpython.lltypesystem import lltype, rffi
 from pypy.module.micronumpy import interp_dtype
 from pypy.objspace.std.strutil import strip_spaces
-
+from pypy.rlib import jit
 
 FLOAT_SIZE = rffi.sizeof(lltype.Float)
 
@@ -72,11 +72,20 @@
             "string is smaller than requested size"))
         
     a = W_NDimArray(count, [count], dtype=dtype)
-    for i in range(count):
+    fromstring_loop(a, count, dtype, itemsize, s)
+    return space.wrap(a)
+
+fromstring_driver = jit.JitDriver(greens=[], reds=['count', 'i', 'itemsize',
+                                                   'dtype', 's', 'a'])
+
+def fromstring_loop(a, count, dtype, itemsize, s):
+    i = 0
+    while i < count:
+        fromstring_driver.jit_merge_point(a=a, count=count, dtype=dtype,
+                                          itemsize=itemsize, s=s, i=i)
         val = dtype.itemtype.runpack_str(s[i*itemsize:i*itemsize + itemsize])
         a.dtype.setitem(a.storage, i, val)
-        
-    return space.wrap(a)
+        i += 1
 
 @unwrap_spec(s=str, count=int, sep=str)
 def fromstring(space, s, w_dtype=None, count=-1, sep=''):
diff --git a/pypy/module/micronumpy/interp_ufuncs.py b/pypy/module/micronumpy/interp_ufuncs.py
--- a/pypy/module/micronumpy/interp_ufuncs.py
+++ b/pypy/module/micronumpy/interp_ufuncs.py
@@ -435,7 +435,11 @@
             ("arcsin", "arcsin", 1, {"promote_to_float": True}),
             ("arccos", "arccos", 1, {"promote_to_float": True}),
             ("arctan", "arctan", 1, {"promote_to_float": True}),
+            ("sinh", "sinh", 1, {"promote_to_float": True}),
+            ("cosh", "cosh", 1, {"promote_to_float": True}),
+            ("tanh", "tanh", 1, {"promote_to_float": True}),
             ("arcsinh", "arcsinh", 1, {"promote_to_float": True}),
+            ("arccosh", "arccosh", 1, {"promote_to_float": True}),
             ("arctanh", "arctanh", 1, {"promote_to_float": True}),
         ]:
             self.add_ufunc(space, *ufunc_def)
diff --git a/pypy/module/micronumpy/test/test_dtypes.py b/pypy/module/micronumpy/test/test_dtypes.py
--- a/pypy/module/micronumpy/test/test_dtypes.py
+++ b/pypy/module/micronumpy/test/test_dtypes.py
@@ -371,6 +371,8 @@
         assert type(a[1]) is numpy.float64
         assert numpy.dtype(float).type is numpy.float64
 
+        assert "{:3f}".format(numpy.float64(3)) == "3.000000"
+
         assert numpy.float64(2.0) == 2.0
         assert numpy.float64('23.4') == numpy.float64(23.4)
         raises(ValueError, numpy.float64, '23.2df')
@@ -387,9 +389,9 @@
         assert b.m() == 12
 
     def test_long_as_index(self):
-        skip("waiting for removal of multimethods of __index__")
-        from _numpypy import int_
+        from _numpypy import int_, float64
         assert (1, 2, 3)[int_(1)] == 2
+        raises(TypeError, lambda: (1, 2, 3)[float64(1)])
 
     def test_int(self):
         import sys
diff --git a/pypy/module/micronumpy/test/test_ufuncs.py b/pypy/module/micronumpy/test/test_ufuncs.py
--- a/pypy/module/micronumpy/test/test_ufuncs.py
+++ b/pypy/module/micronumpy/test/test_ufuncs.py
@@ -310,6 +310,33 @@
         b = arctan(a)
         assert math.isnan(b[0])
 
+    def test_sinh(self):
+        import math
+        from _numpypy import array, sinh
+
+        a = array([-1, 0, 1, float('inf'), float('-inf')])
+        b = sinh(a)
+        for i in range(len(a)):
+            assert b[i] == math.sinh(a[i])
+
+    def test_cosh(self):
+        import math
+        from _numpypy import array, cosh
+
+        a = array([-1, 0, 1, float('inf'), float('-inf')])
+        b = cosh(a)
+        for i in range(len(a)):
+            assert b[i] == math.cosh(a[i])
+
+    def test_tanh(self):
+        import math
+        from _numpypy import array, tanh
+
+        a = array([-1, 0, 1, float('inf'), float('-inf')])
+        b = tanh(a)
+        for i in range(len(a)):
+            assert b[i] == math.tanh(a[i])
+
     def test_arcsinh(self):
         import math
         from _numpypy import arcsinh
@@ -318,6 +345,15 @@
             assert math.asinh(v) == arcsinh(v)
         assert math.isnan(arcsinh(float("nan")))
 
+    def test_arccosh(self):
+        import math
+        from _numpypy import arccosh
+
+        for v in [1.0, 1.1, 2]:
+            assert math.acosh(v) == arccosh(v)
+        for v in [-1.0, 0, .99]:
+            assert math.isnan(arccosh(v))
+
     def test_arctanh(self):
         import math
         from _numpypy import arctanh
diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -479,38 +479,3 @@
                                 'int_sub': 3,
                                 'jump': 1,
                                 'setinteriorfield_raw': 1})
-
-
-class TestNumpyOld(LLJitMixin):
-    def setup_class(cls):
-        py.test.skip("old")
-        from pypy.module.micronumpy.compile import FakeSpace
-        from pypy.module.micronumpy.interp_dtype import get_dtype_cache
-
-        cls.space = FakeSpace()
-        cls.float64_dtype = get_dtype_cache(cls.space).w_float64dtype
-
-    def test_int32_sum(self):
-        py.test.skip("pypy/jit/backend/llimpl.py needs to be changed to "
-                     "deal correctly with int dtypes for this test to "
-                     "work. skip for now until someone feels up to the task")
-        space = self.space
-        float64_dtype = self.float64_dtype
-        int32_dtype = self.int32_dtype
-
-        def f(n):
-            if NonConstant(False):
-                dtype = float64_dtype
-            else:
-                dtype = int32_dtype
-            ar = W_NDimArray(n, [n], dtype=dtype)
-            i = 0
-            while i < n:
-                ar.get_concrete().setitem(i, int32_dtype.box(7))
-                i += 1
-            v = ar.descr_add(space, ar).descr_sum(space)
-            assert isinstance(v, IntObject)
-            return v.intval
-
-        result = self.meta_interp(f, [5], listops=True, backendopt=True)
-        assert result == f(5)
diff --git a/pypy/module/micronumpy/types.py b/pypy/module/micronumpy/types.py
--- a/pypy/module/micronumpy/types.py
+++ b/pypy/module/micronumpy/types.py
@@ -489,10 +489,28 @@
         return math.atan(v)
 
     @simple_unary_op
+    def sinh(self, v):
+        return math.sinh(v)
+
+    @simple_unary_op
+    def cosh(self, v):
+        return math.cosh(v)
+
+    @simple_unary_op
+    def tanh(self, v):
+        return math.tanh(v)
+
+    @simple_unary_op
     def arcsinh(self, v):
         return math.asinh(v)
 
     @simple_unary_op
+    def arccosh(self, v):
+        if v < 1.0:
+            return rfloat.NAN
+        return math.acosh(v)
+
+    @simple_unary_op
     def arctanh(self, v):
         if v == 1.0 or v == -1.0:
             return math.copysign(rfloat.INFINITY, v)
diff --git a/pypy/module/pypyjit/interp_resop.py b/pypy/module/pypyjit/interp_resop.py
--- a/pypy/module/pypyjit/interp_resop.py
+++ b/pypy/module/pypyjit/interp_resop.py
@@ -72,7 +72,7 @@
     Set a compiling hook that will be called each time a loop is optimized,
     but before assembler compilation. This allows to add additional
     optimizations on Python level.
-    
+
     The hook will be called with the following signature:
     hook(jitdriver_name, loop_type, greenkey or guard_number, operations)
 
@@ -121,13 +121,14 @@
             ofs = ops_offset.get(op, 0)
         if op.opnum == rop.DEBUG_MERGE_POINT:
             jd_sd = jitdrivers_sd[op.getarg(0).getint()]
-            greenkey = op.getarglist()[2:]
+            greenkey = op.getarglist()[3:]
             repr = jd_sd.warmstate.get_location_str(greenkey)
             w_greenkey = wrap_greenkey(space, jd_sd.jitdriver, greenkey, repr)
             l_w.append(DebugMergePoint(space, jit_hooks._cast_to_gcref(op),
                                        logops.repr_of_resop(op),
                                        jd_sd.jitdriver.name,
                                        op.getarg(1).getint(),
+                                       op.getarg(2).getint(),
                                        w_greenkey))
         else:
             l_w.append(WrappedOp(jit_hooks._cast_to_gcref(op), ofs,
@@ -164,14 +165,16 @@
         llres = res.llbox
     return WrappedOp(jit_hooks.resop_new(num, args, llres), offset, repr)
 
- at unwrap_spec(repr=str, jd_name=str, call_depth=int)
-def descr_new_dmp(space, w_tp, w_args, repr, jd_name, call_depth, w_greenkey):
+ at unwrap_spec(repr=str, jd_name=str, call_depth=int, call_id=int)
+def descr_new_dmp(space, w_tp, w_args, repr, jd_name, call_depth, call_id,
+    w_greenkey):
+
     args = [space.interp_w(WrappedBox, w_arg).llbox for w_arg in
             space.listview(w_args)]
     num = rop.DEBUG_MERGE_POINT
     return DebugMergePoint(space,
                            jit_hooks.resop_new(num, args, jit_hooks.emptyval()),
-                           repr, jd_name, call_depth, w_greenkey)
+                           repr, jd_name, call_depth, call_id, w_greenkey)
 
 class WrappedOp(Wrappable):
     """ A class representing a single ResOperation, wrapped nicely
@@ -206,10 +209,13 @@
         jit_hooks.resop_setresult(self.op, box.llbox)
 
 class DebugMergePoint(WrappedOp):
-    def __init__(self, space, op, repr_of_resop, jd_name, call_depth, w_greenkey):
+    def __init__(self, space, op, repr_of_resop, jd_name, call_depth, call_id,
+        w_greenkey):
+
         WrappedOp.__init__(self, op, -1, repr_of_resop)
         self.jd_name = jd_name
         self.call_depth = call_depth
+        self.call_id = call_id
         self.w_greenkey = w_greenkey
 
     def get_pycode(self, space):
@@ -246,6 +252,7 @@
     pycode = GetSetProperty(DebugMergePoint.get_pycode),
     bytecode_no = GetSetProperty(DebugMergePoint.get_bytecode_no),
     call_depth = interp_attrproperty("call_depth", cls=DebugMergePoint),
+    call_id = interp_attrproperty("call_id", cls=DebugMergePoint),
     jitdriver_name = GetSetProperty(DebugMergePoint.get_jitdriver_name),
 )
 DebugMergePoint.acceptable_as_base_class = False
diff --git a/pypy/module/pypyjit/policy.py b/pypy/module/pypyjit/policy.py
--- a/pypy/module/pypyjit/policy.py
+++ b/pypy/module/pypyjit/policy.py
@@ -127,7 +127,7 @@
                        'imp', 'sys', 'array', '_ffi', 'itertools', 'operator',
                        'posix', '_socket', '_sre', '_lsprof', '_weakref',
                        '__pypy__', 'cStringIO', '_collections', 'struct',
-                       'mmap', 'marshal', '_codecs']:
+                       'mmap', 'marshal', '_codecs', 'rctime']:
             if modname == 'pypyjit' and 'interp_resop' in rest:
                 return False
             return True
diff --git a/pypy/module/pypyjit/test/test_jit_hook.py b/pypy/module/pypyjit/test/test_jit_hook.py
--- a/pypy/module/pypyjit/test/test_jit_hook.py
+++ b/pypy/module/pypyjit/test/test_jit_hook.py
@@ -54,7 +54,7 @@
         oplist = parse("""
         [i1, i2, p2]
         i3 = int_add(i1, i2)
-        debug_merge_point(0, 0, 0, 0, ConstPtr(ptr0))
+        debug_merge_point(0, 0, 0, 0, 0, ConstPtr(ptr0))
         guard_nonnull(p2) []
         guard_true(i3) []
         """, namespace={'ptr0': code_gcref}).operations
@@ -87,7 +87,7 @@
         def interp_on_abort():
             pypy_hooks.on_abort(ABORT_TOO_LONG, pypyjitdriver, greenkey,
                                 'blah')
-        
+
         cls.w_on_compile = space.wrap(interp2app(interp_on_compile))
         cls.w_on_compile_bridge = space.wrap(interp2app(interp_on_compile_bridge))
         cls.w_on_abort = space.wrap(interp2app(interp_on_abort))
@@ -105,7 +105,7 @@
 
         def hook(name, looptype, tuple_or_guard_no, ops, asmstart, asmlen):
             all.append((name, looptype, tuple_or_guard_no, ops))
-        
+
         self.on_compile()
         pypyjit.set_compile_hook(hook)
         assert not all
@@ -123,6 +123,7 @@
         assert dmp.pycode is self.f.func_code
         assert dmp.greenkey == (self.f.func_code, 0, False)
         assert dmp.call_depth == 0
+        assert dmp.call_id == 0
         assert int_add.name == 'int_add'
         assert int_add.num == self.int_add_num
         self.on_compile_bridge()
@@ -151,18 +152,18 @@
     def test_non_reentrant(self):
         import pypyjit
         l = []
-        
+
         def hook(*args):
             l.append(None)
             self.on_compile()
             self.on_compile_bridge()
-        
+
         pypyjit.set_compile_hook(hook)
         self.on_compile()
         assert len(l) == 1 # and did not crash
         self.on_compile_bridge()
         assert len(l) == 2 # and did not crash
-        
+
     def test_on_compile_types(self):
         import pypyjit
         l = []
@@ -182,7 +183,7 @@
 
         def hook(jitdriver_name, greenkey, reason):
             l.append((jitdriver_name, reason))
-        
+
         pypyjit.set_abort_hook(hook)
         self.on_abort()
         assert l == [('pypyjit', 'ABORT_TOO_LONG')]
@@ -224,13 +225,14 @@
         def f():
             pass
 
-        op = DebugMergePoint([Box(0)], 'repr', 'pypyjit', 2, (f.func_code, 0, 0))
+        op = DebugMergePoint([Box(0)], 'repr', 'pypyjit', 2, 3, (f.func_code, 0, 0))
         assert op.bytecode_no == 0
         assert op.pycode is f.func_code
         assert repr(op) == 'repr'
         assert op.jitdriver_name == 'pypyjit'
         assert op.num == self.dmp_num
         assert op.call_depth == 2
-        op = DebugMergePoint([Box(0)], 'repr', 'notmain', 5, ('str',))
+        assert op.call_id == 3
+        op = DebugMergePoint([Box(0)], 'repr', 'notmain', 5, 4, ('str',))
         raises(AttributeError, 'op.pycode')
         assert op.call_depth == 5
diff --git a/pypy/module/pypyjit/test/test_policy.py b/pypy/module/pypyjit/test/test_policy.py
--- a/pypy/module/pypyjit/test/test_policy.py
+++ b/pypy/module/pypyjit/test/test_policy.py
@@ -38,6 +38,10 @@
     assert pypypolicy.look_inside_function(Local.getdict.im_func)
     assert pypypolicy.look_inside_function(get_ident)
 
+def test_time():
+    from pypy.module.rctime.interp_time import time
+    assert pypypolicy.look_inside_function(time)
+
 def test_pypy_module():
     from pypy.module._collections.interp_deque import W_Deque
     from pypy.module._random.interp_random import W_Random
diff --git a/pypy/module/pypyjit/test_pypy_c/test_00_model.py b/pypy/module/pypyjit/test_pypy_c/test_00_model.py
--- a/pypy/module/pypyjit/test_pypy_c/test_00_model.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_00_model.py
@@ -60,6 +60,9 @@
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
         stdout, stderr = pipe.communicate()
+        if getattr(pipe, 'returncode', 0) < 0:
+            raise IOError("subprocess was killed by signal %d" % (
+                pipe.returncode,))
         if stderr.startswith('SKIP:'):
             py.test.skip(stderr)
         if stderr.startswith('debug_alloc.h:'):   # lldebug builds
diff --git a/pypy/module/pypyjit/test_pypy_c/test_alloc.py b/pypy/module/pypyjit/test_pypy_c/test_alloc.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_alloc.py
@@ -0,0 +1,26 @@
+import py, sys
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestAlloc(BaseTestPyPyC):
+
+    SIZES = dict.fromkeys([2 ** n for n in range(26)] +     # up to 32MB
+                          [2 ** n - 1 for n in range(26)])
+
+    def test_newstr_constant_size(self):
+        for size in TestAlloc.SIZES:
+            yield self.newstr_constant_size, size
+
+    def newstr_constant_size(self, size):
+        src = """if 1:
+                    N = %(size)d
+                    part_a = 'a' * N
+                    part_b = 'b' * N
+                    for i in xrange(20):
+                        ao = '%%s%%s' %% (part_a, part_b)
+                    def main():
+                        return 42
+""" % {'size': size}
+        log = self.run(src, [], threshold=10)
+        assert log.result == 42
+        loop, = log.loops_by_filename(self.filepath)
+        # assert did not crash
diff --git a/pypy/module/pypyjit/test_pypy_c/test_instance.py b/pypy/module/pypyjit/test_pypy_c/test_instance.py
--- a/pypy/module/pypyjit/test_pypy_c/test_instance.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_instance.py
@@ -201,3 +201,28 @@
         loop, = log.loops_by_filename(self.filepath)
         assert loop.match_by_id("compare", "") # optimized away
 
+    def test_super(self):
+        def main():
+            class A(object):
+                def m(self, x):
+                    return x + 1
+            class B(A):
+                def m(self, x):
+                    return super(B, self).m(x)
+            i = 0
+            while i < 300:
+                i = B().m(i)
+            return i
+
+        log = self.run(main, [])
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i78 = int_lt(i72, 300)
+            guard_true(i78, descr=...)
+            guard_not_invalidated(descr=...)
+            i79 = force_token()
+            i80 = force_token()
+            i81 = int_add(i72, 1)
+            --TICK--
+            jump(..., descr=...)
+        """)
diff --git a/pypy/module/rctime/interp_time.py b/pypy/module/rctime/interp_time.py
--- a/pypy/module/rctime/interp_time.py
+++ b/pypy/module/rctime/interp_time.py
@@ -24,6 +24,7 @@
     from pypy.module.thread import ll_thread as thread
 
     eci = ExternalCompilationInfo(
+        post_include_bits = ["BOOL pypy_timemodule_setCtrlHandler(HANDLE event);"],
         separate_module_sources=['''
             #include <windows.h>
 
diff --git a/pypy/module/select/__init__.py b/pypy/module/select/__init__.py
--- a/pypy/module/select/__init__.py
+++ b/pypy/module/select/__init__.py
@@ -22,6 +22,13 @@
             if value is not None:
                 interpleveldefs[symbol] = "space.wrap(%r)" % value
 
+    if 'bsd' in sys.platform or sys.platform.startswith('darwin'):
+        interpleveldefs["kqueue"] = "interp_kqueue.W_Kqueue"
+        interpleveldefs["kevent"] = "interp_kqueue.W_Kevent"
+        from pypy.module.select.interp_kqueue import symbol_map
+        for symbol in symbol_map:
+            interpleveldefs[symbol] = "space.wrap(interp_kqueue.%s)" % symbol
+
     def buildloaders(cls):
         from pypy.rlib import rpoll
         for name in rpoll.eventnames:
diff --git a/pypy/module/select/interp_epoll.py b/pypy/module/select/interp_epoll.py
--- a/pypy/module/select/interp_epoll.py
+++ b/pypy/module/select/interp_epoll.py
@@ -58,7 +58,7 @@
 )
 epoll_wait = rffi.llexternal(
     "epoll_wait",
-    [rffi.INT, lltype.Ptr(rffi.CArray(epoll_event)), rffi.INT, rffi.INT],
+    [rffi.INT, rffi.CArrayPtr(epoll_event), rffi.INT, rffi.INT],
     rffi.INT,
     compilation_info=eci,
 )
diff --git a/pypy/module/select/interp_kqueue.py b/pypy/module/select/interp_kqueue.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/select/interp_kqueue.py
@@ -0,0 +1,343 @@
+from pypy.interpreter.baseobjspace import Wrappable
+from pypy.interpreter.error import OperationError, operationerrfmt, exception_from_errno
+from pypy.interpreter.gateway import interp2app, unwrap_spec
+from pypy.interpreter.typedef import TypeDef, generic_new_descr, GetSetProperty
+from pypy.rlib._rsocket_rffi import socketclose
+from pypy.rpython.lltypesystem import rffi, lltype
+from pypy.rpython.tool import rffi_platform
+from pypy.translator.tool.cbuild import ExternalCompilationInfo
+
+
+eci = ExternalCompilationInfo(
+    includes = ["sys/types.h",
+                "sys/event.h",
+                "sys/time.h"],
+)
+
+
+class CConfig:
+    _compilation_info_ = eci
+
+
+CConfig.kevent = rffi_platform.Struct("struct kevent", [
+    ("ident", rffi.UINTPTR_T),
+    ("filter", rffi.SHORT),
+    ("flags", rffi.USHORT),
+    ("fflags", rffi.UINT),
+    ("data", rffi.INTPTR_T),
+    ("udata", rffi.VOIDP),
+])
+
+
+CConfig.timespec = rffi_platform.Struct("struct timespec", [
+    ("tv_sec", rffi.TIME_T),
+    ("tv_nsec", rffi.LONG),
+])
+
+
+symbol_map = {
+    "KQ_FILTER_READ": "EVFILT_READ",
+    "KQ_FILTER_WRITE": "EVFILT_WRITE",
+    "KQ_FILTER_AIO": "EVFILT_AIO",
+    "KQ_FILTER_VNODE": "EVFILT_VNODE",
+    "KQ_FILTER_PROC": "EVFILT_PROC",
+#    "KQ_FILTER_NETDEV": None, # deprecated on FreeBSD .. no longer defined
+    "KQ_FILTER_SIGNAL": "EVFILT_SIGNAL",
+    "KQ_FILTER_TIMER": "EVFILT_TIMER",
+    "KQ_EV_ADD": "EV_ADD",
+    "KQ_EV_DELETE": "EV_DELETE",
+    "KQ_EV_ENABLE": "EV_ENABLE",
+    "KQ_EV_DISABLE": "EV_DISABLE",
+    "KQ_EV_ONESHOT": "EV_ONESHOT",
+    "KQ_EV_CLEAR": "EV_CLEAR",
+#    "KQ_EV_SYSFLAGS": None, # Python docs says "internal event" .. not defined on FreeBSD
+#    "KQ_EV_FLAG1": None, # Python docs says "internal event" .. not defined on FreeBSD
+    "KQ_EV_EOF": "EV_EOF",
+    "KQ_EV_ERROR": "EV_ERROR"
+}
+
+for symbol in symbol_map.values():
+    setattr(CConfig, symbol, rffi_platform.DefinedConstantInteger(symbol))
+
+cconfig = rffi_platform.configure(CConfig)
+
+kevent = cconfig["kevent"]
+timespec = cconfig["timespec"]
+
+for symbol in symbol_map:
+    globals()[symbol] = cconfig[symbol_map[symbol]]
+
+
+syscall_kqueue = rffi.llexternal(
+    "kqueue",
+    [],
+    rffi.INT,
+    compilation_info=eci
+)
+
+syscall_kevent = rffi.llexternal(
+    "kevent",
+    [rffi.INT,
+     lltype.Ptr(rffi.CArray(kevent)),
+     rffi.INT,
+     lltype.Ptr(rffi.CArray(kevent)),
+     rffi.INT,
+     lltype.Ptr(timespec)
+    ],
+    rffi.INT,
+    compilation_info=eci
+)
+
+
+class W_Kqueue(Wrappable):
+    def __init__(self, space, kqfd):
+        self.kqfd = kqfd
+
+    def descr__new__(space, w_subtype):
+        kqfd = syscall_kqueue()
+        if kqfd < 0:
+            raise exception_from_errno(space, space.w_IOError)
+        return space.wrap(W_Kqueue(space, kqfd))
+
+    @unwrap_spec(fd=int)
+    def descr_fromfd(space, w_cls, fd):
+        return space.wrap(W_Kqueue(space, fd))
+
+    def __del__(self):
+        self.close()
+
+    def get_closed(self):
+        return self.kqfd < 0
+
+    def close(self):
+        if not self.get_closed():
+            kqfd = self.kqfd
+            self.kqfd = -1
+            socketclose(kqfd)
+
+    def check_closed(self, space):
+        if self.get_closed():
+            raise OperationError(space.w_ValueError, space.wrap("I/O operation on closed kqueue fd"))
+
+    def descr_get_closed(self, space):
+        return space.wrap(self.get_closed())
+
+    def descr_fileno(self, space):
+        self.check_closed(space)
+        return space.wrap(self.kqfd)
+
+    def descr_close(self, space):
+        self.close()
+
+    @unwrap_spec(max_events=int)
+    def descr_control(self, space, w_changelist, max_events, w_timeout=None):
+
+        self.check_closed(space)
+
+        if max_events < 0:
+            raise operationerrfmt(space.w_ValueError,
+                "Length of eventlist must be 0 or positive, got %d", max_events
+            )
+
+        if space.is_w(w_changelist, space.w_None):
+            changelist_len = 0
+        else:
+            changelist_len = space.len_w(w_changelist)
+
+        with lltype.scoped_alloc(rffi.CArray(kevent), changelist_len) as changelist:
+            with lltype.scoped_alloc(rffi.CArray(kevent), max_events) as eventlist:
+                with lltype.scoped_alloc(timespec) as timeout:
+
+                    if not space.is_w(w_timeout, space.w_None):
+                        _timeout = space.float_w(w_timeout)
+                        if _timeout < 0:
+                            raise operationerrfmt(space.w_ValueError,
+                                "Timeout must be None or >= 0, got %s", str(_timeout)
+                            )
+                        sec = int(_timeout)
+                        nsec = int(1e9 * (_timeout - sec))
+                        rffi.setintfield(timeout, 'c_tv_sec', sec)
+                        rffi.setintfield(timeout, 'c_tv_nsec', nsec)
+                        ptimeout = timeout
+                    else:
+                        ptimeout = lltype.nullptr(timespec)
+
+                    if not space.is_w(w_changelist, space.w_None):
+                        i = 0
+                        for w_ev in space.listview(w_changelist):
+                            ev = space.interp_w(W_Kevent, w_ev)
+                            changelist[i].c_ident = ev.event.c_ident
+                            changelist[i].c_filter = ev.event.c_filter
+                            changelist[i].c_flags = ev.event.c_flags
+                            changelist[i].c_fflags = ev.event.c_fflags
+                            changelist[i].c_data = ev.event.c_data
+                            changelist[i].c_udata = ev.event.c_udata
+                            i += 1
+                        pchangelist = changelist
+                    else:
+                        pchangelist = lltype.nullptr(rffi.CArray(kevent))
+
+                    nfds = syscall_kevent(self.kqfd,
+                                          pchangelist,
+                                          changelist_len,
+                                          eventlist,
+                                          max_events,
+                                          ptimeout)
+                    if nfds < 0:
+                        raise exception_from_errno(space, space.w_IOError)
+                    else:
+                        elist_w = [None] * nfds
+                        for i in xrange(nfds):
+
+                            evt = eventlist[i]
+
+                            w_event = W_Kevent(space)
+                            w_event.event = lltype.malloc(kevent, flavor="raw")
+                            w_event.event.c_ident = evt.c_ident
+                            w_event.event.c_filter = evt.c_filter
+                            w_event.event.c_flags = evt.c_flags
+                            w_event.event.c_fflags = evt.c_fflags
+                            w_event.event.c_data = evt.c_data
+                            w_event.event.c_udata = evt.c_udata
+
+                            elist_w[i] = w_event
+
+                    return space.newlist(elist_w)
+
+
+W_Kqueue.typedef = TypeDef("select.kqueue",
+    __new__ = interp2app(W_Kqueue.descr__new__.im_func),
+    fromfd = interp2app(W_Kqueue.descr_fromfd.im_func, as_classmethod=True),
+
+    closed = GetSetProperty(W_Kqueue.descr_get_closed),
+    fileno = interp2app(W_Kqueue.descr_fileno),
+
+    close = interp2app(W_Kqueue.descr_close),
+    control = interp2app(W_Kqueue.descr_control),
+)
+W_Kqueue.typedef.acceptable_as_base_class = False
+
+
+class W_Kevent(Wrappable):
+    def __init__(self, space):
+        self.event = lltype.nullptr(kevent)
+
+    def __del__(self):
+        if self.event:
+            lltype.free(self.event, flavor="raw")
+
+    @unwrap_spec(filter=int, flags='c_uint', fflags='c_uint', data=int, udata='c_uint')
+    def descr__init__(self, space, w_ident, filter=KQ_FILTER_READ, flags=KQ_EV_ADD, fflags=0, data=0, udata=0):
+        ident = space.c_filedescriptor_w(w_ident)
+
+        self.event = lltype.malloc(kevent, flavor="raw")
+        rffi.setintfield(self.event, "c_ident", ident)
+        rffi.setintfield(self.event, "c_filter", filter)
+        rffi.setintfield(self.event, "c_flags", flags)
+        rffi.setintfield(self.event, "c_fflags", fflags)
+        rffi.setintfield(self.event, "c_data", data)
+        self.event.c_udata = rffi.cast(rffi.VOIDP, udata)
+
+    def _compare_all_fields(self, other, op):
+        l_ident = self.event.c_ident
+        r_ident = other.event.c_ident
+        l_filter = rffi.cast(lltype.Signed, self.event.c_filter)
+        r_filter = rffi.cast(lltype.Signed, other.event.c_filter)
+        l_flags = rffi.cast(lltype.Unsigned, self.event.c_flags)
+        r_flags = rffi.cast(lltype.Unsigned, other.event.c_flags)
+        l_fflags = rffi.cast(lltype.Unsigned, self.event.c_fflags)
+        r_fflags = rffi.cast(lltype.Unsigned, other.event.c_fflags)
+        l_data = self.event.c_data
+        r_data = other.event.c_data
+        l_udata = rffi.cast(lltype.Unsigned, self.event.c_udata)
+        r_udata = rffi.cast(lltype.Unsigned, other.event.c_udata)
+
+        if op == "eq":
+            return l_ident == r_ident and \
+                   l_filter == r_filter and \
+                   l_flags == r_flags and \
+                   l_fflags == r_fflags and \
+                   l_data == r_data and \
+                   l_udata == r_udata
+        elif op == "lt":
+            return (l_ident < r_ident) or \
+                   (l_ident == r_ident and l_filter < r_filter) or \
+                   (l_ident == r_ident and l_filter == r_filter and l_flags < r_flags) or \
+                   (l_ident == r_ident and l_filter == r_filter and l_flags == r_flags and l_fflags < r_fflags) or \
+                   (l_ident == r_ident and l_filter == r_filter and l_flags == r_flags and l_fflags == r_fflags and l_data < r_data) or \
+                   (l_ident == r_ident and l_filter == r_filter and l_flags == r_flags and l_fflags == r_fflags and l_data == r_data and l_udata < r_udata)
+        elif op == "gt":
+            return (l_ident > r_ident) or \
+                   (l_ident == r_ident and l_filter > r_filter) or \
+                   (l_ident == r_ident and l_filter == r_filter and l_flags > r_flags) or \
+                   (l_ident == r_ident and l_filter == r_filter and l_flags == r_flags and l_fflags > r_fflags) or \
+                   (l_ident == r_ident and l_filter == r_filter and l_flags == r_flags and l_fflags == r_fflags and l_data > r_data) or \
+                   (l_ident == r_ident and l_filter == r_filter and l_flags == r_flags and l_fflags == r_fflags and l_data == r_data and l_udata > r_udata)
+        else:
+            assert False
+
+    def compare_all_fields(self, space, other, op):
+        if not space.interp_w(W_Kevent, other):
+            if op == "eq":
+                return False
+            elif op == "ne":
+                return True
+            else:
+                raise OperationError(space.w_TypeError, space.wrap('cannot compare kevent to incompatible type'))
+        return self._compare_all_fields(space.interp_w(W_Kevent, other), op)
+
+    def descr__eq__(self, space, w_other):
+        return space.wrap(self.compare_all_fields(space, w_other, "eq"))
+
+    def descr__ne__(self, space, w_other):
+        return space.wrap(not self.compare_all_fields(space, w_other, "eq"))
+
+    def descr__le__(self, space, w_other):
+        return space.wrap(not self.compare_all_fields(space, w_other, "gt"))
+
+    def descr__lt__(self, space, w_other):
+        return space.wrap(self.compare_all_fields(space, w_other, "lt"))
+
+    def descr__ge__(self, space, w_other):
+        return space.wrap(not self.compare_all_fields(space, w_other, "lt"))
+
+    def descr__gt__(self, space, w_other):
+        return space.wrap(self.compare_all_fields(space, w_other, "gt"))
+
+    def descr_get_ident(self, space):
+        return space.wrap(self.event.c_ident)
+
+    def descr_get_filter(self, space):
+        return space.wrap(self.event.c_filter)
+
+    def descr_get_flags(self, space):
+        return space.wrap(self.event.c_flags)
+
+    def descr_get_fflags(self, space):
+        return space.wrap(self.event.c_fflags)
+
+    def descr_get_data(self, space):
+        return space.wrap(self.event.c_data)
+
+    def descr_get_udata(self, space):
+        return space.wrap(rffi.cast(rffi.SIZE_T, self.event.c_udata))
+
+
+W_Kevent.typedef = TypeDef("select.kevent",
+    __new__ = generic_new_descr(W_Kevent),
+    __init__ = interp2app(W_Kevent.descr__init__),
+    __eq__ = interp2app(W_Kevent.descr__eq__),
+    __ne__ = interp2app(W_Kevent.descr__ne__),
+    __le__ = interp2app(W_Kevent.descr__le__),
+    __lt__ = interp2app(W_Kevent.descr__lt__),
+    __ge__ = interp2app(W_Kevent.descr__ge__),
+    __gt__ = interp2app(W_Kevent.descr__gt__),
+
+    ident = GetSetProperty(W_Kevent.descr_get_ident),
+    filter = GetSetProperty(W_Kevent.descr_get_filter),
+    flags = GetSetProperty(W_Kevent.descr_get_flags),
+    fflags = GetSetProperty(W_Kevent.descr_get_fflags),
+    data = GetSetProperty(W_Kevent.descr_get_data),
+    udata = GetSetProperty(W_Kevent.descr_get_udata),
+)
+W_Kevent.typedef.acceptable_as_base_class = False
diff --git a/pypy/module/select/test/test_kqueue.py b/pypy/module/select/test/test_kqueue.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/select/test/test_kqueue.py
@@ -0,0 +1,190 @@
+# adapted from CPython: Lib/test/test_kqueue.py
+
+import py
+import sys
+
+from pypy.conftest import gettestobjspace
+
+
+class AppTestKqueue(object):
+    def setup_class(cls):
+        if not 'bsd' in sys.platform and \
+           not sys.platform.startswith('darwin'):
+            py.test.skip("test requires BSD")
+        cls.space = gettestobjspace(usemodules=["select", "_socket", "posix"])
+
+    def test_create(self):
+        import select
+
+        kq = select.kqueue()
+        assert kq.fileno() > 0
+        assert not kq.closed
+        kq.close()
+        assert kq.closed
+        raises(ValueError, kq.fileno)
+
+    def test_create_event(self):
+        import select
+        import sys
+
+        fd = sys.stderr.fileno()
+        ev = select.kevent(fd)
+        other = select.kevent(1000)
+        assert ev.ident == fd
+        assert ev.filter == select.KQ_FILTER_READ
+        assert ev.flags == select.KQ_EV_ADD
+        assert ev.fflags == 0
+        assert ev.data == 0
+        assert ev.udata == 0
+        assert ev == ev
+        assert ev != other
+        assert cmp(ev, other) == -1
+        assert ev < other
+        assert other >= ev
+        raises(TypeError, cmp, ev, None)
+        raises(TypeError, cmp, ev, 1)
+        raises(TypeError, cmp, ev, "ev")
+
+        ev = select.kevent(fd, select.KQ_FILTER_WRITE)
+        assert ev.ident == fd
+        assert ev.filter == select.KQ_FILTER_WRITE
+        assert ev.flags == select.KQ_EV_ADD
+        assert ev.fflags == 0
+        assert ev.data == 0
+        assert ev.udata == 0
+        assert ev == ev
+        assert ev != other
+
+        ev = select.kevent(fd, select.KQ_FILTER_WRITE, select.KQ_EV_ONESHOT)
+        assert ev.ident == fd
+        assert ev.filter == select.KQ_FILTER_WRITE
+        assert ev.flags == select.KQ_EV_ONESHOT
+        assert ev.fflags == 0
+        assert ev.data == 0
+        assert ev.udata == 0
+        assert ev == ev
+        assert ev != other
+
+        ev = select.kevent(1, 2, 3, 4, 5, 6)
+        assert ev.ident == 1
+        assert ev.filter == 2
+        assert ev.flags == 3
+        assert ev.fflags == 4
+        assert ev.data == 5
+        assert ev.udata == 6
+        assert ev == ev
+        assert ev != other
+
+        bignum = (sys.maxsize * 2 + 1) & 0xffffffff
+        fd = sys.maxsize
+        ev = select.kevent(fd, 1, 2, bignum, sys.maxsize, bignum)
+        assert ev.ident == fd
+        assert ev.filter == 1
+        assert ev.flags == 2
+        assert ev.fflags == bignum
+        assert ev.data == sys.maxsize
+        assert ev.udata == bignum
+        assert ev == ev
+        assert ev != other
+
+    def test_queue_event(self):
+        import errno
+        import select
+        import socket
+        import sys
+
+        server_socket = socket.socket()
+        server_socket.bind(("127.0.0.1", 0))
+        server_socket.listen(1)
+        client = socket.socket()
+        client.setblocking(False)
+        try:
+            client.connect(("127.0.0.1", server_socket.getsockname()[1]))
+        except socket.error as e:
+            if 'bsd' in sys.platform:
+                assert e.args[0] == errno.ENOENT
+            else:
+                assert e.args[0] == errno.EINPROGRESS
+        server, addr = server_socket.accept()
+
+        if sys.platform.startswith("darwin"):
+            flags = select.KQ_EV_ADD | select.KQ_EV_ENABLE
+        else:
+            flags = 0
+
+        kq1 = select.kqueue()
+        kq2 = select.kqueue.fromfd(kq1.fileno())
+
+        ev = select.kevent(server.fileno(), select.KQ_FILTER_WRITE, select.KQ_EV_ADD | select.KQ_EV_ENABLE)
+        kq1.control([ev], 0)
+        ev = select.kevent(server.fileno(), select.KQ_FILTER_READ, select.KQ_EV_ADD | select.KQ_EV_ENABLE)
+        kq1.control([ev], 0)
+        ev = select.kevent(client.fileno(), select.KQ_FILTER_WRITE, select.KQ_EV_ADD | select.KQ_EV_ENABLE)
+        kq2.control([ev], 0)
+        ev = select.kevent(client.fileno(), select.KQ_FILTER_READ, select.KQ_EV_ADD | select.KQ_EV_ENABLE)
+        kq2.control([ev], 0)
+
+        events = kq1.control(None, 4, 1)
+        events = [(e.ident, e.filter, e.flags) for e in events]
+        events.sort()
+        assert events == [
+            (client.fileno(), select.KQ_FILTER_WRITE, flags),
+            (server.fileno(), select.KQ_FILTER_WRITE, flags),
+        ]
+        client.send("Hello!")
+        server.send("world!!!")
+
+        for i in xrange(10):
+            events = kq1.control(None, 4, 1)
+            if len(events) == 4:
+                break
+            time.sleep(1.0)
+        else:
+            assert False, "timeout waiting for event notification"
+
+        events = [(e.ident, e.filter, e.flags) for e in events]
+        events.sort()
+        assert events == [
+            (client.fileno(), select.KQ_FILTER_WRITE, flags),
+            (client.fileno(), select.KQ_FILTER_READ, flags),
+            (server.fileno(), select.KQ_FILTER_WRITE, flags),
+            (server.fileno(), select.KQ_FILTER_READ, flags),
+        ]
+
+        ev = select.kevent(client.fileno(), select.KQ_FILTER_WRITE, select.KQ_EV_DELETE)
+        kq1.control([ev], 0)
+        ev = select.kevent(client.fileno(), select.KQ_FILTER_READ, select.KQ_EV_DELETE)
+        kq1.control([ev], 0)
+        ev = select.kevent(server.fileno(), select.KQ_FILTER_READ, select.KQ_EV_DELETE)
+        kq1.control([ev], 0, 0)
+
+        events = kq1.control([], 4, 0.99)
+        events = [(e.ident, e.filter, e.flags) for e in events]
+        events.sort()
+        assert events == [
+            (server.fileno(), select.KQ_FILTER_WRITE, flags),
+        ]
+
+        client.close()
+        server.close()
+        server_socket.close()
+
+    def test_pair(self):
+        import select
+        import socket
+
+        kq = select.kqueue()
+        a, b = socket.socketpair()
+
+        a.send('foo')
+        event1 = select.kevent(a, select.KQ_FILTER_READ, select.KQ_EV_ADD | select.KQ_EV_ENABLE)
+        event2 = select.kevent(b, select.KQ_FILTER_READ, select.KQ_EV_ADD | select.KQ_EV_ENABLE)
+        r = kq.control([event1, event2], 1, 1)
+        assert r
+        assert r[0].flags & select.KQ_EV_ERROR == 0
+        data = b.recv(r[0].data)
+        assert data == 'foo'
+
+        a.close()
+        b.close()
+        kq.close()
diff --git a/pypy/module/select/test/test_ztranslation.py b/pypy/module/select/test/test_ztranslation.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/select/test/test_ztranslation.py
@@ -0,0 +1,5 @@
+
+from pypy.objspace.fake.checkmodule import checkmodule
+
+def test_select_translates():
+    checkmodule('select')
diff --git a/pypy/module/struct/formatiterator.py b/pypy/module/struct/formatiterator.py
--- a/pypy/module/struct/formatiterator.py
+++ b/pypy/module/struct/formatiterator.py
@@ -1,5 +1,6 @@
 from pypy.rlib import jit
 from pypy.rlib.objectmodel import specialize
+from pypy.rlib.rstring import StringBuilder
 from pypy.rlib.rstruct.error import StructError
 from pypy.rlib.rstruct.formatiterator import FormatIterator
 from pypy.rlib.rstruct.standardfmttable import PACK_ACCEPTS_BROKEN_INPUT
@@ -8,11 +9,11 @@
 
 class PackFormatIterator(FormatIterator):
 
-    def __init__(self, space, args_w):
+    def __init__(self, space, args_w, size):
         self.space = space
         self.args_w = args_w
         self.args_index = 0
-        self.result = []      # list of characters
+        self.result = StringBuilder(size)
 
     # This *should* be always unroll safe, the only way to get here is by
     # unroll the interpret function, which means the fmt is const, and thus
@@ -29,9 +30,8 @@
 
     @jit.unroll_safe
     def align(self, mask):
-        pad = (-len(self.result)) & mask
-        for i in range(pad):
-            self.result.append('\x00')
+        pad = (-self.result.getlength()) & mask
+        self.result.append_multiple_char('\x00', pad)
 
     def finished(self):
         if self.args_index != len(self.args_w):
diff --git a/pypy/module/struct/interp_struct.py b/pypy/module/struct/interp_struct.py
--- a/pypy/module/struct/interp_struct.py
+++ b/pypy/module/struct/interp_struct.py
@@ -1,28 +1,34 @@
 from pypy.interpreter.gateway import unwrap_spec
 from pypy.module.struct.formatiterator import PackFormatIterator, UnpackFormatIterator
+from pypy.rlib import jit
 from pypy.rlib.rstruct.error import StructError
 from pypy.rlib.rstruct.formatiterator import CalcSizeFormatIterator
 
 
 @unwrap_spec(format=str)
 def calcsize(space, format):
+    return space.wrap(_calcsize(space, format))
+
+def _calcsize(space, format):
     fmtiter = CalcSizeFormatIterator()
     try:
         fmtiter.interpret(format)
     except StructError, e:
         raise e.at_applevel(space)
-    return space.wrap(fmtiter.totalsize)
-
+    return fmtiter.totalsize
 
 @unwrap_spec(format=str)
 def pack(space, format, args_w):
-    fmtiter = PackFormatIterator(space, args_w)
+    if jit.isconstant(format):
+        size = _calcsize(space, format)
+    else:
+        size = 8
+    fmtiter = PackFormatIterator(space, args_w, size)
     try:
         fmtiter.interpret(format)
     except StructError, e:
         raise e.at_applevel(space)
-    result = ''.join(fmtiter.result)
-    return space.wrap(result)
+    return space.wrap(fmtiter.result.build())
 
 
 @unwrap_spec(format=str, input='bufferstr')
diff --git a/pypy/module/struct/test/test_ztranslation.py b/pypy/module/struct/test/test_ztranslation.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/struct/test/test_ztranslation.py
@@ -0,0 +1,6 @@
+from pypy.objspace.fake.checkmodule import checkmodule
+
+
+def test_checkmodule():
+    checkmodule('struct')
+
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_errno.py b/pypy/module/test_lib_pypy/ctypes_tests/test_errno.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_errno.py
@@ -0,0 +1,21 @@
+import py
+
+import ctypes
+from _ctypes import function
+
+_rawffi = py.test.importorskip("_rawffi")
+
+class TestErrno:
+
+    def test_errno_saved_and_restored(self):
+        def check():
+            assert _rawffi.get_errno() == 42
+            assert ctypes.get_errno() == old
+        check.free_temp_buffers = lambda *args: None
+        f = function.CFuncPtr()
+        old = _rawffi.get_errno()
+        f._flags_ = _rawffi.FUNCFLAG_USE_ERRNO
+        ctypes.set_errno(42)
+        f._call_funcptr(check)
+        assert _rawffi.get_errno() == old
+        ctypes.set_errno(0)
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_fastpath.py b/pypy/module/test_lib_pypy/ctypes_tests/test_fastpath.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/test_fastpath.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_fastpath.py
@@ -97,6 +97,16 @@
         tf_b.errcheck = errcheck
         assert tf_b(-126) == 'hello'
 
+    def test_array_to_ptr(self):
+        ARRAY = c_int * 8
+        func = dll._testfunc_ai8
+        func.restype = POINTER(c_int)
+        func.argtypes = [ARRAY]
+        array = ARRAY(1, 2, 3, 4, 5, 6, 7, 8)
+        ptr = func(array)
+        assert ptr[0] == 1
+        assert ptr[7] == 8
+
 
 class TestFallbackToSlowpath(BaseCTypesTestChecker):
 
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_prototypes.py b/pypy/module/test_lib_pypy/ctypes_tests/test_prototypes.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/test_prototypes.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_prototypes.py
@@ -246,6 +246,14 @@
         def func(): pass
         CFUNCTYPE(None, c_int * 3)(func)
 
+    def test_array_to_ptr_wrongtype(self):
+        ARRAY = c_byte * 8
+        func = testdll._testfunc_ai8
+        func.restype = POINTER(c_int)
+        func.argtypes = [c_int * 8]
+        array = ARRAY(1, 2, 3, 4, 5, 6, 7, 8)
+        py.test.raises(ArgumentError, "func(array)")
+
 ################################################################
 
 if __name__ == '__main__':
diff --git a/pypy/module/test_lib_pypy/test_collections.py b/pypy/module/test_lib_pypy/test_collections.py
--- a/pypy/module/test_lib_pypy/test_collections.py
+++ b/pypy/module/test_lib_pypy/test_collections.py
@@ -6,7 +6,7 @@
 
 from pypy.conftest import gettestobjspace
 
-class AppTestcStringIO:
+class AppTestCollections:
     def test_copy(self):
         import _collections
         def f():
diff --git a/pypy/module/test_lib_pypy/test_datetime.py b/pypy/module/test_lib_pypy/test_datetime.py
--- a/pypy/module/test_lib_pypy/test_datetime.py
+++ b/pypy/module/test_lib_pypy/test_datetime.py
@@ -3,7 +3,7 @@
 import py
 
 import time
-import datetime
+from lib_pypy import datetime
 import copy
 import os
 
@@ -43,4 +43,4 @@
     dt = datetime.datetime.utcnow()
     assert type(dt.microsecond) is int
 
-    copy.copy(dt)
\ No newline at end of file
+    copy.copy(dt)
diff --git a/pypy/objspace/flow/operation.py b/pypy/objspace/flow/operation.py
--- a/pypy/objspace/flow/operation.py
+++ b/pypy/objspace/flow/operation.py
@@ -30,10 +30,7 @@
 
 def new_style_type(x):
     """Simulate a situation where every class is new-style"""
-    t = getattr(x, '__class__', type(x))
-    if t is types.ClassType:   # guess who's here?  exception classes...
-        t = type
-    return t
+    return getattr(x, '__class__', type(x))
 
 def do_int(x):
     return x.__int__()
diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py
--- a/pypy/objspace/std/dictmultiobject.py
+++ b/pypy/objspace/std/dictmultiobject.py
@@ -2,6 +2,7 @@
 from pypy.objspace.std.model import registerimplementation, W_Object
 from pypy.objspace.std.register_all import register_all
 from pypy.objspace.std.settype import set_typedef as settypedef
+from pypy.objspace.std.frozensettype import frozenset_typedef as frozensettypedef
 from pypy.interpreter import gateway
 from pypy.interpreter.argument import Signature
 from pypy.interpreter.error import OperationError, operationerrfmt
@@ -142,6 +143,17 @@
             else:
                 return result
 
+    def popitem(self, w_dict):
+        # this is a bad implementation: if we call popitem() repeatedly,
+        # it ends up taking n**2 time, because the next() calls below
+        # will take longer and longer.  But all interesting strategies
+        # provide a better one.
+        space = self.space
+        iterator = self.iter(w_dict)
+        w_key, w_value = iterator.next()
+        self.delitem(w_dict, w_key)
+        return (w_key, w_value)
+
     def clear(self, w_dict):
         strategy = self.space.fromcache(EmptyDictStrategy)
         storage = strategy.get_empty_storage()
@@ -477,7 +489,7 @@
 
 class _UnwrappedIteratorMixin:
     _mixin_ = True
-    
+
     def __init__(self, space, strategy, dictimplementation):
         IteratorImplementation.__init__(self, space, dictimplementation)
         self.iterator = strategy.unerase(dictimplementation.dstorage).iteritems()
@@ -826,10 +838,12 @@
         return all_contained_in(space, w_dictview, w_otherview)
     return space.w_False
 eq__DictViewKeys_settypedef = eq__DictViewKeys_DictViewKeys
+eq__DictViewKeys_frozensettypedef = eq__DictViewKeys_DictViewKeys
 
 eq__DictViewKeys_DictViewItems = eq__DictViewKeys_DictViewKeys
 eq__DictViewItems_DictViewItems = eq__DictViewKeys_DictViewKeys
 eq__DictViewItems_settypedef = eq__DictViewItems_DictViewItems
+eq__DictViewItems_frozensettypedef = eq__DictViewItems_DictViewItems
 
 def repr__DictViewKeys(space, w_dictview):
     w_seq = space.call_function(space.w_list, w_dictview)
diff --git a/pypy/objspace/std/dictproxyobject.py b/pypy/objspace/std/dictproxyobject.py
--- a/pypy/objspace/std/dictproxyobject.py
+++ b/pypy/objspace/std/dictproxyobject.py
@@ -3,7 +3,7 @@
 from pypy.objspace.std.dictmultiobject import W_DictMultiObject, IteratorImplementation
 from pypy.objspace.std.dictmultiobject import DictStrategy
 from pypy.objspace.std.typeobject import unwrap_cell
-from pypy.interpreter.error import OperationError
+from pypy.interpreter.error import OperationError, operationerrfmt
 
 from pypy.rlib import rerased
 
@@ -44,7 +44,8 @@
                 raise
             if not w_type.is_cpytype():
                 raise
-            # xxx obscure workaround: allow cpyext to write to type->tp_dict.
+            # xxx obscure workaround: allow cpyext to write to type->tp_dict
+            # xxx even in the case of a builtin type.
             # xxx like CPython, we assume that this is only done early after
             # xxx the type is created, and we don't invalidate any cache.
             w_type.dict_w[key] = w_value
@@ -86,8 +87,14 @@
                     for (key, w_value) in self.unerase(w_dict.dstorage).dict_w.iteritems()]
 
     def clear(self, w_dict):
-        self.unerase(w_dict.dstorage).dict_w.clear()
-        self.unerase(w_dict.dstorage).mutated(None)
+        space = self.space
+        w_type = self.unerase(w_dict.dstorage)
+        if (not space.config.objspace.std.mutable_builtintypes
+                and not w_type.is_heaptype()):
+            msg = "can't clear dictionary of type '%s'"
+            raise operationerrfmt(space.w_TypeError, msg, w_type.name)
+        w_type.dict_w.clear()
+        w_type.mutated(None)
 
 class DictProxyIteratorImplementation(IteratorImplementation):
     def __init__(self, space, strategy, dictimplementation):
diff --git a/pypy/objspace/std/listobject.py b/pypy/objspace/std/listobject.py
--- a/pypy/objspace/std/listobject.py
+++ b/pypy/objspace/std/listobject.py
@@ -7,7 +7,7 @@
 from pypy.objspace.std.sliceobject import W_SliceObject, normalize_simple_slice
 from pypy.objspace.std import slicetype
 from pypy.interpreter import gateway, baseobjspace
-from pypy.rlib.objectmodel import instantiate, specialize
+from pypy.rlib.objectmodel import instantiate, specialize, newlist_hint
 from pypy.rlib.listsort import make_timsort_class
 from pypy.rlib import rerased, jit, debug
 from pypy.interpreter.argument import Signature
@@ -32,9 +32,11 @@
     storage = strategy.erase(None)
     return W_ListObject.from_storage_and_strategy(space, storage, strategy)
 
- at jit.look_inside_iff(lambda space, list_w: jit.isconstant(len(list_w)) and len(list_w) < UNROLL_CUTOFF)
-def get_strategy_from_list_objects(space, list_w):
+ at jit.look_inside_iff(lambda space, list_w, sizehint: jit.isconstant(len(list_w)) and len(list_w) < UNROLL_CUTOFF)
+def get_strategy_from_list_objects(space, list_w, sizehint):
     if not list_w:
+        if sizehint != -1:
+            return SizeListStrategy(space, sizehint)
         return space.fromcache(EmptyListStrategy)
 
     # check for ints
@@ -75,11 +77,13 @@
 class W_ListObject(W_AbstractListObject):
     from pypy.objspace.std.listtype import list_typedef as typedef
 
-    def __init__(w_self, space, wrappeditems):
+    def __init__(w_self, space, wrappeditems, sizehint=-1):
         assert isinstance(wrappeditems, list)
         w_self.space = space
         if space.config.objspace.std.withliststrategies:
-            w_self.strategy = get_strategy_from_list_objects(space, wrappeditems)
+            w_self.strategy = get_strategy_from_list_objects(space,
+                                                             wrappeditems,
+                                                             sizehint)
         else:
             w_self.strategy = space.fromcache(ObjectListStrategy)
         w_self.init_from_list_w(wrappeditems)
@@ -255,6 +259,7 @@
 
 
 class ListStrategy(object):
+    sizehint = -1
 
     def __init__(self, space):
         self.space = space
@@ -336,6 +341,7 @@
     def sort(self, w_list, reverse):
         raise NotImplementedError
 
+
 class EmptyListStrategy(ListStrategy):
     """EmptyListStrategy is used when a W_List withouth elements is created.
     The storage is None. When items are added to the W_List a new RPython list
@@ -397,7 +403,7 @@
         else:
             strategy = self.space.fromcache(ObjectListStrategy)
 
-        storage = strategy.get_empty_storage()
+        storage = strategy.get_empty_storage(self.sizehint)
         w_list.strategy = strategy
         w_list.lstorage = storage
 
@@ -438,6 +444,13 @@
     def reverse(self, w_list):
         pass
 
+class SizeListStrategy(EmptyListStrategy):
+    """ Like empty, but when modified it'll preallocate the size to sizehint
+    """
+    def __init__(self, space, sizehint):
+        self.sizehint = sizehint
+        ListStrategy.__init__(self, space)
+
 class RangeListStrategy(ListStrategy):
     """RangeListStrategy is used when a list is created using the range method.
     The storage is a tuple containing only three integers start, step and length
@@ -660,8 +673,10 @@
         l = [self.unwrap(w_item) for w_item in list_w]
         w_list.lstorage = self.erase(l)
 
-    def get_empty_storage(self):
-        return self.erase([])
+    def get_empty_storage(self, sizehint):
+        if sizehint == -1:
+            return self.erase([])
+        return self.erase(newlist_hint(sizehint))
 
     def clone(self, w_list):
         l = self.unerase(w_list.lstorage)
diff --git a/pypy/objspace/std/marshal_impl.py b/pypy/objspace/std/marshal_impl.py
--- a/pypy/objspace/std/marshal_impl.py
+++ b/pypy/objspace/std/marshal_impl.py
@@ -16,6 +16,7 @@
 from pypy.interpreter.pycode import PyCode
 from pypy.interpreter import gateway, unicodehelper
 from pypy.rlib.rstruct import ieee
+from pypy.rlib.rstring import StringBuilder
 
 from pypy.objspace.std.boolobject    import W_BoolObject
 from pypy.objspace.std.complexobject import W_ComplexObject
@@ -153,9 +154,9 @@
 register(TYPE_INT64, unmarshal_Int64)
 
 def pack_float(f):
-    result = []
+    result = StringBuilder(8)
     ieee.pack_float(result, f, 8, False)
-    return ''.join(result)
+    return result.build()
 
 def unpack_float(s):
     return ieee.unpack_float(s, False)
diff --git a/pypy/objspace/std/newformat.py b/pypy/objspace/std/newformat.py
--- a/pypy/objspace/std/newformat.py
+++ b/pypy/objspace/std/newformat.py
@@ -10,6 +10,10 @@
 
 
 @specialize.argtype(1)
+ at jit.look_inside_iff(lambda space, s, start, end:
+       jit.isconstant(s) and
+       jit.isconstant(start) and
+       jit.isconstant(end))
 def _parse_int(space, s, start, end):
     """Parse a number and check for overflows"""
     result = 0
@@ -91,9 +95,18 @@
                         if s[i] == "{":
                             i += 1
                             markup_follows = False
-                    # Attach literal data
+                    # Attach literal data, ending with { or }
                     out.append_slice(s, last_literal, i - 1)
                     if not markup_follows:
+                        if self.parser_list_w is not None:
+                            end_literal = i - 1
+                            assert end_literal > last_literal
+                            literal = self.template[last_literal:end_literal]
+                            w_entry = space.newtuple([
+                                space.wrap(literal),
+                                space.w_None, space.w_None, space.w_None])
+                            self.parser_list_w.append(w_entry)
+                            self.last_end = i
                         last_literal = i
                         continue
                     nested = 1
diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py
--- a/pypy/objspace/std/objspace.py
+++ b/pypy/objspace/std/objspace.py
@@ -300,8 +300,9 @@
         make_sure_not_resized(list_w)
         return wraptuple(self, list_w)
 
-    def newlist(self, list_w):
-        return W_ListObject(self, list_w)
+    def newlist(self, list_w, sizehint=-1):
+        assert not list_w or sizehint == -1
+        return W_ListObject(self, list_w, sizehint)
 
     def newlist_str(self, list_s):
         return W_ListObject.newlist_str(self, list_s)
diff --git a/pypy/objspace/std/ropeunicodeobject.py b/pypy/objspace/std/ropeunicodeobject.py
--- a/pypy/objspace/std/ropeunicodeobject.py
+++ b/pypy/objspace/std/ropeunicodeobject.py
@@ -8,6 +8,7 @@
 from pypy.objspace.std.ropeobject import W_RopeObject
 from pypy.objspace.std.noneobject import W_NoneObject
 from pypy.rlib import rope
+from pypy.rlib.rstring import StringBuilder
 from pypy.objspace.std.sliceobject import W_SliceObject, normalize_simple_slice
 from pypy.objspace.std import unicodeobject, slicetype, iterobject
 from pypy.objspace.std.tupleobject import W_TupleObject
@@ -946,15 +947,16 @@
     return mod_format(space, w_format, w_values, do_unicode=True)
 
 def buffer__RopeUnicode(space, w_unicode):
-    from pypy.rlib.rstruct.unichar import pack_unichar
-    charlist = []
+    from pypy.rlib.rstruct.unichar import pack_unichar, UNICODE_SIZE
     node = w_unicode._node
     iter = rope.ItemIterator(node)
-    for idx in range(node.length()):
+    length = node.length()
+    builder = StringBuilder(length * UNICODE_SIZE)
+    for idx in range(length):
         unich = unichr(iter.nextint())
-        pack_unichar(unich, charlist)
+        pack_unichar(unich, builder)
     from pypy.interpreter.buffer import StringBuffer
-    return space.wrap(StringBuffer(''.join(charlist)))
+    return space.wrap(StringBuffer(builder.build()))
 
 
 # methods of the iterator
diff --git a/pypy/objspace/std/test/test_dictmultiobject.py b/pypy/objspace/std/test/test_dictmultiobject.py
--- a/pypy/objspace/std/test/test_dictmultiobject.py
+++ b/pypy/objspace/std/test/test_dictmultiobject.py
@@ -613,6 +613,7 @@
         assert len(keys) == 2
         assert set(keys) == set([1, "a"])
         assert keys == set([1, "a"])
+        assert keys == frozenset([1, "a"])
         assert keys != set([1, "a", "b"])
         assert keys != set([1, "b"])
         assert keys != set([1])
@@ -633,6 +634,7 @@
         assert len(items) == 2
         assert set(items) == set([(1, 10), ("a", "ABC")])
         assert items == set([(1, 10), ("a", "ABC")])
+        assert items == frozenset([(1, 10), ("a", "ABC")])
         assert items != set([(1, 10), ("a", "ABC"), "junk"])
         assert items != set([(1, 10), ("a", "def")])
         assert items != set([(1, 10)])
diff --git a/pypy/objspace/std/test/test_dictproxy.py b/pypy/objspace/std/test/test_dictproxy.py
--- a/pypy/objspace/std/test/test_dictproxy.py
+++ b/pypy/objspace/std/test/test_dictproxy.py
@@ -22,6 +22,9 @@
         assert NotEmpty.string == 1
         raises(TypeError, 'NotEmpty.__dict__.setdefault(15, 1)')
 
+        key, value = NotEmpty.__dict__.popitem()
+        assert (key == 'a' and value == 1) or (key == 'b' and value == 4)
+
     def test_dictproxyeq(self):
         class a(object):
             pass
@@ -43,6 +46,11 @@
         assert s1 == s2
         assert s1.startswith('{') and s1.endswith('}')
 
+    def test_immutable_dict_on_builtin_type(self):
+        raises(TypeError, "int.__dict__['a'] = 1")
+        raises(TypeError, int.__dict__.popitem)
+        raises(TypeError, int.__dict__.clear)
+
 class AppTestUserObjectMethodCache(AppTestUserObject):
     def setup_class(cls):
         cls.space = gettestobjspace(
diff --git a/pypy/objspace/std/test/test_listobject.py b/pypy/objspace/std/test/test_listobject.py
--- a/pypy/objspace/std/test/test_listobject.py
+++ b/pypy/objspace/std/test/test_listobject.py
@@ -1,6 +1,7 @@
 # coding: iso-8859-15
 import random
-from pypy.objspace.std.listobject import W_ListObject
+from pypy.objspace.std.listobject import W_ListObject, SizeListStrategy,\
+     IntegerListStrategy, ObjectListStrategy
 from pypy.interpreter.error import OperationError
 
 from pypy.conftest import gettestobjspace, option
@@ -390,6 +391,16 @@
         assert self.space.eq_w(self.space.le(w_list4, w_list3),
                            self.space.w_True)
 
+    def test_sizehint(self):
+        space = self.space
+        w_l = space.newlist([], sizehint=10)
+        assert isinstance(w_l.strategy, SizeListStrategy)
+        space.call_method(w_l, 'append', space.wrap(3))
+        assert isinstance(w_l.strategy, IntegerListStrategy)
+        w_l = space.newlist([], sizehint=10)
+        space.call_method(w_l, 'append', space.w_None)
+        assert isinstance(w_l.strategy, ObjectListStrategy)
+
 
 class AppTestW_ListObject(object):
     def setup_class(cls):
diff --git a/pypy/objspace/std/test/test_newformat.py b/pypy/objspace/std/test/test_newformat.py
--- a/pypy/objspace/std/test/test_newformat.py
+++ b/pypy/objspace/std/test/test_newformat.py
@@ -11,6 +11,7 @@
         assert self.s("}}").format() == self.s("}")
         assert self.s("{} {{ {}").format(1, 2) == self.s("1 { 2")
         assert self.s("{{}}").format() == self.s("{}")
+        assert self.s("{{{{").format() == self.s("{{")
 
     def test_empty(self):
         assert self.s().format() == self.s()
@@ -385,6 +386,12 @@
         for x in l[0]:
             assert isinstance(x, unicode)
 
+    def test_formatter_parser_escape(self):
+        l = list("{{a}}"._formatter_parser())
+        assert l == [('{', None, None, None), ('a}', None, None, None)]
+        l = list("{{{{"._formatter_parser())
+        assert l == [('{', None, None, None), ('{', None, None, None)]
+
     def test_formatter_field_name_split(self):
         first, rest = ''._formatter_field_name_split()
         assert first == ''
diff --git a/pypy/objspace/std/test/test_typeobject.py b/pypy/objspace/std/test/test_typeobject.py
--- a/pypy/objspace/std/test/test_typeobject.py
+++ b/pypy/objspace/std/test/test_typeobject.py
@@ -993,7 +993,9 @@
         raises(TypeError, setattr, list, 'append', 42)
         raises(TypeError, setattr, list, 'foobar', 42)
         raises(TypeError, delattr, dict, 'keys')
-        
+        raises(TypeError, 'int.__dict__["a"] = 1')
+        raises(TypeError, 'int.__dict__.clear()')
+
     def test_nontype_in_mro(self):
         class OldStyle:
             pass
diff --git a/pypy/objspace/std/typeobject.py b/pypy/objspace/std/typeobject.py
--- a/pypy/objspace/std/typeobject.py
+++ b/pypy/objspace/std/typeobject.py
@@ -103,6 +103,7 @@
                           'terminator',
                           '_version_tag?',
                           'name?',
+                          'mro_w?[*]',
                           ]
 
     # for config.objspace.std.getattributeshortcut
@@ -345,9 +346,9 @@
 
         return w_self._lookup_where(name)
 
+    @unroll_safe
     def lookup_starting_at(w_self, w_starttype, name):
         space = w_self.space
-        # XXX Optimize this with method cache
         look = False
         for w_class in w_self.mro_w:
             if w_class is w_starttype:
diff --git a/pypy/rlib/debug.py b/pypy/rlib/debug.py
--- a/pypy/rlib/debug.py
+++ b/pypy/rlib/debug.py
@@ -19,14 +19,24 @@
         hop.exception_cannot_occur()
         hop.genop('debug_assert', vlist)
 
-def fatalerror(msg, traceback=False):
+def fatalerror(msg):
+    # print the RPython traceback and abort with a fatal error
     from pypy.rpython.lltypesystem import lltype
     from pypy.rpython.lltypesystem.lloperation import llop
-    if traceback:
-        llop.debug_print_traceback(lltype.Void)
+    llop.debug_print_traceback(lltype.Void)
     llop.debug_fatalerror(lltype.Void, msg)
 fatalerror._dont_inline_ = True
-fatalerror._annspecialcase_ = 'specialize:arg(1)'
+fatalerror._jit_look_inside_ = False
+fatalerror._annenforceargs_ = [str]
+
+def fatalerror_notb(msg):
+    # a variant of fatalerror() that doesn't print the RPython traceback
+    from pypy.rpython.lltypesystem import lltype
+    from pypy.rpython.lltypesystem.lloperation import llop
+    llop.debug_fatalerror(lltype.Void, msg)
+fatalerror_notb._dont_inline_ = True
+fatalerror_notb._jit_look_inside_ = False
+fatalerror_notb._annenforceargs_ = [str]
 
 
 class DebugLog(list):
diff --git a/pypy/rlib/jit.py b/pypy/rlib/jit.py
--- a/pypy/rlib/jit.py
+++ b/pypy/rlib/jit.py
@@ -392,6 +392,9 @@
 class JitHintError(Exception):
     """Inconsistency in the JIT hints."""
 
+ENABLE_ALL_OPTS = (
+    'intbounds:rewrite:virtualize:string:earlyforce:pure:heap:ffi:unroll')
+
 PARAMETER_DOCS = {
     'threshold': 'number of times a loop has to run for it to become hot',
     'function_threshold': 'number of times a function must run for it to become traced from start',
@@ -402,7 +405,8 @@
     'retrace_limit': 'how many times we can try retracing before giving up',
     'max_retrace_guards': 'number of extra guards a retrace can cause',
     'max_unroll_loops': 'number of extra unrollings a loop can cause',
-    'enable_opts': 'optimizations to enable or all, INTERNAL USE ONLY'
+    'enable_opts': 'INTERNAL USE ONLY: optimizations to enable, or all = %s' %
+                       ENABLE_ALL_OPTS,
     }
 
 PARAMETERS = {'threshold': 1039, # just above 1024, prime
@@ -450,6 +454,7 @@
             assert v in self.reds
         self._alllivevars = dict.fromkeys(
             [name for name in self.greens + self.reds if '.' not in name])
+        self._heuristic_order = {}   # check if 'reds' and 'greens' are ordered
         self._make_extregistryentries()
         self.get_jitcell_at = get_jitcell_at
         self.set_jitcell_at = set_jitcell_at
@@ -461,13 +466,61 @@
     def _freeze_(self):
         return True
 
+    def _check_arguments(self, livevars):
+        assert dict.fromkeys(livevars) == self._alllivevars
+        # check heuristically that 'reds' and 'greens' are ordered as
+        # the JIT will need them to be: first INTs, then REFs, then
+        # FLOATs.
+        if len(self._heuristic_order) < len(livevars):
+            from pypy.rlib.rarithmetic import (r_singlefloat, r_longlong,
+                                               r_ulonglong, r_uint)
+            added = False
+            for var, value in livevars.items():
+                if var not in self._heuristic_order:
+                    if (r_ulonglong is not r_uint and
+                            isinstance(value, (r_longlong, r_ulonglong))):
+                        assert 0, ("should not pass a r_longlong argument for "
+                                   "now, because on 32-bit machines it needs "
+                                   "to be ordered as a FLOAT but on 64-bit "
+                                   "machines as an INT")
+                    elif isinstance(value, (int, long, r_singlefloat)):
+                        kind = '1:INT'
+                    elif isinstance(value, float):
+                        kind = '3:FLOAT'
+                    elif isinstance(value, (str, unicode)) and len(value) != 1:
+                        kind = '2:REF'
+                    elif isinstance(value, (list, dict)):
+                        kind = '2:REF'
+                    elif (hasattr(value, '__class__')
+                          and value.__class__.__module__ != '__builtin__'):
+                        if hasattr(value, '_freeze_'):
+                            continue   # value._freeze_() is better not called
+                        elif getattr(value, '_alloc_flavor_', 'gc') == 'gc':
+                            kind = '2:REF'
+                        else:
+                            kind = '1:INT'
+                    else:
+                        continue
+                    self._heuristic_order[var] = kind
+                    added = True
+            if added:
+                for color in ('reds', 'greens'):
+                    lst = getattr(self, color)
+                    allkinds = [self._heuristic_order.get(name, '?')
+                                for name in lst]
+                    kinds = [k for k in allkinds if k != '?']
+                    assert kinds == sorted(kinds), (
+                        "bad order of %s variables in the jitdriver: "
+                        "must be INTs, REFs, FLOATs; got %r" %
+                        (color, allkinds))
+
     def jit_merge_point(_self, **livevars):
         # special-cased by ExtRegistryEntry
-        assert dict.fromkeys(livevars) == _self._alllivevars
+        _self._check_arguments(livevars)
 
     def can_enter_jit(_self, **livevars):
         # special-cased by ExtRegistryEntry
-        assert dict.fromkeys(livevars) == _self._alllivevars
+        _self._check_arguments(livevars)
 
     def loop_header(self):
         # special-cased by ExtRegistryEntry
diff --git a/pypy/rlib/objectmodel.py b/pypy/rlib/objectmodel.py
--- a/pypy/rlib/objectmodel.py
+++ b/pypy/rlib/objectmodel.py
@@ -23,9 +23,11 @@
 
 class _Specialize(object):
     def memo(self):
-        """ Specialize functions based on argument values. All arguments has
-        to be constant at the compile time. The whole function call is replaced
-        by a call result then.
+        """ Specialize the function based on argument values.  All arguments
+        have to be either constants or PBCs (i.e. instances of classes with a
+        _freeze_ method returning True).  The function call is replaced by
+        just its result, or in case several PBCs are used, by some fast
+        look-up of the result.
         """
         def decorated_func(func):
             func._annspecialcase_ = 'specialize:memo'
@@ -33,8 +35,8 @@
         return decorated_func
 
     def arg(self, *args):
-        """ Specialize function based on values of given positions of arguments.
-        They must be compile-time constants in order to work.
+        """ Specialize the function based on the values of given positions
+        of arguments.  They must be compile-time constants in order to work.
 
         There will be a copy of provided function for each combination
         of given arguments on positions in args (that can lead to
@@ -82,8 +84,7 @@
         return decorated_func
 
     def ll_and_arg(self, *args):
-        """ This is like ll(), but instead of specializing on all arguments,
-        specializes on only the arguments at the given positions
+        """ This is like ll(), and additionally like arg(...).
         """
         def decorated_func(func):
             func._annspecialcase_ = 'specialize:ll_and_arg' + self._wrap(args)
@@ -232,20 +233,22 @@
 
 # ____________________________________________________________
 
-def newlist(sizehint=0):
+def newlist_hint(sizehint=0):
     """ Create a new list, but pass a hint how big the size should be
     preallocated
     """
     return []
 
 class Entry(ExtRegistryEntry):
-    _about_ = newlist
+    _about_ = newlist_hint
 
     def compute_result_annotation(self, s_sizehint):
         from pypy.annotation.model import SomeInteger
 
         assert isinstance(s_sizehint, SomeInteger)
-        return self.bookkeeper.newlist()
+        s_l = self.bookkeeper.newlist()
+        s_l.listdef.listitem.resize()
+        return s_l
 
     def specialize_call(self, orig_hop, i_sizehint=None):
         from pypy.rpython.rlist import rtype_newlist
diff --git a/pypy/rlib/rerased_raw.py b/pypy/rlib/rerased_raw.py
new file mode 100644
--- /dev/null
+++ b/pypy/rlib/rerased_raw.py
@@ -0,0 +1,204 @@
+"""
+This module supports raw storage of arbitrary data (GC ptr, instance, float,
+int) into a void *.  The creator of the UntypedStorage is responsible for
+making sure that the shape string is cached correctly.
+"""
+
+from pypy.annotation import model as annmodel
+from pypy.annotation.bookkeeper import getbookkeeper
+from pypy.rpython.annlowlevel import hlstr, llstr, llhelper
+from pypy.rpython.extregistry import ExtRegistryEntry
+from pypy.rpython.lltypesystem import rffi, lltype, llmemory
+from pypy.rpython.lltypesystem.rstr import STR, string_repr
+from pypy.rpython.rmodel import Repr
+
+
+INT = "i"
+FLOAT = "f"
+INSTANCE = "o"
+
+class UntypedStorage(object):
+    def __init__(self, shape):
+        self.storage = [None] * len(shape)
+        self.shape = shape
+
+    def getint(self, idx):
+        assert self.shape[idx] == INT
+        v = self.storage[idx]
+        assert isinstance(v, int)
+        return v
+
+    def setint(self, idx, v):
+        assert self.shape[idx] == INT
+        assert isinstance(v, int)
+        self.storage[idx] = v
+
+    def getfloat(self, idx):
+        assert self.shape[idx] == FLOAT
+        v = self.storage[idx]
+        assert isinstance(v, float)
+        return v
+
+    def setfloat(self, idx, f):
+        assert self.shape[idx] == FLOAT
+        assert isinstance(f, float)
+        self.storage[idx] = f
+
+    def getinstance(self, idx, cls):
+        obj = self.storage[idx]
+        assert self.shape[idx] == INSTANCE
+        assert isinstance(obj, cls)
+        return obj
+
+    def setinstance(self, idx, obj):
+        assert self.shape[idx] == INSTANCE
+        self.storage[idx] = obj
+
+class UntypedStorageEntry(ExtRegistryEntry):
+    _about_ = UntypedStorage
+
+    def compute_result_annotation(self, s_shape):
+        assert annmodel.SomeString().contains(s_shape)
+        return SomeUntypedStorage()
+
+    def specialize_call(self, hop):
+        return hop.r_result.rtyper_new(hop)
+
+
+class SomeUntypedStorage(annmodel.SomeObject):
+    def _check_idx(self, s_idx):
+        assert annmodel.SomeInteger().contains(s_idx)
+
+    def rtyper_makerepr(self, rtyper):
+        return UntypedStorageRepr()
+
+    def method_getint(self, s_idx):
+        self._check_idx(s_idx)
+        return annmodel.SomeInteger()
+
+    def method_setint(self, s_idx, s_v):
+        self._check_idx(s_idx)
+        assert annmodel.SomeInteger().contains(s_v)
+
+    def method_getfloat(self, s_idx):
+        self._check_idx(s_idx)
+        return annmodel.SomeFloat()
+
+    def method_setfloat(self, s_idx, s_f):
+        self._check_idx(s_idx)
+        assert annmodel.SomeFloat().contains(s_f)
+
+    def method_getinstance(self, s_idx, s_cls):
+        self._check_idx(s_idx)
+        assert isinstance(s_cls, annmodel.SomePBC)
+        bookkeeper = getbookkeeper()
+        classdef = bookkeeper.getuniqueclassdef(s_cls.const)
+        return annmodel.SomeInstance(classdef, can_be_None=True)
+
+    def method_setinstance(self, s_idx, s_obj):
+        self._check_idx(s_idx)
+        assert isinstance(s_obj, annmodel.SomeInstance)
+
+UNTYPEDSTORAGE = lltype.GcStruct("untypedstorage",
+    ("shape", lltype.Ptr(STR)),
+    ("data", lltype.Array(llmemory.Address)),
+    rtti=True,
+)
+
+
+CUSTOMTRACEFUNC = lltype.FuncType([llmemory.Address, llmemory.Address],
+                                  llmemory.Address)
+def trace_untypedstorage(obj_addr, prev):
+    # XXX: This has O(n**2) complexity because of the below loop, if we could
+    # do more arithmetic ops on addresses then it could be O(n).
+    shape_addr = obj_addr + llmemory.offsetof(UNTYPEDSTORAGE, "shape")
+    if not prev:
+        return shape_addr
+    shape = shape_addr.address[0]
+    length_offset = (llmemory.offsetof(STR, "chars") +
+        llmemory.arraylengthoffset(STR.chars))
+    length = (shape + length_offset).signed[0]
+
+    seen_prev = prev == shape_addr
+    i = 0
+    while i < length:
+        data_ptr = (obj_addr + llmemory.offsetof(UNTYPEDSTORAGE, "data") +
+            llmemory.itemoffsetof(UNTYPEDSTORAGE.data, 0) +
+            llmemory.sizeof(UNTYPEDSTORAGE.data.OF) * i)
+
+        if not seen_prev:
+            if data_ptr == prev:
+                seen_prev = True
+            i += 1
+            continue
+
+        char = (shape + llmemory.offsetof(STR, "chars") +
+                llmemory.itemoffsetof(STR.chars, 0) +
+                (llmemory.sizeof(STR.chars.OF) * i)).char[0]
+        if char == INSTANCE:
+            return data_ptr
+        i += 1
+    return llmemory.NULL
+trace_untypedstorage_ptr = llhelper(lltype.Ptr(CUSTOMTRACEFUNC), trace_untypedstorage)
+
+class UntypedStorageRepr(Repr):
+    lowleveltype = lltype.Ptr(UNTYPEDSTORAGE)
+    lltype.attachRuntimeTypeInfo(lowleveltype.TO, customtraceptr=trace_untypedstorage_ptr)
+
+    def _read_index(self, hop):
+        v_arr = hop.inputarg(self, arg=0)
+        v_idx = hop.inputarg(lltype.Signed, arg=1)
+        c_name = hop.inputconst(lltype.Void, "data")
+        hop.exception_cannot_occur()
+        return hop.genop("getinteriorfield", [v_arr, c_name, v_idx],
+                         resulttype=llmemory.Address)
+
+    def _write_index(self, hop, v_value):
+        v_arr = hop.inputarg(self, arg=0)
+        v_idx = hop.inputarg(lltype.Signed, arg=1)
+        hop.exception_cannot_occur()
+        c_name = hop.inputconst(lltype.Void, "data")
+        hop.genop("setinteriorfield", [v_arr, c_name, v_idx, v_value])
+
+    def rtyper_new(self, hop):
+        [v_shape] = hop.inputargs(string_repr)
+        hop.exception_cannot_occur()
+        return hop.gendirectcall(self.ll_new, v_shape)
+
+    def rtype_method_getint(self, hop):
+        v_addr = self._read_index(hop)
+        return hop.genop("force_cast", [v_addr], resulttype=lltype.Signed)
+
+    def rtype_method_setint(self, hop):
+        v_value = hop.inputarg(lltype.Signed, arg=2)
+        v_addr = hop.genop("force_cast", [v_value], resulttype=llmemory.Address)
+        self._write_index(hop, v_addr)
+
+    def rtype_method_getfloat(self, hop):
+        v_value = self._read_index(hop)
+        return hop.genop("cast_adr_to_float", [v_value], resulttype=lltype.Float)
+
+    def rtype_method_setfloat(self, hop):
+        v_value = hop.inputarg(lltype.Float, arg=2)
+
+        v_addr = hop.genop("cast_float_to_adr", [v_value], resulttype=llmemory.Address)
+        self._write_index(hop, v_addr)
+
+    def rtype_method_getinstance(self, hop):
+        v_addr = self._read_index(hop)
+        return hop.genop("cast_adr_to_ptr", [v_addr], resulttype=hop.r_result.lowleveltype)
+
+    def rtype_method_setinstance(self, hop):
+        v_arr = hop.inputarg(self, arg=0)
+        v_instance = hop.inputarg(hop.args_r[2], arg=2)
+        hop.genop("gc_writebarrier", [v_instance, v_arr])
+
+        v_addr = hop.genop("cast_ptr_to_adr", [v_instance],
+                           resulttype=llmemory.Address)
+        self._write_index(hop, v_addr)
+
+    @classmethod
+    def ll_new(cls, shape):
+        obj = lltype.malloc(cls.lowleveltype.TO, len(shape.chars))
+        obj.shape = shape
+        return obj
\ No newline at end of file
diff --git a/pypy/rlib/rstruct/ieee.py b/pypy/rlib/rstruct/ieee.py
--- a/pypy/rlib/rstruct/ieee.py
+++ b/pypy/rlib/rstruct/ieee.py
@@ -4,7 +4,7 @@
 
 import math
 
-from pypy.rlib import rarithmetic, rfloat, objectmodel
+from pypy.rlib import rarithmetic, rfloat, objectmodel, jit
 from pypy.rlib.rarithmetic import r_ulonglong
 
 
@@ -135,14 +135,15 @@
     return ((sign << BITS - 1) | (exp << MANT_DIG - 1)) | mant
 
 
+ at jit.unroll_safe
 def pack_float(result, x, size, be):
-    l = [] if be else result
+    l = []
     unsigned = float_pack(x, size)
     for i in range(size):
         l.append(chr((unsigned >> (i * 8)) & 0xFF))
     if be:
         l.reverse()
-        result.extend(l)
+    result.append("".join(l))
 
 
 def unpack_float(s, be):
diff --git a/pypy/rlib/rstruct/nativefmttable.py b/pypy/rlib/rstruct/nativefmttable.py
--- a/pypy/rlib/rstruct/nativefmttable.py
+++ b/pypy/rlib/rstruct/nativefmttable.py
@@ -3,6 +3,7 @@
 The table 'native_fmttable' is also used by pypy.module.array.interp_array.
 """
 import struct
+from pypy.rlib import jit
 from pypy.rlib.rstruct import standardfmttable as std
 from pypy.rlib.rstruct.error import StructError
 from pypy.rpython.tool import rffi_platform
@@ -25,12 +26,15 @@
 double_buf = lltype.malloc(rffi.DOUBLEP.TO, 1, flavor='raw', immortal=True)
 float_buf = lltype.malloc(rffi.FLOATP.TO, 1, flavor='raw', immortal=True)
 
+ at jit.dont_look_inside
+def double_to_ccharp(doubleval):
+    double_buf[0] = doubleval
+    return rffi.cast(rffi.CCHARP, double_buf)
+
 def pack_double(fmtiter):
     doubleval = fmtiter.accept_float_arg()
-    double_buf[0] = doubleval
-    p = rffi.cast(rffi.CCHARP, double_buf)
-    for i in range(sizeof_double):
-        fmtiter.result.append(p[i])
+    p = double_to_ccharp(doubleval)
+    fmtiter.result.append_charpsize(p, rffi.sizeof(rffi.DOUBLE))
 
 @specialize.argtype(0)
 def unpack_double(fmtiter):
@@ -41,13 +45,16 @@
     doubleval = double_buf[0]
     fmtiter.appendobj(doubleval)
 
+ at jit.dont_look_inside
+def float_to_ccharp(floatval):
+    float_buf[0] = floatval
+    return rffi.cast(rffi.CCHARP, float_buf)
+
 def pack_float(fmtiter):
     doubleval = fmtiter.accept_float_arg()
     floatval = r_singlefloat(doubleval)
-    float_buf[0] = floatval
-    p = rffi.cast(rffi.CCHARP, float_buf)
-    for i in range(sizeof_float):
-        fmtiter.result.append(p[i])
+    p = float_to_ccharp(floatval)
+    fmtiter.result.append_charpsize(p, rffi.sizeof(rffi.FLOAT))
 
 @specialize.argtype(0)
 def unpack_float(fmtiter):
diff --git a/pypy/rlib/rstruct/standardfmttable.py b/pypy/rlib/rstruct/standardfmttable.py
--- a/pypy/rlib/rstruct/standardfmttable.py
+++ b/pypy/rlib/rstruct/standardfmttable.py
@@ -21,8 +21,7 @@
 # ____________________________________________________________
 
 def pack_pad(fmtiter, count):
-    for i in range(count):
-        fmtiter.result.append('\x00')
+    fmtiter.result.append_multiple_char('\x00', count)
 
 def pack_char(fmtiter):
     string = fmtiter.accept_str_arg()
@@ -38,11 +37,10 @@
 def pack_string(fmtiter, count):
     string = fmtiter.accept_str_arg()
     if len(string) < count:
-        fmtiter.result += string
-        for i in range(len(string), count):
-            fmtiter.result.append('\x00')
+        fmtiter.result.append(string)
+        fmtiter.result.append_multiple_char('\x00', count - len(string))
     else:
-        fmtiter.result += string[:count]
+        fmtiter.result.append_slice(string, 0, count)
 
 def pack_pascal(fmtiter, count):
     string = fmtiter.accept_str_arg()
@@ -56,9 +54,8 @@
     else:
         prefixchar = chr(prefix)
     fmtiter.result.append(prefixchar)
-    fmtiter.result += string[:prefix]
-    for i in range(1 + prefix, count):
-        fmtiter.result.append('\x00')
+    fmtiter.result.append_slice(string, 0, prefix)
+    fmtiter.result.append_multiple_char('\x00', count - (1 + prefix))
 
 def make_float_packer(size):
     def packer(fmtiter):
diff --git a/pypy/rlib/test/test_jit.py b/pypy/rlib/test/test_jit.py
--- a/pypy/rlib/test/test_jit.py
+++ b/pypy/rlib/test/test_jit.py
@@ -2,6 +2,7 @@
 from pypy.conftest import option
 from pypy.rlib.jit import hint, we_are_jitted, JitDriver, elidable_promote
 from pypy.rlib.jit import JitHintError, oopspec, isconstant
+from pypy.rlib.rarithmetic import r_uint
 from pypy.translator.translator import TranslationContext, graphof
 from pypy.rpython.test.tool import BaseRtypingTest, LLRtypeMixin, OORtypeMixin
 from pypy.rpython.lltypesystem import lltype
@@ -146,6 +147,43 @@
         res = self.interpret(f, [-234])
         assert res == 1
 
+    def test_argument_order_ok(self):
+        myjitdriver = JitDriver(greens=['i1', 'r1', 'f1'], reds=[])
+        class A(object):
+            pass
+        myjitdriver.jit_merge_point(i1=42, r1=A(), f1=3.5)
+        # assert did not raise
+
+    def test_argument_order_wrong(self):
+        myjitdriver = JitDriver(greens=['r1', 'i1', 'f1'], reds=[])
+        class A(object):
+            pass
+        e = raises(AssertionError,
+                   myjitdriver.jit_merge_point, i1=42, r1=A(), f1=3.5)
+
+    def test_argument_order_more_precision_later(self):
+        myjitdriver = JitDriver(greens=['r1', 'i1', 'r2', 'f1'], reds=[])
+        class A(object):
+            pass
+        myjitdriver.jit_merge_point(i1=42, r1=None, r2=None, f1=3.5)
+        e = raises(AssertionError,
+                   myjitdriver.jit_merge_point, i1=42, r1=A(), r2=None, f1=3.5)
+        assert "got ['2:REF', '1:INT', '?', '3:FLOAT']" in repr(e.value)
+
+    def test_argument_order_more_precision_later_2(self):
+        myjitdriver = JitDriver(greens=['r1', 'i1', 'r2', 'f1'], reds=[])
+        class A(object):
+            pass
+        myjitdriver.jit_merge_point(i1=42, r1=None, r2=A(), f1=3.5)
+        e = raises(AssertionError,
+                   myjitdriver.jit_merge_point, i1=42, r1=A(), r2=None, f1=3.5)
+        assert "got ['2:REF', '1:INT', '2:REF', '3:FLOAT']" in repr(e.value)
+
+    def test_argument_order_accept_r_uint(self):
+        # this used to fail on 64-bit, because r_uint == r_ulonglong
+        myjitdriver = JitDriver(greens=['i1'], reds=[])
+        myjitdriver.jit_merge_point(i1=r_uint(42))
+
 
 class TestJITLLtype(BaseTestJIT, LLRtypeMixin):
     pass
diff --git a/pypy/rlib/test/test_objectmodel.py b/pypy/rlib/test/test_objectmodel.py
--- a/pypy/rlib/test/test_objectmodel.py
+++ b/pypy/rlib/test/test_objectmodel.py
@@ -442,7 +442,7 @@
 def test_newlist():
     from pypy.annotation.model import SomeInteger
     def f(z):
-        x = newlist(sizehint=38)
+        x = newlist_hint(sizehint=38)
         if z < 0:
             x.append(1)
         return len(x)
@@ -456,7 +456,7 @@
 def test_newlist_nonconst():
     from pypy.annotation.model import SomeInteger
     def f(z):
-        x = newlist(sizehint=z)
+        x = newlist_hint(sizehint=z)
         return len(x)
 
     graph = getgraph(f, [SomeInteger()])
diff --git a/pypy/rlib/test/test_rerased_raw.py b/pypy/rlib/test/test_rerased_raw.py
new file mode 100644
--- /dev/null
+++ b/pypy/rlib/test/test_rerased_raw.py
@@ -0,0 +1,94 @@
+import py
+
+from pypy.rlib import rerased_raw
+from pypy.rpython.test.tool import BaseRtypingTest, LLRtypeMixin
+
+
+def test_direct_int():
+    storage = rerased_raw.UntypedStorage("ii")
+
+    storage.setint(0, 2)
+    assert storage.getint(0) == 2
+
+    storage.setint(1, 5)
+    assert storage.getint(1) == 5
+
+def test_direct_float():
+    storage = rerased_raw.UntypedStorage("f")
+    storage.setfloat(0, 5.5)
+
+    assert storage.getfloat(0) == 5.5
+
+def test_direct_instance():
+    class A(object):
+        def __init__(self, value):
+            self.value = value
+
+    storage = rerased_raw.UntypedStorage("o")
+    storage.setinstance(0, A(4))
+
+    assert storage.getinstance(0, A).value == 4
+
+
+class TestRerasedRawLLType(LLRtypeMixin, BaseRtypingTest):
+    def test_int(self):
+        def f(x):
+            storage = rerased_raw.UntypedStorage("i")
+            storage.setint(0, x)
+            return storage.getint(0)
+
+        res = self.interpret(f, [4])
+        assert res == 4
+
+    def test_instance(self):
+        class A(object):
+            def __init__(self, v):
+                self.v = v
+
+        def f(x):
+            storage = rerased_raw.UntypedStorage("o")
+            storage.setinstance(0, A(x))
+            return storage.getinstance(0, A).v
+
+        res = self.interpret(f, [27])
+        assert res == 27
+
+    # def test_float(self):
+    #     def f(x):
+    #         storage = rerased_raw.UntypedStorage("f")
+    #         storage.setfloat(0, x)
+    #         return storage.getfloat(0)
+
+    #     res = self.interpret(f, [12.3])
+    #     assert res == 12.3
+
+    def test_exception_catching(self):
+        class A(object):
+            def __init__(self, v):
+                self.v = v
+
+        def f(x):
+            try:
+                storage = rerased_raw.UntypedStorage("io")
+                storage.setint(0, x)
+                value1 = storage.getint(0)
+                storage.setinstance(1, A(x))
+                value2 = storage.getinstance(1, A)
+                return value1 + value2.v
+            except Exception:
+                return 50000
+
+        res = self.interpret(f, [4])
+        assert res == 8
+
+    def test_union(self):
+        def f(x, v):
+            if x:
+                storage = rerased_raw.UntypedStorage("i")
+            else:
+                storage = rerased_raw.UntypedStorage("ii")
+            storage.setint(0, v)
+            return storage.getint(0)
+
+        res = self.interpret(f, [True, 15])
+        assert res == 15
diff --git a/pypy/rpython/llinterp.py b/pypy/rpython/llinterp.py
--- a/pypy/rpython/llinterp.py
+++ b/pypy/rpython/llinterp.py
@@ -42,7 +42,7 @@
         return ''.join(etype.name).rstrip('\x00')
     else:
         # ootype!
-        return etype._INSTANCE._name.split(".")[-1] 
+        return etype._INSTANCE._name.split(".")[-1]
 
 class LLInterpreter(object):
     """ low level interpreter working with concrete values. """
@@ -654,11 +654,11 @@
             return self.invoke_callable_with_pyexceptions(f, *args)
         args_v = graph.getargs()
         if len(ARGS) != len(args_v):
-            raise TypeError("graph with %d args called with wrong func ptr type: %r" %(len(args_v), ARGS)) 
+            raise TypeError("graph with %d args called with wrong func ptr type: %r" %(len(args_v), ARGS))
         for T, v in zip(ARGS, args_v):
             if not lltype.isCompatibleType(T, v.concretetype):
                 raise TypeError("graph with %r args called with wrong func ptr type: %r" %
-                                (tuple([v.concretetype for v in args_v]), ARGS)) 
+                                (tuple([v.concretetype for v in args_v]), ARGS))
         frame = self.newsubframe(graph, args)
         return frame.eval()
 
@@ -672,7 +672,7 @@
         if graphs is not None:
             obj = self.llinterpreter.typer.type_system.deref(f)
             if hasattr(obj, 'graph'):
-                assert obj.graph in graphs 
+                assert obj.graph in graphs
         else:
             pass
             #log.warn("op_indirect_call with graphs=None:", f)
@@ -728,6 +728,12 @@
     def op_zero_gc_pointers_inside(self, obj):
         raise NotImplementedError("zero_gc_pointers_inside")
 
+    def op_gc_writebarrier(self, newvalue, struct):
+        assert isinstance(lltype.typeOf(newvalue), lltype.Ptr)
+        assert isinstance(lltype.typeOf(struct), lltype.Ptr)
+        if hasattr(self.heap, "writebarrier"):
+            self.heap.writebarrier(newvalue, struct)
+
     def op_gc_writebarrier_before_copy(self, source, dest,
                                        source_start, dest_start, length):
         if hasattr(self.heap, 'writebarrier_before_copy'):
@@ -982,7 +988,7 @@
         return llmemory.raw_malloc_usage(size)
 
     def op_raw_free(self, addr):
-        checkadr(addr) 
+        checkadr(addr)
         llmemory.raw_free(addr)
 
     def op_raw_memclear(self, addr, size):
@@ -1126,7 +1132,7 @@
     def op_new(self, INST):
         assert isinstance(INST, (ootype.Instance, ootype.BuiltinType))
         return ootype.new(INST)
-        
+
     def op_oonewarray(self, ARRAY, length):
         assert isinstance(ARRAY, ootype.Array)
         assert isinstance(length, int)
@@ -1140,7 +1146,7 @@
         eq_name, interp_eq = \
                  wrap_callable(self.llinterpreter, eq_func, eq_obj, eq_method_name)
         EQ_FUNC = ootype.StaticMethod([DICT._KEYTYPE, DICT._KEYTYPE], ootype.Bool)
-        sm_eq = ootype.static_meth(EQ_FUNC, eq_name, _callable=interp_eq)        
+        sm_eq = ootype.static_meth(EQ_FUNC, eq_name, _callable=interp_eq)
 
         hash_name, interp_hash = \
                    wrap_callable(self.llinterpreter, hash_func, hash_obj, hash_method_name)
diff --git a/pypy/rpython/lltypesystem/llmemory.py b/pypy/rpython/lltypesystem/llmemory.py
--- a/pypy/rpython/lltypesystem/llmemory.py
+++ b/pypy/rpython/lltypesystem/llmemory.py
@@ -5,7 +5,7 @@
 # sizeof, offsetof
 
 import weakref
-from pypy.rlib.objectmodel import Symbolic
+from pypy.rlib.objectmodel import Symbolic, specialize
 from pypy.rpython.lltypesystem import lltype
 from pypy.tool.uid import uid
 
@@ -380,6 +380,7 @@
     else:
         raise Exception("don't know how to take the size of a %r"%TYPE)
 
+ at specialize.arg(0)
 def sizeof(TYPE, n=None):
     if n is None:
         return _sizeof_none(TYPE)
@@ -387,19 +388,23 @@
         return itemoffsetof(TYPE) + _sizeof_none(TYPE.OF) * n
     else:
         return _sizeof_int(TYPE, n)
-sizeof._annspecialcase_ = 'specialize:arg(0)'
 
+ at specialize.memo()
 def offsetof(TYPE, fldname):
     assert fldname in TYPE._flds
     return FieldOffset(TYPE, fldname)
-offsetof._annspecialcase_ = 'specialize:memo'
 
+ at specialize.memo()
 def itemoffsetof(TYPE, n=0):
     result = ArrayItemsOffset(TYPE)
     if n != 0:
         result += ItemOffset(TYPE.OF) * n
     return result
-itemoffsetof._annspecialcase_ = 'specialize:memo'
+
+ at specialize.memo()
+def arraylengthoffset(TYPE):
+    return ArrayLengthOffset(TYPE)
+
 # -------------------------------------------------------------
 
 class fakeaddress(object):
@@ -880,7 +885,7 @@
         assert isinstance(s_from, SomeAddress)
         assert isinstance(s_to, SomeAddress)
         assert isinstance(s_size, SomeInteger)
-    
+
     def specialize_call(self, hop):
         hop.exception_cannot_occur()
         v_list = hop.inputargs(Address, Address, lltype.Signed)
diff --git a/pypy/rpython/lltypesystem/lloperation.py b/pypy/rpython/lltypesystem/lloperation.py
--- a/pypy/rpython/lltypesystem/lloperation.py
+++ b/pypy/rpython/lltypesystem/lloperation.py
@@ -464,6 +464,7 @@
     'gc_thread_before_fork':LLOp(),   # returns an opaque address
     'gc_thread_after_fork': LLOp(),   # arguments: (result_of_fork, opaqueaddr)
     'gc_assume_young_pointers': LLOp(canrun=True),
+    'gc_writebarrier': LLOp(canrun=True),
     'gc_writebarrier_before_copy': LLOp(canrun=True),
     'gc_heap_stats'       : LLOp(canmallocgc=True),
 
diff --git a/pypy/rpython/lltypesystem/rffi.py b/pypy/rpython/lltypesystem/rffi.py
--- a/pypy/rpython/lltypesystem/rffi.py
+++ b/pypy/rpython/lltypesystem/rffi.py
@@ -433,7 +433,8 @@
         TYPES.append(name)
 TYPES += ['signed char', 'unsigned char',
           'long long', 'unsigned long long',
-          'size_t', 'time_t', 'wchar_t']
+          'size_t', 'time_t', 'wchar_t',
+          'uintptr_t', 'intptr_t']
 if os.name != 'nt':
     TYPES.append('mode_t')
     TYPES.append('pid_t')
@@ -617,8 +618,6 @@
 # (use SIGNEDCHAR or UCHAR for the small integer types)
 CHAR = lltype.Char
 
-INTPTR_T = SSIZE_T
-
 # double
 DOUBLE = lltype.Float
 LONGDOUBLE = lltype.LongFloat
diff --git a/pypy/rpython/lltypesystem/rlist.py b/pypy/rpython/lltypesystem/rlist.py
--- a/pypy/rpython/lltypesystem/rlist.py
+++ b/pypy/rpython/lltypesystem/rlist.py
@@ -60,7 +60,6 @@
         ITEMARRAY = GcArray(ITEM,
                             adtmeths = ADTIFixedList({
                                  "ll_newlist": ll_fixed_newlist,
-                                 "ll_newlist_hint": ll_fixed_newlist,
                                  "ll_newemptylist": ll_fixed_newemptylist,
                                  "ll_length": ll_fixed_length,
                                  "ll_items": ll_fixed_items,
@@ -271,7 +270,7 @@
     l.items = malloc(LIST.items.TO, lengthhint)
     return l
 ll_newlist_hint = typeMethod(ll_newlist_hint)
-ll_newlist_hint.oopspec = 'newlist(lengthhint)'
+ll_newlist_hint.oopspec = 'newlist_hint(lengthhint)'
 
 # should empty lists start with no allocated memory, or with a preallocated
 # minimal number of entries?  XXX compare memory usage versus speed, and
@@ -315,16 +314,16 @@
 
 # fixed size versions
 
+ at typeMethod
 def ll_fixed_newlist(LIST, length):
     ll_assert(length >= 0, "negative fixed list length")
     l = malloc(LIST, length)
     return l
-ll_fixed_newlist = typeMethod(ll_fixed_newlist)
 ll_fixed_newlist.oopspec = 'newlist(length)'
 
+ at typeMethod
 def ll_fixed_newemptylist(LIST):
     return ll_fixed_newlist(LIST, 0)
-ll_fixed_newemptylist = typeMethod(ll_fixed_newemptylist)
 
 def ll_fixed_length(l):
     return len(l)
@@ -392,7 +391,11 @@
                                          ('list', r_list.lowleveltype),
                                          ('index', Signed)))
         self.ll_listiter = ll_listiter
-        self.ll_listnext = ll_listnext
+        if (isinstance(r_list, FixedSizeListRepr)
+                and not r_list.listitem.mutated):
+            self.ll_listnext = ll_listnext_foldable
+        else:
+            self.ll_listnext = ll_listnext
         self.ll_getnextindex = ll_getnextindex
 
 def ll_listiter(ITERPTR, lst):
@@ -409,5 +412,14 @@
     iter.index = index + 1      # cannot overflow because index < l.length
     return l.ll_getitem_fast(index)
 
+def ll_listnext_foldable(iter):
+    from pypy.rpython.rlist import ll_getitem_foldable_nonneg
+    l = iter.list
+    index = iter.index
+    if index >= l.ll_length():
+        raise StopIteration
+    iter.index = index + 1      # cannot overflow because index < l.length
+    return ll_getitem_foldable_nonneg(l, index)
+
 def ll_getnextindex(iter):
     return iter.index
diff --git a/pypy/rpython/lltypesystem/rstr.py b/pypy/rpython/lltypesystem/rstr.py
--- a/pypy/rpython/lltypesystem/rstr.py
+++ b/pypy/rpython/lltypesystem/rstr.py
@@ -62,6 +62,14 @@
     @jit.oopspec('stroruni.copy_contents(src, dst, srcstart, dststart, length)')
     @enforceargs(None, None, int, int, int)
     def copy_string_contents(src, dst, srcstart, dststart, length):
+        """Copies 'length' characters from the 'src' string to the 'dst'
+        string, starting at position 'srcstart' and 'dststart'."""
+        # xxx Warning: don't try to do this at home.  It relies on a lot
+        # of details to be sure that it works correctly in all cases.
+        # Notably: no GC operation at all from the first cast_ptr_to_adr()
+        # because it might move the strings.  The keepalive_until_here()
+        # are obscurely essential to make sure that the strings stay alive
+        # longer than the raw_memcopy().
         assert srcstart >= 0
         assert dststart >= 0
         assert length >= 0
diff --git a/pypy/rpython/memory/gc/generation.py b/pypy/rpython/memory/gc/generation.py
--- a/pypy/rpython/memory/gc/generation.py
+++ b/pypy/rpython/memory/gc/generation.py
@@ -41,8 +41,8 @@
 
     # the following values override the default arguments of __init__ when
     # translating to a real backend.
-    TRANSLATION_PARAMS = {'space_size': 8*1024*1024, # XXX adjust
-                          'nursery_size': 896*1024,
+    TRANSLATION_PARAMS = {'space_size': 8*1024*1024,     # 8 MB
+                          'nursery_size': 3*1024*1024,   # 3 MB
                           'min_nursery_size': 48*1024,
                           'auto_nursery_size': True}
 
@@ -92,8 +92,9 @@
         # the GC is fully setup now.  The rest can make use of it.
         if self.auto_nursery_size:
             newsize = nursery_size_from_env()
-            if newsize <= 0:
-                newsize = env.estimate_best_nursery_size()
+            #if newsize <= 0:
+            #    ---disabled--- just use the default value.
+            #    newsize = env.estimate_best_nursery_size()
             if newsize > 0:
                 self.set_nursery_size(newsize)
 
diff --git a/pypy/rpython/memory/gc/minimark.py b/pypy/rpython/memory/gc/minimark.py
--- a/pypy/rpython/memory/gc/minimark.py
+++ b/pypy/rpython/memory/gc/minimark.py
@@ -608,6 +608,11 @@
         specified as 0 if the object is not varsized.  The returned
         object is fully initialized and zero-filled."""
         #
+        # Here we really need a valid 'typeid', not 0 (as the JIT might
+        # try to send us if there is still a bug).
+        ll_assert(bool(self.combine(typeid, 0)),
+                  "external_malloc: typeid == 0")
+        #
         # Compute the total size, carefully checking for overflows.
         size_gc_header = self.gcheaderbuilder.size_gc_header
         nonvarsize = size_gc_header + self.fixed_size(typeid)
diff --git a/pypy/rpython/memory/gctransform/asmgcroot.py b/pypy/rpython/memory/gctransform/asmgcroot.py
--- a/pypy/rpython/memory/gctransform/asmgcroot.py
+++ b/pypy/rpython/memory/gctransform/asmgcroot.py
@@ -442,6 +442,8 @@
         ll_assert(location >= 0, "negative location")
         kind = location & LOC_MASK
         offset = location & ~ LOC_MASK
+        if IS_64_BITS:
+            offset <<= 1
         if kind == LOC_REG:   # register
             if location == LOC_NOWHERE:
                 return llmemory.NULL
diff --git a/pypy/rpython/memory/gctransform/framework.py b/pypy/rpython/memory/gctransform/framework.py
--- a/pypy/rpython/memory/gctransform/framework.py
+++ b/pypy/rpython/memory/gctransform/framework.py
@@ -233,7 +233,7 @@
         # for tests
         self.frameworkgc__teardown_ptr = getfn(frameworkgc__teardown, [],
                                                annmodel.s_None)
-        
+
         if root_walker.need_root_stack:
             self.incr_stack_ptr = getfn(root_walker.incr_stack,
                                        [annmodel.SomeInteger()],
@@ -248,7 +248,7 @@
             self.decr_stack_ptr = None
         self.weakref_deref_ptr = self.inittime_helper(
             ll_weakref_deref, [llmemory.WeakRefPtr], llmemory.Address)
-        
+
         classdef = bk.getuniqueclassdef(GCClass)
         s_gc = annmodel.SomeInstance(classdef)
         s_gcref = annmodel.SomePtr(llmemory.GCREF)
@@ -313,6 +313,13 @@
                 [s_gc, annmodel.SomeInteger(knowntype=llgroup.r_halfword)],
                 annmodel.SomeInteger())
 
+        if hasattr(GCClass, 'write_barrier'):
+            self.wb_ptr = getfn(GCClass.write_barrier.im_func,
+                                [s_gc] + [annmodel.SomeAddress()] * 2,
+                                annmodel.s_None)
+        elif GCClass.needs_write_barrier:
+            raise NotImplementedError("Add a write_barrier")
+
         if hasattr(GCClass, 'writebarrier_before_copy'):
             self.wb_before_copy_ptr = \
                     getfn(GCClass.writebarrier_before_copy.im_func,
@@ -638,7 +645,7 @@
             if self.collect_analyzer.analyze_direct_call(graph):
                 raise Exception("'no_collect' function can trigger collection:"
                                 " %s" % func)
-            
+
         if self.write_barrier_ptr:
             self.clean_sets = (
                 find_initializing_stores(self.collect_analyzer, graph))
@@ -905,6 +912,19 @@
             TYPE = v_ob.concretetype.TO
             gen_zero_gc_pointers(TYPE, v_ob, hop.llops)
 
+    def gct_gc_writebarrier(self, hop):
+        op = hop.spaceop
+        if not hasattr(self, "wb_ptr"):
+            return
+
+        newvalue_addr = hop.genop('cast_ptr_to_adr', [op.args[0]],
+                                  resulttype=llmemory.Address)
+        struct_addr = hop.genop('cast_ptr_to_adr', [op.args[1]],
+                                resulttype=llmemory.Address)
+        hop.genop('direct_call',
+                  [self.wb_ptr, self.c_const_gc, newvalue_addr, struct_addr])
+
+
     def gct_gc_writebarrier_before_copy(self, hop):
         op = hop.spaceop
         if not hasattr(self, 'wb_before_copy_ptr'):
diff --git a/pypy/rpython/memory/gctransform/transform.py b/pypy/rpython/memory/gctransform/transform.py
--- a/pypy/rpython/memory/gctransform/transform.py
+++ b/pypy/rpython/memory/gctransform/transform.py
@@ -377,6 +377,9 @@
     def gct_zero_gc_pointers_inside(self, hop):
         pass
 
+    def gct_gc_writebarrier(self, hop):
+        pass
+
     def gct_gc_writebarrier_before_copy(self, hop):
         # We take the conservative default and return False here, meaning
         # that rgc.ll_arraycopy() will do the copy by hand (i.e. with a
@@ -520,11 +523,11 @@
         flags = hop.spaceop.args[1].value
         flavor = flags['flavor']
         meth = getattr(self, 'gct_fv_%s_malloc' % flavor, None)
-        assert meth, "%s has no support for malloc with flavor %r" % (self, flavor) 
+        assert meth, "%s has no support for malloc with flavor %r" % (self, flavor)
         c_size = rmodel.inputconst(lltype.Signed, llmemory.sizeof(TYPE))
         v_raw = meth(hop, flags, TYPE, c_size)
         hop.cast_result(v_raw)
- 
+
     def gct_fv_raw_malloc(self, hop, flags, TYPE, c_size):
         v_raw = hop.genop("direct_call", [self.raw_malloc_fixedsize_ptr, c_size],
                           resulttype=llmemory.Address)
@@ -548,7 +551,7 @@
         flavor = flags['flavor']
         assert flavor != 'cpy', "cannot malloc CPython objects directly"
         meth = getattr(self, 'gct_fv_%s_malloc_varsize' % flavor, None)
-        assert meth, "%s has no support for malloc_varsize with flavor %r" % (self, flavor) 
+        assert meth, "%s has no support for malloc_varsize with flavor %r" % (self, flavor)
         return self.varsize_malloc_helper(hop, flags, meth, [])
 
     def gct_malloc_nonmovable(self, *args, **kwds):
diff --git a/pypy/rpython/memory/gcwrapper.py b/pypy/rpython/memory/gcwrapper.py
--- a/pypy/rpython/memory/gcwrapper.py
+++ b/pypy/rpython/memory/gcwrapper.py
@@ -132,7 +132,7 @@
         result = llmemory.cast_adr_to_ptr(addr, gctypelayout.WEAKREFPTR)
         result.weakptr = llmemory.cast_ptr_to_adr(objgetter())
         return llmemory.cast_ptr_to_weakrefptr(result)
-    
+
     def weakref_deref(self, PTRTYPE, obj):
         addr = gctypelayout.ll_weakref_deref(obj)
         return llmemory.cast_adr_to_ptr(addr, PTRTYPE)
@@ -141,6 +141,12 @@
         ptr = lltype.cast_opaque_ptr(llmemory.GCREF, ptr)
         return self.gc.id(ptr)
 
+    def writebarrier(self, newvalue, struct):
+        if self.gc.needs_write_barrier:
+            newvalue_addr = llmemory.cast_ptr_to_adr(newvalue)
+            struct_addr = llmemory.cast_ptr_to_adr(struct)
+            self.gc.write_barrier(newvalue_addr, struct_addr)
+
     def writebarrier_before_copy(self, source, dest,
                                  source_start, dest_start, length):
         if self.gc.needs_write_barrier:
diff --git a/pypy/rpython/memory/test/test_gc.py b/pypy/rpython/memory/test/test_gc.py
--- a/pypy/rpython/memory/test/test_gc.py
+++ b/pypy/rpython/memory/test/test_gc.py
@@ -6,9 +6,9 @@
 from pypy.rpython.test.test_llinterp import get_interpreter
 from pypy.rpython.lltypesystem import lltype
 from pypy.rpython.lltypesystem.lloperation import llop
-from pypy.rlib.objectmodel import we_are_translated
-from pypy.rlib.objectmodel import compute_unique_id
+from pypy.rlib.objectmodel import we_are_translated, compute_unique_id
 from pypy.rlib import rgc
+from pypy.rlib.rerased_raw import UntypedStorage
 from pypy.rlib.rstring import StringBuilder
 from pypy.rlib.rarithmetic import LONG_BIT
 
@@ -86,7 +86,7 @@
         for i in range(1, 15):
             res = self.interpret(append_to_list, [i, i - 1])
             assert res == i - 1 # crashes if constants are not considered roots
-            
+
     def test_string_concatenation(self):
         #curr = simulator.current_size
         def concat(j):
@@ -746,6 +746,43 @@
         res = self.interpret(fn, [])
         assert res == ord('y')
 
+    def test_untypedstorage(self):
+        class A(object):
+            def __init__(self, v):
+                self.v = v
+
+        def fn(v):
+            s = UntypedStorage("io")
+            s.setint(0, v)
+            s.setinstance(1, A(v))
+            rgc.collect()
+            return s.getint(0) + s.getinstance(1, A).v
+
+        res = self.interpret(fn, [10])
+        assert res == 20
+
+    def test_untyped_storage_multipled_objects(self):
+        class A(object):
+            def __init__(self, v):
+                self.v = v
+
+        def fn():
+            s = UntypedStorage("oioio")
+            s.setinstance(0, A(1))
+            s.setint(1, 2)
+            s.setinstance(2, A(3))
+            s.setint(3, 4)
+            s.setinstance(4, A(5))
+            rgc.collect()
+            return (s.getinstance(0, A).v * 1 + s.getint(1) * 10 +
+                s.getinstance(2, A).v * 100 + s.getint(3) * 1000 +
+                s.getinstance(4, A).v * 10000)
+
+        res = self.interpret(fn, [])
+        assert res == 54321
+
+
+
 from pypy.rlib.objectmodel import UnboxedValue
 
 class TaggedBase(object):
diff --git a/pypy/rpython/memory/test/test_transformed_gc.py b/pypy/rpython/memory/test/test_transformed_gc.py
--- a/pypy/rpython/memory/test/test_transformed_gc.py
+++ b/pypy/rpython/memory/test/test_transformed_gc.py
@@ -13,6 +13,7 @@
 from pypy import conftest
 from pypy.rlib.rstring import StringBuilder
 from pypy.rlib.rarithmetic import LONG_BIT
+from pypy.rlib.rerased_raw import UntypedStorage
 
 WORD = LONG_BIT // 8
 
@@ -834,7 +835,7 @@
             from pypy.translator.translator import graphof
             from pypy.objspace.flow.model import Constant
             from pypy.rpython.lltypesystem import rffi
-            layoutbuilder = cls.ensure_layoutbuilder(translator)            
+            layoutbuilder = cls.ensure_layoutbuilder(translator)
             type_id = layoutbuilder.get_type_id(P)
             #
             # now fix the do_malloc_fixedsize_clear in the graph of g
@@ -928,6 +929,48 @@
         run = self.runner("writebarrier_before_copy")
         run([])
 
+    def define_untyped_storage(cls):
+        class A(object):
+            def __init__(self, v):
+                self.v = v
+
+        def fn():
+            s = UntypedStorage("io")
+            s.setint(0, 10)
+            s.setinstance(1, A(10))
+            rgc.collect()
+            return s.getint(0) + s.getinstance(1, A).v
+        return fn
+
+    def test_untyped_storage(self):
+        run = self.runner("untyped_storage")
+        res = run([])
+        assert res == 20
+
+    def define_untyped_storage_multiple_objects(cls):
+        class A(object):
+            def __init__(self, v):
+                self.v = v
+
+        def fn():
+            s = UntypedStorage("oioio")
+            s.setinstance(0, A(1))
+            s.setint(1, 2)
+            s.setinstance(2, A(3))
+            s.setint(3, 4)
+            s.setinstance(4, A(5))
+            rgc.collect()
+            return (s.getinstance(0, A).v * 1 + s.getint(1) * 10 +
+                s.getinstance(2, A).v * 100 + s.getint(3) * 1000 +
+                s.getinstance(4, A).v * 10000)
+        return fn
+
+    def test_untyped_storage_multipled_objects(self):
+        run = self.runner("untyped_storage_multiple_objects")
+        res = run([])
+        assert res == 54321
+
+
 # ________________________________________________________________
 
 class TestMarkSweepGC(GenericGCTests):
@@ -1152,7 +1195,7 @@
 
     def test_adr_of_nursery(self):
         run = self.runner("adr_of_nursery")
-        res = run([])        
+        res = run([])
 
 class TestGenerationalNoFullCollectGC(GCTest):
     # test that nursery is doing its job and that no full collection
diff --git a/pypy/rpython/rclass.py b/pypy/rpython/rclass.py
--- a/pypy/rpython/rclass.py
+++ b/pypy/rpython/rclass.py
@@ -364,6 +364,8 @@
     def get_ll_hash_function(self):
         return ll_inst_hash
 
+    get_ll_fasthash_function = get_ll_hash_function
+
     def rtype_type(self, hop):
         raise NotImplementedError
 
diff --git a/pypy/rpython/test/test_rdict.py b/pypy/rpython/test/test_rdict.py
--- a/pypy/rpython/test/test_rdict.py
+++ b/pypy/rpython/test/test_rdict.py
@@ -449,6 +449,21 @@
 
         assert r_AB_dic.lowleveltype == r_BA_dic.lowleveltype
 
+    def test_identity_hash_is_fast(self):
+        class A(object):
+            pass
+
+        def f():
+            return {A(): 1}
+
+        t = TranslationContext()
+        s = t.buildannotator().build_types(f, [])
+        rtyper = t.buildrtyper()
+        rtyper.specialize()
+
+        r_dict = rtyper.getrepr(s)
+        assert not hasattr(r_dict.lowleveltype.TO.entries.TO.OF, "f_hash")
+
     def test_tuple_dict(self):
         def f(i):
             d = {}
diff --git a/pypy/rpython/test/test_rlist.py b/pypy/rpython/test/test_rlist.py
--- a/pypy/rpython/test/test_rlist.py
+++ b/pypy/rpython/test/test_rlist.py
@@ -8,6 +8,7 @@
 from pypy.rpython.rlist import *
 from pypy.rpython.lltypesystem.rlist import ListRepr, FixedSizeListRepr, ll_newlist, ll_fixed_newlist
 from pypy.rpython.lltypesystem import rlist as ll_rlist
+from pypy.rpython.llinterp import LLException
 from pypy.rpython.ootypesystem import rlist as oo_rlist
 from pypy.rpython.rint import signed_repr
 from pypy.objspace.flow.model import Constant, Variable
@@ -1361,13 +1362,12 @@
                    ("y[*]" in immutable_fields)
 
     def test_hints(self):
-        from pypy.rlib.objectmodel import newlist
-        from pypy.rpython.annlowlevel import hlstr
+        from pypy.rlib.objectmodel import newlist_hint
 
         strings = ['abc', 'def']
         def f(i):
             z = strings[i]
-            x = newlist(sizehint=13)
+            x = newlist_hint(sizehint=13)
             x += z
             return ''.join(x)
 
@@ -1477,6 +1477,80 @@
         assert func1.oopspec == 'list.getitem_foldable(l, index)'
         assert not hasattr(func2, 'oopspec')
 
+    def test_iterate_over_immutable_list(self):
+        from pypy.rpython import rlist
+        class MyException(Exception):
+            pass
+        lst = list('abcdef')
+        def dummyfn():
+            total = 0
+            for c in lst:
+                total += ord(c)
+            return total
+        #
+        prev = rlist.ll_getitem_foldable_nonneg
+        try:
+            def seen_ok(l, index):
+                if index == 5:
+                    raise KeyError     # expected case
+                return prev(l, index)
+            rlist.ll_getitem_foldable_nonneg = seen_ok
+            e = raises(LLException, self.interpret, dummyfn, [])
+            assert 'KeyError' in str(e.value)
+        finally:
+            rlist.ll_getitem_foldable_nonneg = prev
+
+    def test_iterate_over_immutable_list_quasiimmut_attr(self):
+        from pypy.rpython import rlist
+        class MyException(Exception):
+            pass
+        class Foo:
+            _immutable_fields_ = ['lst?[*]']
+            lst = list('abcdef')
+        foo = Foo()
+        def dummyfn():
+            total = 0
+            for c in foo.lst:
+                total += ord(c)
+            return total
+        #
+        prev = rlist.ll_getitem_foldable_nonneg
+        try:
+            def seen_ok(l, index):
+                if index == 5:
+                    raise KeyError     # expected case
+                return prev(l, index)
+            rlist.ll_getitem_foldable_nonneg = seen_ok
+            e = raises(LLException, self.interpret, dummyfn, [])
+            assert 'KeyError' in str(e.value)
+        finally:
+            rlist.ll_getitem_foldable_nonneg = prev
+
+    def test_iterate_over_mutable_list(self):
+        from pypy.rpython import rlist
+        class MyException(Exception):
+            pass
+        lst = list('abcdef')
+        def dummyfn():
+            total = 0
+            for c in lst:
+                total += ord(c)
+            lst[0] = 'x'
+            return total
+        #
+        prev = rlist.ll_getitem_foldable_nonneg
+        try:
+            def seen_ok(l, index):
+                if index == 5:
+                    raise KeyError     # expected case
+                return prev(l, index)
+            rlist.ll_getitem_foldable_nonneg = seen_ok
+            res = self.interpret(dummyfn, [])
+            assert res == sum(map(ord, 'abcdef'))
+        finally:
+            rlist.ll_getitem_foldable_nonneg = prev
+
+
 class TestOOtype(BaseTestRlist, OORtypeMixin):
     rlist = oo_rlist
     type_system = 'ootype'
diff --git a/pypy/rpython/tool/rfficache.py b/pypy/rpython/tool/rfficache.py
--- a/pypy/rpython/tool/rfficache.py
+++ b/pypy/rpython/tool/rfficache.py
@@ -14,6 +14,8 @@
 
 def ask_gcc(question, add_source=""):
     includes = ['stdlib.h', 'stdio.h', 'sys/types.h']
+    if os.name != 'nt':
+        includes += ['inttypes.h']
     include_string = "\n".join(["#include <%s>" % i for i in includes])
     c_source = py.code.Source('''
     // includes
diff --git a/pypy/tool/jitlogparser/parser.py b/pypy/tool/jitlogparser/parser.py
--- a/pypy/tool/jitlogparser/parser.py
+++ b/pypy/tool/jitlogparser/parser.py
@@ -93,7 +93,7 @@
                         end_index += 1
                     op.asm = '\n'.join([asm[i][1] for i in range(asm_index, end_index)])
         return loop
-                    
+
     def _asm_disassemble(self, d, origin_addr, tp):
         from pypy.jit.backend.x86.tool.viewcode import machine_code_dump
         return list(machine_code_dump(d, tp, origin_addr))
@@ -109,7 +109,7 @@
         if not argspec.strip():
             return [], None
         if opname == 'debug_merge_point':
-            return argspec.split(", ", 1), None
+            return argspec.split(", ", 2), None
         else:
             args = argspec.split(', ')
             descr = None
@@ -159,7 +159,7 @@
         for op in operations:
             if op.name == 'debug_merge_point':
                 self.inline_level = int(op.args[0])
-                self.parse_code_data(op.args[1][1:-1])
+                self.parse_code_data(op.args[2][1:-1])
                 break
         else:
             self.inline_level = 0
@@ -387,7 +387,7 @@
             m = re.search('guard \d+', comm)
             name = m.group(0)
         else:
-            name = comm[2:comm.find(':')-1]
+            name = " ".join(comm[2:].split(" ", 2)[:2])
         if name in dumps:
             bname, start_ofs, dump = dumps[name]
             loop.force_asm = (lambda dump=dump, start_ofs=start_ofs,
@@ -417,7 +417,7 @@
         part.descr = descrs[i]
         part.comment = trace.comment
         parts.append(part)
-    
+
     return parts
 
 def parse_log_counts(input, loops):
diff --git a/pypy/tool/jitlogparser/test/test_parser.py b/pypy/tool/jitlogparser/test/test_parser.py
--- a/pypy/tool/jitlogparser/test/test_parser.py
+++ b/pypy/tool/jitlogparser/test/test_parser.py
@@ -29,7 +29,7 @@
 def test_parse_non_code():
     ops = parse('''
     []
-    debug_merge_point(0, "SomeRandomStuff")
+    debug_merge_point(0, 0, "SomeRandomStuff")
     ''')
     res = Function.from_operations(ops.operations, LoopStorage())
     assert len(res.chunks) == 1
@@ -39,10 +39,10 @@
     ops = parse('''
     [i0]
     label()
-    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 200> #10 ADD")
-    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 200> #11 SUB")
+    debug_merge_point(0, 0, "<code object stuff. file '/I/dont/exist.py'. line 200> #10 ADD")
+    debug_merge_point(0, 0, "<code object stuff. file '/I/dont/exist.py'. line 200> #11 SUB")
     i1 = int_add(i0, 1)
-    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 200> #11 SUB")
+    debug_merge_point(0, 0, "<code object stuff. file '/I/dont/exist.py'. line 200> #11 SUB")
     i2 = int_add(i1, 1)
     ''')
     res = Function.from_operations(ops.operations, LoopStorage(), loopname='<loopname>')
@@ -57,12 +57,12 @@
 def test_inlined_call():
     ops = parse("""
     []
-    debug_merge_point(0, '<code object inlined_call. file 'source.py'. line 12> #28 CALL_FUNCTION')
+    debug_merge_point(0, 0, '<code object inlined_call. file 'source.py'. line 12> #28 CALL_FUNCTION')
     i18 = getfield_gc(p0, descr=<BoolFieldDescr pypy.interpreter.pyframe.PyFrame.inst_is_being_profiled 89>)
-    debug_merge_point(1, '<code object inner. file 'source.py'. line 9> #0 LOAD_FAST')
-    debug_merge_point(1, '<code object inner. file 'source.py'. line 9> #3 LOAD_CONST')
-    debug_merge_point(1, '<code object inner. file 'source.py'. line 9> #7 RETURN_VALUE')
-    debug_merge_point(0, '<code object inlined_call. file 'source.py'. line 12> #31 STORE_FAST')
+    debug_merge_point(1, 1, '<code object inner. file 'source.py'. line 9> #0 LOAD_FAST')
+    debug_merge_point(1, 1, '<code object inner. file 'source.py'. line 9> #3 LOAD_CONST')
+    debug_merge_point(1, 1, '<code object inner. file 'source.py'. line 9> #7 RETURN_VALUE')
+    debug_merge_point(0, 0, '<code object inlined_call. file 'source.py'. line 12> #31 STORE_FAST')
     """)
     res = Function.from_operations(ops.operations, LoopStorage())
     assert len(res.chunks) == 3 # two chunks + inlined call
@@ -75,10 +75,10 @@
 def test_name():
     ops = parse('''
     [i0]
-    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 200> #10 ADD")
-    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 201> #11 SUB")
+    debug_merge_point(0, 0, "<code object stuff. file '/I/dont/exist.py'. line 200> #10 ADD")
+    debug_merge_point(0, 0, "<code object stuff. file '/I/dont/exist.py'. line 201> #11 SUB")
     i1 = int_add(i0, 1)
-    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 202> #11 SUB")
+    debug_merge_point(0, 0, "<code object stuff. file '/I/dont/exist.py'. line 202> #11 SUB")
     i2 = int_add(i1, 1)
     ''')
     res = Function.from_operations(ops.operations, LoopStorage())
@@ -92,10 +92,10 @@
     ops = parse('''
     [i0]
     i3 = int_add(i0, 1)
-    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 200> #10 ADD")
-    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 201> #11 SUB")
+    debug_merge_point(0, 0, "<code object stuff. file '/I/dont/exist.py'. line 200> #10 ADD")
+    debug_merge_point(0, 0, "<code object stuff. file '/I/dont/exist.py'. line 201> #11 SUB")
     i1 = int_add(i0, 1)
-    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 202> #11 SUB")
+    debug_merge_point(0, 0, "<code object stuff. file '/I/dont/exist.py'. line 202> #11 SUB")
     i2 = int_add(i1, 1)
     ''')
     res = Function.from_operations(ops.operations, LoopStorage())
@@ -105,10 +105,10 @@
     fname = str(py.path.local(__file__).join('..', 'x.py'))
     ops = parse('''
     [i0, i1]
-    debug_merge_point(0, "<code object f. file '%(fname)s'. line 2> #0 LOAD_FAST")
-    debug_merge_point(0, "<code object f. file '%(fname)s'. line 2> #3 LOAD_FAST")
-    debug_merge_point(0, "<code object f. file '%(fname)s'. line 2> #6 BINARY_ADD")
-    debug_merge_point(0, "<code object f. file '%(fname)s'. line 2> #7 RETURN_VALUE")
+    debug_merge_point(0, 0, "<code object f. file '%(fname)s'. line 2> #0 LOAD_FAST")
+    debug_merge_point(0, 0, "<code object f. file '%(fname)s'. line 2> #3 LOAD_FAST")
+    debug_merge_point(0, 0, "<code object f. file '%(fname)s'. line 2> #6 BINARY_ADD")
+    debug_merge_point(0, 0, "<code object f. file '%(fname)s'. line 2> #7 RETURN_VALUE")
     ''' % locals())
     res = Function.from_operations(ops.operations, LoopStorage())
     assert res.chunks[1].lineno == 3
@@ -119,11 +119,11 @@
     fname = str(py.path.local(__file__).join('..', 'x.py'))
     ops = parse('''
     [i0, i1]
-    debug_merge_point(0, "<code object g. file '%(fname)s'. line 5> #9 LOAD_FAST")
-    debug_merge_point(0, "<code object g. file '%(fname)s'. line 5> #12 LOAD_CONST")
-    debug_merge_point(0, "<code object g. file '%(fname)s'. line 5> #22 LOAD_CONST")
-    debug_merge_point(0, "<code object g. file '%(fname)s'. line 5> #28 LOAD_CONST")
-    debug_merge_point(0, "<code object g. file '%(fname)s'. line 5> #6 SETUP_LOOP")
+    debug_merge_point(0, 0, "<code object g. file '%(fname)s'. line 5> #9 LOAD_FAST")
+    debug_merge_point(0, 0, "<code object g. file '%(fname)s'. line 5> #12 LOAD_CONST")
+    debug_merge_point(0, 0, "<code object g. file '%(fname)s'. line 5> #22 LOAD_CONST")
+    debug_merge_point(0, 0, "<code object g. file '%(fname)s'. line 5> #28 LOAD_CONST")
+    debug_merge_point(0, 0, "<code object g. file '%(fname)s'. line 5> #6 SETUP_LOOP")
     ''' % locals())
     res = Function.from_operations(ops.operations, LoopStorage())
     assert res.linerange == (7, 9)
@@ -135,7 +135,7 @@
     fname = str(py.path.local(__file__).join('..', 'x.py'))
     ops = parse("""
     [p6, p1]
-    debug_merge_point(0, '<code object h. file '%(fname)s'. line 11> #17 FOR_ITER')
+    debug_merge_point(0, 0, '<code object h. file '%(fname)s'. line 11> #17 FOR_ITER')
     guard_class(p6, 144264192, descr=<Guard2>)
     p12 = getfield_gc(p6, descr=<GcPtrFieldDescr pypy.objspace.std.iterobject.W_AbstractSeqIterObject.inst_w_seq 12>)
     """ % locals())
@@ -181,7 +181,7 @@
 
 def test_parsing_strliteral():
     loop = parse("""
-    debug_merge_point(0, 'StrLiteralSearch at 11/51 [17, 8, 3, 1, 1, 1, 1, 51, 0, 19, 51, 1]')
+    debug_merge_point(0, 0, 'StrLiteralSearch at 11/51 [17, 8, 3, 1, 1, 1, 1, 51, 0, 19, 51, 1]')
     """)
     ops = Function.from_operations(loop.operations, LoopStorage())
     chunk = ops.chunks[0]
@@ -193,12 +193,12 @@
     loop = parse("""
     # Loop 0 : loop with 19 ops
     [p0, p1, p2, p3, i4]
-    debug_merge_point(0, '<code object f. file 'x.py'. line 2> #15 COMPARE_OP')
+    debug_merge_point(0, 0, '<code object f. file 'x.py'. line 2> #15 COMPARE_OP')
     +166: i6 = int_lt(i4, 10000)
     guard_true(i6, descr=<Guard3>) [p1, p0, p2, p3, i4]
-    debug_merge_point(0, '<code object f. file 'x.py'. line 2> #27 INPLACE_ADD')
+    debug_merge_point(0, 0, '<code object f. file 'x.py'. line 2> #27 INPLACE_ADD')
     +179: i8 = int_add(i4, 1)
-    debug_merge_point(0, '<code object f. file 'x.py'. line 2> #31 JUMP_ABSOLUTE')
+    debug_merge_point(0, 0, '<code object f. file 'x.py'. line 2> #31 JUMP_ABSOLUTE')
     +183: i10 = getfield_raw(40564608, descr=<SignedFieldDescr pypysig_long_struct.c_value 0>)
     +191: i12 = int_sub(i10, 1)
     +195: setfield_raw(40564608, i12, descr=<SignedFieldDescr pypysig_long_struct.c_value 0>)
@@ -287,8 +287,8 @@
 def test_parse_nonpython():
     loop = parse("""
     []
-    debug_merge_point(0, 'random')
-    debug_merge_point(0, '<code object f. file 'x.py'. line 2> #15 COMPARE_OP')
+    debug_merge_point(0, 0, 'random')
+    debug_merge_point(0, 0, '<code object f. file 'x.py'. line 2> #15 COMPARE_OP')
     """)
     f = Function.from_operations(loop.operations, LoopStorage())
     assert f.chunks[-1].filename == 'x.py'
diff --git a/pypy/tool/release/package.py b/pypy/tool/release/package.py
--- a/pypy/tool/release/package.py
+++ b/pypy/tool/release/package.py
@@ -60,7 +60,7 @@
     if sys.platform == 'win32':
         # Can't rename a DLL: it is always called 'libpypy-c.dll'
         for extra in ['libpypy-c.dll',
-                      'libexpat.dll', 'sqlite3.dll', 'msvcr90.dll',
+                      'libexpat.dll', 'sqlite3.dll', 'msvcr100.dll',
                       'libeay32.dll', 'ssleay32.dll']:
             p = pypy_c.dirpath().join(extra)
             if not p.check():
@@ -82,6 +82,9 @@
     for file in ['LICENSE', 'README']:
         shutil.copy(str(basedir.join(file)), str(pypydir))
     pypydir.ensure('include', dir=True)
+    if sys.platform == 'win32':
+        shutil.copyfile(str(pypy_c.dirpath().join("libpypy-c.lib")),
+                        str(pypydir.join('include/python27.lib')))
     # we want to put there all *.h and *.inl from trunk/include
     # and from pypy/_interfaces
     includedir = basedir.join('include')
diff --git a/pypy/translator/c/gcc/instruction.py b/pypy/translator/c/gcc/instruction.py
--- a/pypy/translator/c/gcc/instruction.py
+++ b/pypy/translator/c/gcc/instruction.py
@@ -13,13 +13,17 @@
 ARGUMENT_REGISTERS_64 = ('%rdi', '%rsi', '%rdx', '%rcx', '%r8', '%r9')
 
 
-def frameloc_esp(offset):
+def frameloc_esp(offset, wordsize):
     assert offset >= 0
-    assert offset % 4 == 0
+    assert offset % wordsize == 0
+    if wordsize == 8:    # in this case, there are 3 null bits, but we
+        offset >>= 1     # only need 2 of them
     return LOC_ESP_PLUS | offset
 
-def frameloc_ebp(offset):
-    assert offset % 4 == 0
+def frameloc_ebp(offset, wordsize):
+    assert offset % wordsize == 0
+    if wordsize == 8:    # in this case, there are 3 null bits, but we
+        offset >>= 1     # only need 2 of them
     if offset >= 0:
         return LOC_EBP_PLUS | offset
     else:
@@ -57,12 +61,12 @@
             # try to use esp-relative addressing
             ofs_from_esp = framesize + self.ofs_from_frame_end
             if ofs_from_esp % 2 == 0:
-                return frameloc_esp(ofs_from_esp)
+                return frameloc_esp(ofs_from_esp, wordsize)
             # we can get an odd value if the framesize is marked as bogus
             # by visit_andl()
         assert uses_frame_pointer
         ofs_from_ebp = self.ofs_from_frame_end + wordsize
-        return frameloc_ebp(ofs_from_ebp)
+        return frameloc_ebp(ofs_from_ebp, wordsize)
 
 
 class Insn(object):
diff --git a/pypy/translator/c/gcc/trackgcroot.py b/pypy/translator/c/gcc/trackgcroot.py
--- a/pypy/translator/c/gcc/trackgcroot.py
+++ b/pypy/translator/c/gcc/trackgcroot.py
@@ -78,9 +78,9 @@
             if self.is_stack_bottom:
                 retaddr = LOC_NOWHERE     # end marker for asmgcroot.py
             elif self.uses_frame_pointer:
-                retaddr = frameloc_ebp(self.WORD)
+                retaddr = frameloc_ebp(self.WORD, self.WORD)
             else:
-                retaddr = frameloc_esp(insn.framesize)
+                retaddr = frameloc_esp(insn.framesize, self.WORD)
             shape = [retaddr]
             # the first gcroots are always the ones corresponding to
             # the callee-saved registers
@@ -472,7 +472,7 @@
 
     IGNORE_OPS_WITH_PREFIXES = dict.fromkeys([
         'cmp', 'test', 'set', 'sahf', 'lahf', 'cld', 'std',
-        'rep', 'movs', 'lods', 'stos', 'scas', 'cwde', 'prefetch',
+        'rep', 'movs', 'movhp', 'lods', 'stos', 'scas', 'cwde', 'prefetch',
         # floating-point operations cannot produce GC pointers
         'f',
         'cvt', 'ucomi', 'comi', 'subs', 'subp' , 'adds', 'addp', 'xorp',
@@ -484,7 +484,7 @@
         'shl', 'shr', 'sal', 'sar', 'rol', 'ror', 'mul', 'imul', 'div', 'idiv',
         'bswap', 'bt', 'rdtsc',
         'punpck', 'pshufd', 'pcmp', 'pand', 'psllw', 'pslld', 'psllq',
-        'paddq', 'pinsr',
+        'paddq', 'pinsr', 'pmul', 'psrl',
         # sign-extending moves should not produce GC pointers
         'cbtw', 'cwtl', 'cwtd', 'cltd', 'cltq', 'cqto',
         # zero-extending moves should not produce GC pointers
@@ -894,6 +894,8 @@
             return '%' + cls.CALLEE_SAVE_REGISTERS[reg].replace("%", "")
         else:
             offset = loc & ~ LOC_MASK
+            if cls.WORD == 8:
+                offset <<= 1
             if kind == LOC_EBP_PLUS:
                 result = '(%' + cls.EBP.replace("%", "") + ')'
             elif kind == LOC_EBP_MINUS:
diff --git a/pypy/translator/c/src/asm_gcc_x86.h b/pypy/translator/c/src/asm_gcc_x86.h
--- a/pypy/translator/c/src/asm_gcc_x86.h
+++ b/pypy/translator/c/src/asm_gcc_x86.h
@@ -102,6 +102,12 @@
 #endif  /* !PYPY_CPU_HAS_STANDARD_PRECISION */
 
 
+#ifdef PYPY_X86_CHECK_SSE2
+#define PYPY_X86_CHECK_SSE2_DEFINED
+extern void pypy_x86_check_sse2(void);
+#endif
+