[pypy-commit] pypy sepcomp2: hg merge default

amauryfa noreply at buildbot.pypy.org
Wed Mar 14 00:00:32 CET 2012


Author: Amaury Forgeot d'Arc <amauryfa at gmail.com>
Branch: sepcomp2
Changeset: r53513:faf523b0251e
Date: 2012-03-14 00:00 +0100
http://bitbucket.org/pypy/pypy/changeset/faf523b0251e/

Log:	hg merge default

diff too long, truncating to 10000 out of 10176 lines

diff --git a/lib-python/2.7/SimpleXMLRPCServer.py b/lib-python/2.7/SimpleXMLRPCServer.py
--- a/lib-python/2.7/SimpleXMLRPCServer.py
+++ b/lib-python/2.7/SimpleXMLRPCServer.py
@@ -486,7 +486,10 @@
             L = []
             while size_remaining:
                 chunk_size = min(size_remaining, max_chunk_size)
-                L.append(self.rfile.read(chunk_size))
+                chunk = self.rfile.read(chunk_size)
+                if not chunk:
+                    break
+                L.append(chunk)
                 size_remaining -= len(L[-1])
             data = ''.join(L)
 
diff --git a/lib-python/2.7/test/test_xmlrpc.py b/lib-python/2.7/test/test_xmlrpc.py
--- a/lib-python/2.7/test/test_xmlrpc.py
+++ b/lib-python/2.7/test/test_xmlrpc.py
@@ -308,7 +308,7 @@
         global ADDR, PORT, URL
         ADDR, PORT = serv.socket.getsockname()
         #connect to IP address directly.  This avoids socket.create_connection()
-        #trying to connect to to "localhost" using all address families, which
+        #trying to connect to "localhost" using all address families, which
         #causes slowdown e.g. on vista which supports AF_INET6.  The server listens
         #on AF_INET only.
         URL = "http://%s:%d"%(ADDR, PORT)
@@ -367,7 +367,7 @@
         global ADDR, PORT, URL
         ADDR, PORT = serv.socket.getsockname()
         #connect to IP address directly.  This avoids socket.create_connection()
-        #trying to connect to to "localhost" using all address families, which
+        #trying to connect to "localhost" using all address families, which
         #causes slowdown e.g. on vista which supports AF_INET6.  The server listens
         #on AF_INET only.
         URL = "http://%s:%d"%(ADDR, PORT)
@@ -472,6 +472,9 @@
                 # protocol error; provide additional information in test output
                 self.fail("%s\n%s" % (e, getattr(e, "headers", "")))
 
+    def test_unicode_host(self):
+        server = xmlrpclib.ServerProxy(u"http://%s:%d/RPC2"%(ADDR, PORT))
+        self.assertEqual(server.add("a", u"\xe9"), u"a\xe9")
 
     # [ch] The test 404 is causing lots of false alarms.
     def XXXtest_404(self):
@@ -586,6 +589,12 @@
         # This avoids waiting for the socket timeout.
         self.test_simple1()
 
+    def test_partial_post(self):
+        # Check that a partial POST doesn't make the server loop: issue #14001.
+        conn = httplib.HTTPConnection(ADDR, PORT)
+        conn.request('POST', '/RPC2 HTTP/1.0\r\nContent-Length: 100\r\n\r\nbye')
+        conn.close()
+
 class MultiPathServerTestCase(BaseServerTestCase):
     threadFunc = staticmethod(http_multi_server)
     request_count = 2
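
The SimpleXMLRPCServer hunk above stops reading as soon as rfile.read()
returns an empty string, so a client that announces a large Content-Length
but sends only part of the body (as test_partial_post simulates) can no
longer make the handler loop forever. A minimal standalone sketch of the
same pattern, with StringIO standing in for the request's rfile
(illustrative only, not part of the patch):

    from StringIO import StringIO

    def read_body(rfile, size_remaining, max_chunk_size=10 * 1024 * 1024):
        chunks = []
        while size_remaining:
            chunk = rfile.read(min(size_remaining, max_chunk_size))
            if not chunk:           # peer closed before Content-Length bytes
                break
            chunks.append(chunk)
            size_remaining -= len(chunk)
        return ''.join(chunks)

    # Content-Length promised 100 bytes but only 3 arrived: we get 'bye'
    # back instead of spinning on empty reads.
    assert read_body(StringIO('bye'), 100) == 'bye'
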
diff --git a/lib-python/conftest.py b/lib-python/conftest.py
--- a/lib-python/conftest.py
+++ b/lib-python/conftest.py
@@ -311,7 +311,7 @@
     RegrTest('test_mimetypes.py'),
     RegrTest('test_MimeWriter.py', core=False),
     RegrTest('test_minidom.py'),
-    RegrTest('test_mmap.py'),
+    RegrTest('test_mmap.py', usemodules="mmap"),
     RegrTest('test_module.py', core=True),
     RegrTest('test_modulefinder.py'),
     RegrTest('test_msilib.py', skip=only_win32),
diff --git a/lib-python/modified-2.7/ctypes/test/test_arrays.py b/lib-python/modified-2.7/ctypes/test/test_arrays.py
--- a/lib-python/modified-2.7/ctypes/test/test_arrays.py
+++ b/lib-python/modified-2.7/ctypes/test/test_arrays.py
@@ -1,12 +1,23 @@
 import unittest
 from ctypes import *
+from test.test_support import impl_detail
 
 formats = "bBhHiIlLqQfd"
 
+# c_longdouble commented out for PyPy, look at the comment in test_longdouble
 formats = c_byte, c_ubyte, c_short, c_ushort, c_int, c_uint, \
-          c_long, c_ulonglong, c_float, c_double, c_longdouble
+          c_long, c_ulonglong, c_float, c_double #, c_longdouble
 
 class ArrayTestCase(unittest.TestCase):
+
+    @impl_detail('long double not supported by PyPy', pypy=False)
+    def test_longdouble(self):
+        """
+        This test is empty. It's just here as a reminder that we commented
+        out c_longdouble in "formats". If PyPy ever supports c_longdouble,
+        we should kill this test and uncomment c_longdouble inside formats.
+        """
+
     def test_simple(self):
         # create classes holding simple numeric types, and check
         # various properties.
diff --git a/lib-python/modified-2.7/distutils/command/bdist_wininst.py b/lib-python/modified-2.7/distutils/command/bdist_wininst.py
--- a/lib-python/modified-2.7/distutils/command/bdist_wininst.py
+++ b/lib-python/modified-2.7/distutils/command/bdist_wininst.py
@@ -298,7 +298,8 @@
                              bitmaplen,        # number of bytes in bitmap
                              )
         file.write(header)
-        file.write(open(arcname, "rb").read())
+        with open(arcname, "rb") as arcfile:
+            file.write(arcfile.read())
 
     # create_exe()
 
diff --git a/lib-python/modified-2.7/distutils/sysconfig_pypy.py b/lib-python/modified-2.7/distutils/sysconfig_pypy.py
--- a/lib-python/modified-2.7/distutils/sysconfig_pypy.py
+++ b/lib-python/modified-2.7/distutils/sysconfig_pypy.py
@@ -60,6 +60,7 @@
     g['EXE'] = ""
     g['SO'] = _get_so_extension() or ".so"
     g['SOABI'] = g['SO'].rsplit('.')[0]
+    g['LIBDIR'] = os.path.join(sys.prefix, 'lib')
 
     global _config_vars
     _config_vars = g
diff --git a/lib-python/modified-2.7/opcode.py b/lib-python/modified-2.7/opcode.py
--- a/lib-python/modified-2.7/opcode.py
+++ b/lib-python/modified-2.7/opcode.py
@@ -192,5 +192,6 @@
 def_op('LOOKUP_METHOD', 201)          # Index in name list
 hasname.append(201)
 def_op('CALL_METHOD', 202)            # #args not including 'self'
+def_op('BUILD_LIST_FROM_ARG', 203)
 
 del def_op, name_op, jrel_op, jabs_op
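
With def_op('BUILD_LIST_FROM_ARG', 203) registered, the new opcode shows up
in the usual opcode/dis tables. A quick check one could run on a PyPy whose
stdlib includes this change (illustrative only):

    import opcode, dis
    assert opcode.opmap['BUILD_LIST_FROM_ARG'] == 203
    assert opcode.opname[203] == 'BUILD_LIST_FROM_ARG'
    assert dis.opmap['BUILD_LIST_FROM_ARG'] == 203   # dis re-exports opcode's tables
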
diff --git a/lib-python/modified-2.7/test/test_dis.py b/lib-python/modified-2.7/test/test_dis.py
new file mode 100644
--- /dev/null
+++ b/lib-python/modified-2.7/test/test_dis.py
@@ -0,0 +1,150 @@
+# Minimal tests for dis module
+
+from test.test_support import run_unittest
+import unittest
+import sys
+import dis
+import StringIO
+
+
+def _f(a):
+    print a
+    return 1
+
+dis_f = """\
+ %-4d         0 LOAD_FAST                0 (a)
+              3 PRINT_ITEM
+              4 PRINT_NEWLINE
+
+ %-4d         5 LOAD_CONST               1 (1)
+              8 RETURN_VALUE
+"""%(_f.func_code.co_firstlineno + 1,
+     _f.func_code.co_firstlineno + 2)
+
+
+def bug708901():
+    for res in range(1,
+                     10):
+        pass
+
+dis_bug708901 = """\
+ %-4d         0 SETUP_LOOP              23 (to 26)
+              3 LOAD_GLOBAL              0 (range)
+              6 LOAD_CONST               1 (1)
+
+ %-4d         9 LOAD_CONST               2 (10)
+             12 CALL_FUNCTION            2
+             15 GET_ITER
+        >>   16 FOR_ITER                 6 (to 25)
+             19 STORE_FAST               0 (res)
+
+ %-4d        22 JUMP_ABSOLUTE           16
+        >>   25 POP_BLOCK
+        >>   26 LOAD_CONST               0 (None)
+             29 RETURN_VALUE
+"""%(bug708901.func_code.co_firstlineno + 1,
+     bug708901.func_code.co_firstlineno + 2,
+     bug708901.func_code.co_firstlineno + 3)
+
+
+def bug1333982(x=[]):
+    assert 0, ([s for s in x] +
+              1)
+    pass
+
+dis_bug1333982 = """\
+ %-4d         0 LOAD_CONST               1 (0)
+              3 POP_JUMP_IF_TRUE        38
+              6 LOAD_GLOBAL              0 (AssertionError)
+              9 LOAD_FAST                0 (x)
+             12 BUILD_LIST_FROM_ARG      0
+             15 GET_ITER
+        >>   16 FOR_ITER                12 (to 31)
+             19 STORE_FAST               1 (s)
+             22 LOAD_FAST                1 (s)
+             25 LIST_APPEND              2
+             28 JUMP_ABSOLUTE           16
+
+ %-4d   >>   31 LOAD_CONST               2 (1)
+             34 BINARY_ADD
+             35 RAISE_VARARGS            2
+
+ %-4d   >>   38 LOAD_CONST               0 (None)
+             41 RETURN_VALUE
+"""%(bug1333982.func_code.co_firstlineno + 1,
+     bug1333982.func_code.co_firstlineno + 2,
+     bug1333982.func_code.co_firstlineno + 3)
+
+_BIG_LINENO_FORMAT = """\
+%3d           0 LOAD_GLOBAL              0 (spam)
+              3 POP_TOP
+              4 LOAD_CONST               0 (None)
+              7 RETURN_VALUE
+"""
+
+class DisTests(unittest.TestCase):
+    def do_disassembly_test(self, func, expected):
+        s = StringIO.StringIO()
+        save_stdout = sys.stdout
+        sys.stdout = s
+        dis.dis(func)
+        sys.stdout = save_stdout
+        got = s.getvalue()
+        # Trim trailing blanks (if any).
+        lines = got.split('\n')
+        lines = [line.rstrip() for line in lines]
+        expected = expected.split("\n")
+        import difflib
+        if expected != lines:
+            self.fail(
+                "events did not match expectation:\n" +
+                "\n".join(difflib.ndiff(expected,
+                                        lines)))
+
+    def test_opmap(self):
+        self.assertEqual(dis.opmap["STOP_CODE"], 0)
+        self.assertIn(dis.opmap["LOAD_CONST"], dis.hasconst)
+        self.assertIn(dis.opmap["STORE_NAME"], dis.hasname)
+
+    def test_opname(self):
+        self.assertEqual(dis.opname[dis.opmap["LOAD_FAST"]], "LOAD_FAST")
+
+    def test_boundaries(self):
+        self.assertEqual(dis.opmap["EXTENDED_ARG"], dis.EXTENDED_ARG)
+        self.assertEqual(dis.opmap["STORE_NAME"], dis.HAVE_ARGUMENT)
+
+    def test_dis(self):
+        self.do_disassembly_test(_f, dis_f)
+
+    def test_bug_708901(self):
+        self.do_disassembly_test(bug708901, dis_bug708901)
+
+    def test_bug_1333982(self):
+        # This one is checking bytecodes generated for an `assert` statement,
+        # so fails if the tests are run with -O.  Skip this test then.
+        if __debug__:
+            self.do_disassembly_test(bug1333982, dis_bug1333982)
+
+    def test_big_linenos(self):
+        def func(count):
+            namespace = {}
+            func = "def foo():\n " + "".join(["\n "] * count + ["spam\n"])
+            exec func in namespace
+            return namespace['foo']
+
+        # Test all small ranges
+        for i in xrange(1, 300):
+            expected = _BIG_LINENO_FORMAT % (i + 2)
+            self.do_disassembly_test(func(i), expected)
+
+        # Test some larger ranges too
+        for i in xrange(300, 5000, 10):
+            expected = _BIG_LINENO_FORMAT % (i + 2)
+            self.do_disassembly_test(func(i), expected)
+
+def test_main():
+    run_unittest(DisTests)
+
+
+if __name__ == "__main__":
+    test_main()
diff --git a/lib_pypy/_csv.py b/lib_pypy/_csv.py
--- a/lib_pypy/_csv.py
+++ b/lib_pypy/_csv.py
@@ -414,7 +414,7 @@
 
     def _parse_add_char(self, c):
         if len(self.field) + len(c) > _field_limit:
-            raise Error("field larget than field limit (%d)" % (_field_limit))
+            raise Error("field larger than field limit (%d)" % (_field_limit))
         self.field += c
         
 
diff --git a/lib_pypy/_ctypes/builtin.py b/lib_pypy/_ctypes/builtin.py
--- a/lib_pypy/_ctypes/builtin.py
+++ b/lib_pypy/_ctypes/builtin.py
@@ -31,24 +31,20 @@
     arg = cobj._get_buffer_value()
     return _rawffi.wcharp2rawunicode(arg, lgt)
 
-class ErrorObject(local):
-    def __init__(self):
-        self.errno = 0
-        self.winerror = 0
-_error_object = ErrorObject()
+_err = local()
 
 def get_errno():
-    return _error_object.errno
+    return getattr(_err, "errno", 0)
 
 def set_errno(errno):
-    old_errno = _error_object.errno
-    _error_object.errno = errno
+    old_errno = get_errno()
+    _err.errno = errno
     return old_errno
 
 def get_last_error():
-    return _error_object.winerror
+    return getattr(_err, "winerror", 0)
 
 def set_last_error(winerror):
-    old_winerror = _error_object.winerror
-    _error_object.winerror = winerror
+    old_winerror = get_last_error()
+    _err.winerror = winerror
     return old_winerror
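
The rewrite above replaces the ErrorObject subclass with a bare
threading.local() and reads missing slots through getattr() defaults, so
every thread lazily gets its own errno/winerror pair. A minimal sketch of
the same pattern in plain Python (the names only mirror the ones above for
illustration):

    import threading

    _err = threading.local()

    def get_errno():
        return getattr(_err, "errno", 0)    # 0 until this thread sets it

    def set_errno(value):
        old = get_errno()
        _err.errno = value
        return old

    def worker():
        set_errno(42)
        assert get_errno() == 42            # visible in this thread only

    t = threading.Thread(target=worker)
    t.start(); t.join()
    assert get_errno() == 0                 # the main thread keeps the default
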
diff --git a/lib_pypy/_ctypes/function.py b/lib_pypy/_ctypes/function.py
--- a/lib_pypy/_ctypes/function.py
+++ b/lib_pypy/_ctypes/function.py
@@ -3,7 +3,7 @@
 from _ctypes.primitive import SimpleType, _SimpleCData
 from _ctypes.basics import ArgumentError, keepalive_key
 from _ctypes.basics import is_struct_shape
-from _ctypes.builtin import set_errno, set_last_error
+from _ctypes.builtin import get_errno, set_errno, get_last_error, set_last_error
 import _rawffi
 import _ffi
 import sys
@@ -350,16 +350,24 @@
     def _call_funcptr(self, funcptr, *newargs):
 
         if self._flags_ & _rawffi.FUNCFLAG_USE_ERRNO:
-            set_errno(_rawffi.get_errno())
+            tmp = _rawffi.get_errno()
+            _rawffi.set_errno(get_errno())
+            set_errno(tmp)
         if self._flags_ & _rawffi.FUNCFLAG_USE_LASTERROR:
-            set_last_error(_rawffi.get_last_error())
+            tmp = _rawffi.get_last_error()
+            _rawffi.set_last_error(get_last_error())
+            set_last_error(tmp)
         try:
             result = funcptr(*newargs)
         finally:
             if self._flags_ & _rawffi.FUNCFLAG_USE_ERRNO:
-                set_errno(_rawffi.get_errno())
+                tmp = _rawffi.get_errno()
+                _rawffi.set_errno(get_errno())
+                set_errno(tmp)
             if self._flags_ & _rawffi.FUNCFLAG_USE_LASTERROR:
-                set_last_error(_rawffi.get_last_error())
+                tmp = _rawffi.get_last_error()
+                _rawffi.set_last_error(get_last_error())
+                set_last_error(tmp)
         #
         try:
             return self._build_result(self._restype_, result, newargs)
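
_call_funcptr() now swaps the ctypes-level errno with the one kept by
_rawffi right before and after the foreign call: the call starts with the
value the application last passed to set_errno(), and afterwards
get_errno() reports whatever the call left behind. A sketch of the swap
idiom with plain dicts standing in for _rawffi and the ctypes-level store
(illustrative only, not the real API):

    backend = {"errno": 0}      # stand-in for _rawffi.get_errno/set_errno
    app     = {"errno": 0}      # stand-in for _ctypes get_errno/set_errno

    def swap_errno():
        tmp = backend["errno"]
        backend["errno"] = app["errno"]
        app["errno"] = tmp

    app["errno"] = 5            # application primes errno before the call
    swap_errno()                # the C call starts with errno == 5
    assert backend["errno"] == 5
    backend["errno"] = 17       # ...the C call fails and sets errno
    swap_errno()                # the application now observes that value
    assert app["errno"] == 17
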
diff --git a/lib_pypy/cPickle.py b/lib_pypy/cPickle.py
--- a/lib_pypy/cPickle.py
+++ b/lib_pypy/cPickle.py
@@ -2,16 +2,95 @@
 # One-liner implementation of cPickle
 #
 
-from pickle import *
+from pickle import Pickler, dump, dumps, PickleError, PicklingError, UnpicklingError, _EmptyClass
 from pickle import __doc__, __version__, format_version, compatible_formats
+from types import *
+from copy_reg import dispatch_table
+from copy_reg import _extension_registry, _inverted_registry, _extension_cache
+import marshal, struct, sys
 
 try: from __pypy__ import builtinify
 except ImportError: builtinify = lambda f: f
 
+# These are purely informational; no code uses these.
+format_version = "2.0"                  # File format version we write
+compatible_formats = ["1.0",            # Original protocol 0
+                      "1.1",            # Protocol 0 with INST added
+                      "1.2",            # Original protocol 1
+                      "1.3",            # Protocol 1 with BINFLOAT added
+                      "2.0",            # Protocol 2
+                      ]                 # Old format versions we can read
+
+# Keep in synch with cPickle.  This is the highest protocol number we
+# know how to read.
+HIGHEST_PROTOCOL = 2
 
 BadPickleGet = KeyError
 UnpickleableError = PicklingError
 
+MARK            = ord('(')   # push special markobject on stack
+STOP            = ord('.')   # every pickle ends with STOP
+POP             = ord('0')   # discard topmost stack item
+POP_MARK        = ord('1')   # discard stack top through topmost markobject
+DUP             = ord('2')   # duplicate top stack item
+FLOAT           = ord('F')   # push float object; decimal string argument
+INT             = ord('I')   # push integer or bool; decimal string argument
+BININT          = ord('J')   # push four-byte signed int
+BININT1         = ord('K')   # push 1-byte unsigned int
+LONG            = ord('L')   # push long; decimal string argument
+BININT2         = ord('M')   # push 2-byte unsigned int
+NONE            = ord('N')   # push None
+PERSID          = ord('P')   # push persistent object; id is taken from string arg
+BINPERSID       = ord('Q')   #  "       "         "  ;  "  "   "     "  stack
+REDUCE          = ord('R')   # apply callable to argtuple, both on stack
+STRING          = ord('S')   # push string; NL-terminated string argument
+BINSTRING       = ord('T')   # push string; counted binary string argument
+SHORT_BINSTRING = ord('U')   #  "     "   ;    "      "       "      " < 256 bytes
+UNICODE         = ord('V')   # push Unicode string; raw-unicode-escaped'd argument
+BINUNICODE      = ord('X')   #   "     "       "  ; counted UTF-8 string argument
+APPEND          = ord('a')   # append stack top to list below it
+BUILD           = ord('b')   # call __setstate__ or __dict__.update()
+GLOBAL          = ord('c')   # push self.find_class(modname, name); 2 string args
+DICT            = ord('d')   # build a dict from stack items
+EMPTY_DICT      = ord('}')   # push empty dict
+APPENDS         = ord('e')   # extend list on stack by topmost stack slice
+GET             = ord('g')   # push item from memo on stack; index is string arg
+BINGET          = ord('h')   #   "    "    "    "   "   "  ;   "    " 1-byte arg
+INST            = ord('i')   # build & push class instance
+LONG_BINGET     = ord('j')   # push item from memo on stack; index is 4-byte arg
+LIST            = ord('l')   # build list from topmost stack items
+EMPTY_LIST      = ord(']')   # push empty list
+OBJ             = ord('o')   # build & push class instance
+PUT             = ord('p')   # store stack top in memo; index is string arg
+BINPUT          = ord('q')   #   "     "    "   "   " ;   "    " 1-byte arg
+LONG_BINPUT     = ord('r')   #   "     "    "   "   " ;   "    " 4-byte arg
+SETITEM         = ord('s')   # add key+value pair to dict
+TUPLE           = ord('t')   # build tuple from topmost stack items
+EMPTY_TUPLE     = ord(')')   # push empty tuple
+SETITEMS        = ord('u')   # modify dict by adding topmost key+value pairs
+BINFLOAT        = ord('G')   # push float; arg is 8-byte float encoding
+
+TRUE            = 'I01\n'  # not an opcode; see INT docs in pickletools.py
+FALSE           = 'I00\n'  # not an opcode; see INT docs in pickletools.py
+
+# Protocol 2
+
+PROTO           = ord('\x80')  # identify pickle protocol
+NEWOBJ          = ord('\x81')  # build object by applying cls.__new__ to argtuple
+EXT1            = ord('\x82')  # push object from extension registry; 1-byte index
+EXT2            = ord('\x83')  # ditto, but 2-byte index
+EXT4            = ord('\x84')  # ditto, but 4-byte index
+TUPLE1          = ord('\x85')  # build 1-tuple from stack top
+TUPLE2          = ord('\x86')  # build 2-tuple from two topmost stack items
+TUPLE3          = ord('\x87')  # build 3-tuple from three topmost stack items
+NEWTRUE         = ord('\x88')  # push True
+NEWFALSE        = ord('\x89')  # push False
+LONG1           = ord('\x8a')  # push long from < 256 bytes
+LONG4           = ord('\x8b')  # push really big long
+
+_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]
+
+
 # ____________________________________________________________
 # XXX some temporary dark magic to produce pickled dumps that are
 #     closer to the ones produced by cPickle in CPython
@@ -44,3 +123,474 @@
     file = StringIO()
     Pickler(file, protocol).dump(obj)
     return file.getvalue()
+
+# Why use struct.pack() for pickling but marshal.loads() for
+# unpickling?  struct.pack() is 40% faster than marshal.dumps(), but
+# marshal.loads() is twice as fast as struct.unpack()!
+mloads = marshal.loads
+
+# Unpickling machinery
+
+class Unpickler(object):
+
+    def __init__(self, file):
+        """This takes a file-like object for reading a pickle data stream.
+
+        The protocol version of the pickle is detected automatically, so no
+        proto argument is needed.
+
+        The file-like object must have two methods, a read() method that
+        takes an integer argument, and a readline() method that requires no
+        arguments.  Both methods should return a string.  Thus file-like
+        object can be a file object opened for reading, a StringIO object,
+        or any other custom object that meets this interface.
+        """
+        self.readline = file.readline
+        self.read = file.read
+        self.memo = {}
+
+    def load(self):
+        """Read a pickled object representation from the open file.
+
+        Return the reconstituted object hierarchy specified in the file.
+        """
+        self.mark = object() # any new unique object
+        self.stack = []
+        self.append = self.stack.append
+        try:
+            key = ord(self.read(1))
+            while key != STOP:
+                self.dispatch[key](self)
+                key = ord(self.read(1))
+        except TypeError:
+            if self.read(1) == '':
+                raise EOFError
+            raise
+        return self.stack.pop()
+
+    # Return largest index k such that self.stack[k] is self.mark.
+    # If the stack doesn't contain a mark, eventually raises IndexError.
+    # This could be sped by maintaining another stack, of indices at which
+    # the mark appears.  For that matter, the latter stack would suffice,
+    # and we wouldn't need to push mark objects on self.stack at all.
+    # Doing so is probably a good thing, though, since if the pickle is
+    # corrupt (or hostile) we may get a clue from finding self.mark embedded
+    # in unpickled objects.
+    def marker(self):
+        k = len(self.stack)-1
+        while self.stack[k] is not self.mark: k -= 1
+        return k
+
+    dispatch = {}
+
+    def load_proto(self):
+        proto = ord(self.read(1))
+        if not 0 <= proto <= 2:
+            raise ValueError, "unsupported pickle protocol: %d" % proto
+    dispatch[PROTO] = load_proto
+
+    def load_persid(self):
+        pid = self.readline()[:-1]
+        self.append(self.persistent_load(pid))
+    dispatch[PERSID] = load_persid
+
+    def load_binpersid(self):
+        pid = self.stack.pop()
+        self.append(self.persistent_load(pid))
+    dispatch[BINPERSID] = load_binpersid
+
+    def load_none(self):
+        self.append(None)
+    dispatch[NONE] = load_none
+
+    def load_false(self):
+        self.append(False)
+    dispatch[NEWFALSE] = load_false
+
+    def load_true(self):
+        self.append(True)
+    dispatch[NEWTRUE] = load_true
+
+    def load_int(self):
+        data = self.readline()
+        if data == FALSE[1:]:
+            val = False
+        elif data == TRUE[1:]:
+            val = True
+        else:
+            try:
+                val = int(data)
+            except ValueError:
+                val = long(data)
+        self.append(val)
+    dispatch[INT] = load_int
+
+    def load_binint(self):
+        self.append(mloads('i' + self.read(4)))
+    dispatch[BININT] = load_binint
+
+    def load_binint1(self):
+        self.append(ord(self.read(1)))
+    dispatch[BININT1] = load_binint1
+
+    def load_binint2(self):
+        self.append(mloads('i' + self.read(2) + '\000\000'))
+    dispatch[BININT2] = load_binint2
+
+    def load_long(self):
+        self.append(long(self.readline()[:-1], 0))
+    dispatch[LONG] = load_long
+
+    def load_long1(self):
+        n = ord(self.read(1))
+        bytes = self.read(n)
+        self.append(decode_long(bytes))
+    dispatch[LONG1] = load_long1
+
+    def load_long4(self):
+        n = mloads('i' + self.read(4))
+        bytes = self.read(n)
+        self.append(decode_long(bytes))
+    dispatch[LONG4] = load_long4
+
+    def load_float(self):
+        self.append(float(self.readline()[:-1]))
+    dispatch[FLOAT] = load_float
+
+    def load_binfloat(self, unpack=struct.unpack):
+        self.append(unpack('>d', self.read(8))[0])
+    dispatch[BINFLOAT] = load_binfloat
+
+    def load_string(self):
+        rep = self.readline()
+        if len(rep) < 3:
+            raise ValueError, "insecure string pickle"
+        if rep[0] == "'" == rep[-2]:
+            rep = rep[1:-2]
+        elif rep[0] == '"' == rep[-2]:
+            rep = rep[1:-2]
+        else:
+            raise ValueError, "insecure string pickle"
+        self.append(rep.decode("string-escape"))
+    dispatch[STRING] = load_string
+
+    def load_binstring(self):
+        L = mloads('i' + self.read(4))
+        self.append(self.read(L))
+    dispatch[BINSTRING] = load_binstring
+
+    def load_unicode(self):
+        self.append(unicode(self.readline()[:-1],'raw-unicode-escape'))
+    dispatch[UNICODE] = load_unicode
+
+    def load_binunicode(self):
+        L = mloads('i' + self.read(4))
+        self.append(unicode(self.read(L),'utf-8'))
+    dispatch[BINUNICODE] = load_binunicode
+
+    def load_short_binstring(self):
+        L = ord(self.read(1))
+        self.append(self.read(L))
+    dispatch[SHORT_BINSTRING] = load_short_binstring
+
+    def load_tuple(self):
+        k = self.marker()
+        self.stack[k:] = [tuple(self.stack[k+1:])]
+    dispatch[TUPLE] = load_tuple
+
+    def load_empty_tuple(self):
+        self.stack.append(())
+    dispatch[EMPTY_TUPLE] = load_empty_tuple
+
+    def load_tuple1(self):
+        self.stack[-1] = (self.stack[-1],)
+    dispatch[TUPLE1] = load_tuple1
+
+    def load_tuple2(self):
+        self.stack[-2:] = [(self.stack[-2], self.stack[-1])]
+    dispatch[TUPLE2] = load_tuple2
+
+    def load_tuple3(self):
+        self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])]
+    dispatch[TUPLE3] = load_tuple3
+
+    def load_empty_list(self):
+        self.stack.append([])
+    dispatch[EMPTY_LIST] = load_empty_list
+
+    def load_empty_dictionary(self):
+        self.stack.append({})
+    dispatch[EMPTY_DICT] = load_empty_dictionary
+
+    def load_list(self):
+        k = self.marker()
+        self.stack[k:] = [self.stack[k+1:]]
+    dispatch[LIST] = load_list
+
+    def load_dict(self):
+        k = self.marker()
+        d = {}
+        items = self.stack[k+1:]
+        for i in range(0, len(items), 2):
+            key = items[i]
+            value = items[i+1]
+            d[key] = value
+        self.stack[k:] = [d]
+    dispatch[DICT] = load_dict
+
+    # INST and OBJ differ only in how they get a class object.  It's not
+    # only sensible to do the rest in a common routine, the two routines
+    # previously diverged and grew different bugs.
+    # klass is the class to instantiate, and k points to the topmost mark
+    # object, following which are the arguments for klass.__init__.
+    def _instantiate(self, klass, k):
+        args = tuple(self.stack[k+1:])
+        del self.stack[k:]
+        instantiated = 0
+        if (not args and
+                type(klass) is ClassType and
+                not hasattr(klass, "__getinitargs__")):
+            try:
+                value = _EmptyClass()
+                value.__class__ = klass
+                instantiated = 1
+            except RuntimeError:
+                # In restricted execution, assignment to inst.__class__ is
+                # prohibited
+                pass
+        if not instantiated:
+            try:
+                value = klass(*args)
+            except TypeError, err:
+                raise TypeError, "in constructor for %s: %s" % (
+                    klass.__name__, str(err)), sys.exc_info()[2]
+        self.append(value)
+
+    def load_inst(self):
+        module = self.readline()[:-1]
+        name = self.readline()[:-1]
+        klass = self.find_class(module, name)
+        self._instantiate(klass, self.marker())
+    dispatch[INST] = load_inst
+
+    def load_obj(self):
+        # Stack is ... markobject classobject arg1 arg2 ...
+        k = self.marker()
+        klass = self.stack.pop(k+1)
+        self._instantiate(klass, k)
+    dispatch[OBJ] = load_obj
+
+    def load_newobj(self):
+        args = self.stack.pop()
+        cls = self.stack[-1]
+        obj = cls.__new__(cls, *args)
+        self.stack[-1] = obj
+    dispatch[NEWOBJ] = load_newobj
+
+    def load_global(self):
+        module = self.readline()[:-1]
+        name = self.readline()[:-1]
+        klass = self.find_class(module, name)
+        self.append(klass)
+    dispatch[GLOBAL] = load_global
+
+    def load_ext1(self):
+        code = ord(self.read(1))
+        self.get_extension(code)
+    dispatch[EXT1] = load_ext1
+
+    def load_ext2(self):
+        code = mloads('i' + self.read(2) + '\000\000')
+        self.get_extension(code)
+    dispatch[EXT2] = load_ext2
+
+    def load_ext4(self):
+        code = mloads('i' + self.read(4))
+        self.get_extension(code)
+    dispatch[EXT4] = load_ext4
+
+    def get_extension(self, code):
+        nil = []
+        obj = _extension_cache.get(code, nil)
+        if obj is not nil:
+            self.append(obj)
+            return
+        key = _inverted_registry.get(code)
+        if not key:
+            raise ValueError("unregistered extension code %d" % code)
+        obj = self.find_class(*key)
+        _extension_cache[code] = obj
+        self.append(obj)
+
+    def find_class(self, module, name):
+        # Subclasses may override this
+        __import__(module)
+        mod = sys.modules[module]
+        klass = getattr(mod, name)
+        return klass
+
+    def load_reduce(self):
+        args = self.stack.pop()
+        func = self.stack[-1]
+        value = self.stack[-1](*args)
+        self.stack[-1] = value
+    dispatch[REDUCE] = load_reduce
+
+    def load_pop(self):
+        del self.stack[-1]
+    dispatch[POP] = load_pop
+
+    def load_pop_mark(self):
+        k = self.marker()
+        del self.stack[k:]
+    dispatch[POP_MARK] = load_pop_mark
+
+    def load_dup(self):
+        self.append(self.stack[-1])
+    dispatch[DUP] = load_dup
+
+    def load_get(self):
+        self.append(self.memo[self.readline()[:-1]])
+    dispatch[GET] = load_get
+
+    def load_binget(self):
+        i = ord(self.read(1))
+        self.append(self.memo[repr(i)])
+    dispatch[BINGET] = load_binget
+
+    def load_long_binget(self):
+        i = mloads('i' + self.read(4))
+        self.append(self.memo[repr(i)])
+    dispatch[LONG_BINGET] = load_long_binget
+
+    def load_put(self):
+        self.memo[self.readline()[:-1]] = self.stack[-1]
+    dispatch[PUT] = load_put
+
+    def load_binput(self):
+        i = ord(self.read(1))
+        self.memo[repr(i)] = self.stack[-1]
+    dispatch[BINPUT] = load_binput
+
+    def load_long_binput(self):
+        i = mloads('i' + self.read(4))
+        self.memo[repr(i)] = self.stack[-1]
+    dispatch[LONG_BINPUT] = load_long_binput
+
+    def load_append(self):
+        value = self.stack.pop()
+        self.stack[-1].append(value)
+    dispatch[APPEND] = load_append
+
+    def load_appends(self):
+        stack = self.stack
+        mark = self.marker()
+        lst = stack[mark - 1]
+        lst.extend(stack[mark + 1:])
+        del stack[mark:]
+    dispatch[APPENDS] = load_appends
+
+    def load_setitem(self):
+        stack = self.stack
+        value = stack.pop()
+        key = stack.pop()
+        dict = stack[-1]
+        dict[key] = value
+    dispatch[SETITEM] = load_setitem
+
+    def load_setitems(self):
+        stack = self.stack
+        mark = self.marker()
+        dict = stack[mark - 1]
+        for i in range(mark + 1, len(stack), 2):
+            dict[stack[i]] = stack[i + 1]
+
+        del stack[mark:]
+    dispatch[SETITEMS] = load_setitems
+
+    def load_build(self):
+        stack = self.stack
+        state = stack.pop()
+        inst = stack[-1]
+        setstate = getattr(inst, "__setstate__", None)
+        if setstate:
+            setstate(state)
+            return
+        slotstate = None
+        if isinstance(state, tuple) and len(state) == 2:
+            state, slotstate = state
+        if state:
+            try:
+                d = inst.__dict__
+                try:
+                    for k, v in state.iteritems():
+                        d[intern(k)] = v
+                # keys in state don't have to be strings
+                # don't blow up, but don't go out of our way
+                except TypeError:
+                    d.update(state)
+
+            except RuntimeError:
+                # XXX In restricted execution, the instance's __dict__
+                # is not accessible.  Use the old way of unpickling
+                # the instance variables.  This is a semantic
+                # difference when unpickling in restricted
+                # vs. unrestricted modes.
+                # Note, however, that cPickle has never tried to do the
+                # .update() business, and always uses
+                #     PyObject_SetItem(inst.__dict__, key, value) in a
+                # loop over state.items().
+                for k, v in state.items():
+                    setattr(inst, k, v)
+        if slotstate:
+            for k, v in slotstate.items():
+                setattr(inst, k, v)
+    dispatch[BUILD] = load_build
+
+    def load_mark(self):
+        self.append(self.mark)
+    dispatch[MARK] = load_mark
+
+#from pickle import decode_long
+
+def decode_long(data):
+    r"""Decode a long from a two's complement little-endian binary string.
+
+    >>> decode_long('')
+    0L
+    >>> decode_long("\xff\x00")
+    255L
+    >>> decode_long("\xff\x7f")
+    32767L
+    >>> decode_long("\x00\xff")
+    -256L
+    >>> decode_long("\x00\x80")
+    -32768L
+    >>> decode_long("\x80")
+    -128L
+    >>> decode_long("\x7f")
+    127L
+    """
+
+    nbytes = len(data)
+    if nbytes == 0:
+        return 0L
+    ind = nbytes - 1
+    while ind and ord(data[ind]) == 0:
+        ind -= 1
+    n = ord(data[ind])
+    while ind:
+        n <<= 8
+        ind -= 1
+        if ord(data[ind]):
+            n += ord(data[ind])
+    if ord(data[nbytes - 1]) >= 128:
+        n -= 1L << (nbytes << 3)
+    return n
+
+def load(f):
+    return Unpickler(f).load()
+
+def loads(str):
+    f = StringIO(str)
+    return Unpickler(f).load()
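
The Unpickler added above only requires an object exposing read() and
readline(), so any file-like object will do. A quick round-trip sanity
sketch, assuming the stdlib pickle module for the dumping side and that
this file is importable as cPickle (as it is on PyPy):

    import pickle
    from StringIO import StringIO
    from cPickle import Unpickler      # resolves to this module on PyPy

    obj = {"spam": [1, 2L, u"\xe9"]}
    data = pickle.dumps(obj, 2)        # a protocol 2 stream
    assert Unpickler(StringIO(data)).load() == obj
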
diff --git a/lib_pypy/datetime.py b/lib_pypy/datetime.py
--- a/lib_pypy/datetime.py
+++ b/lib_pypy/datetime.py
@@ -1032,8 +1032,8 @@
     def __setstate(self, string):
         if len(string) != 4 or not (1 <= ord(string[2]) <= 12):
             raise TypeError("not enough arguments")
-        yhi, ylo, self._month, self._day = map(ord, string)
-        self._year = yhi * 256 + ylo
+        self._month, self._day = ord(string[2]), ord(string[3])
+        self._year = ord(string[0]) * 256 + ord(string[1])
 
     def __reduce__(self):
         return (self.__class__, self._getstate())
@@ -1421,9 +1421,10 @@
     def __setstate(self, string, tzinfo):
         if len(string) != 6 or ord(string[0]) >= 24:
             raise TypeError("an integer is required")
-        self._hour, self._minute, self._second, us1, us2, us3 = \
-                                                            map(ord, string)
-        self._microsecond = (((us1 << 8) | us2) << 8) | us3
+        self._hour, self._minute, self._second = ord(string[0]), \
+                                                 ord(string[1]), ord(string[2])
+        self._microsecond = (((ord(string[3]) << 8) | \
+                            ord(string[4])) << 8) | ord(string[5])
         self._tzinfo = tzinfo
 
     def __reduce__(self):
@@ -1903,10 +1904,11 @@
             return (basestate, self._tzinfo)
 
     def __setstate(self, string, tzinfo):
-        (yhi, ylo, self._month, self._day, self._hour,
-         self._minute, self._second, us1, us2, us3) = map(ord, string)
-        self._year = yhi * 256 + ylo
-        self._microsecond = (((us1 << 8) | us2) << 8) | us3
+        (self._month, self._day, self._hour, self._minute,
+            self._second) = (ord(string[2]), ord(string[3]), ord(string[4]),
+                             ord(string[5]), ord(string[6]))
+        self._year = ord(string[0]) * 256 + ord(string[1])
+        self._microsecond = (((ord(string[7]) << 8) | ord(string[8])) << 8) | ord(string[9])
         self._tzinfo = tzinfo
 
     def __reduce__(self):
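
The rewritten __setstate methods decode the pickled state byte by byte: for
a date the four bytes are year-high, year-low, month and day, so the year
comes back as ord(string[0]) * 256 + ord(string[1]). A tiny round-trip
sketch of that layout (pack_date/unpack_date are made-up helpers, not
datetime API):

    def pack_date(year, month, day):
        yhi, ylo = divmod(year, 256)
        return chr(yhi) + chr(ylo) + chr(month) + chr(day)

    def unpack_date(string):
        year = ord(string[0]) * 256 + ord(string[1])
        return year, ord(string[2]), ord(string[3])

    assert unpack_date(pack_date(2012, 3, 14)) == (2012, 3, 14)
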
diff --git a/pypy/annotation/builtin.py b/pypy/annotation/builtin.py
--- a/pypy/annotation/builtin.py
+++ b/pypy/annotation/builtin.py
@@ -37,7 +37,11 @@
     try:
         realresult = func(*args)
     except (ValueError, OverflowError):
-        return s_ImpossibleValue   # no possible answer for this precise input
+        # no possible answer for this precise input.  Be conservative
+        # and keep the computation non-constant.  Example:
+        # unichr(constant-that-doesn't-fit-16-bits) on platforms where
+        # the underlying Python has sys.maxunicode == 0xffff.
+        return s_result
     s_realresult = immutablevalue(realresult)
     if not s_result.contains(s_realresult):
         raise Exception("%s%r returned %r, which is not contained in %s" % (
@@ -163,7 +167,7 @@
                         r.const = False
                 return r
                 
-            assert not issubclass(typ, (int,long)) or typ in (bool, int), (
+            assert not issubclass(typ, (int, long)) or typ in (bool, int, long), (
                 "for integers only isinstance(.,int|r_uint) are supported")
  
             if s_obj.is_constant():
@@ -297,7 +301,7 @@
 def robjmodel_instantiate(s_clspbc):
     assert isinstance(s_clspbc, SomePBC)
     clsdef = None
-    more_than_one = len(s_clspbc.descriptions)
+    more_than_one = len(s_clspbc.descriptions) > 1
     for desc in s_clspbc.descriptions:
         cdef = desc.getuniqueclassdef()
         if more_than_one:
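
The builtin.py hunk makes constant-folding of builtins conservative: if
calling the real function on the constant arguments raises ValueError or
OverflowError, the annotator now keeps the general (non-constant) result
instead of "impossible value", because the translated program might still
be able to compute it. A plain-Python sketch of that decision, with
fold_constant_call as a made-up stand-in for the annotator helper:

    def fold_constant_call(func, args, general_result):
        try:
            return ("constant", func(*args))
        except (ValueError, OverflowError):
            # the host interpreter cannot compute this value, but the
            # translated program might: keep the annotation non-constant
            return ("general", general_result)

    assert fold_constant_call(chr, (65,), "SomeChar") == ("constant", "A")
    assert fold_constant_call(chr, (300,), "SomeChar") == ("general", "SomeChar")
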
diff --git a/pypy/annotation/classdef.py b/pypy/annotation/classdef.py
--- a/pypy/annotation/classdef.py
+++ b/pypy/annotation/classdef.py
@@ -134,12 +134,19 @@
             if self.name not in homedef.classdesc.all_enforced_attrs:
                 self.attr_allowed = False
                 if not self.readonly:
-                    raise NoSuchAttrError(homedef, self.name)
+                    raise NoSuchAttrError(
+                        "setting forbidden attribute %r on %r" % (
+                        self.name, homedef))
 
     def modified(self, classdef='?'):
         self.readonly = False
         if not self.attr_allowed:
-            raise NoSuchAttrError(classdef, self.name)
+            raise NoSuchAttrError(
+                "Attribute %r on %r should be read-only.\n" % (self.name,
+                                                               classdef) +
+                "This error can be caused by another 'getattr' that promoted\n"
+                "the attribute here; the list of read locations is:\n" +
+                '\n'.join([str(loc[0]) for loc in self.read_locations]))
 
 
 class ClassDef(object):
diff --git a/pypy/annotation/description.py b/pypy/annotation/description.py
--- a/pypy/annotation/description.py
+++ b/pypy/annotation/description.py
@@ -398,7 +398,6 @@
             cls = pyobj
             base = object
             baselist = list(cls.__bases__)
-            baselist.reverse()
 
             # special case: skip BaseException in Python 2.5, and pretend
             # that all exceptions ultimately inherit from Exception instead
@@ -408,17 +407,27 @@
             elif baselist == [py.builtin.BaseException]:
                 baselist = [Exception]
 
+            mixins_before = []
+            mixins_after = []
             for b1 in baselist:
                 if b1 is object:
                     continue
                 if b1.__dict__.get('_mixin_', False):
-                    self.add_mixin(b1)
+                    if base is object:
+                        mixins_before.append(b1)
+                    else:
+                        mixins_after.append(b1)
                 else:
                     assert base is object, ("multiple inheritance only supported "
                                             "with _mixin_: %r" % (cls,))
                     base = b1
+            if mixins_before and mixins_after:
+                raise Exception("unsupported: class %r has mixin bases both"
+                                " before and after the regular base" % (self,))
+            self.add_mixins(mixins_after, check_not_in=base)
+            self.add_mixins(mixins_before)
+            self.add_sources_for_class(cls)
 
-            self.add_sources_for_class(cls)
             if base is not object:
                 self.basedesc = bookkeeper.getdesc(base)
 
@@ -480,14 +489,30 @@
                 return
         self.classdict[name] = Constant(value)
 
-    def add_mixin(self, base):
-        for subbase in base.__bases__:
-            if subbase is object:
-                continue
-            assert subbase.__dict__.get("_mixin_", False), ("Mixin class %r has non"
-                "mixin base class %r" % (base, subbase))
-            self.add_mixin(subbase)
-        self.add_sources_for_class(base, mixin=True)
+    def add_mixins(self, mixins, check_not_in=object):
+        if not mixins:
+            return
+        A = type('tmp', tuple(mixins) + (object,), {})
+        mro = A.__mro__
+        assert mro[0] is A and mro[-1] is object
+        mro = mro[1:-1]
+        #
+        skip = set()
+        def add(cls):
+            if cls is not object:
+                for base in cls.__bases__:
+                    add(base)
+                for name in cls.__dict__:
+                    skip.add(name)
+        add(check_not_in)
+        #
+        for base in reversed(mro):
+            assert base.__dict__.get("_mixin_", False), ("Mixin class %r has non"
+                "mixin base class %r" % (mixins, base))
+            for name, value in base.__dict__.items():
+                if name in skip:
+                    continue
+                self.add_source_attribute(name, value, mixin=True)
 
     def add_sources_for_class(self, cls, mixin=False):
         for name, value in cls.__dict__.items():
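
add_mixins() builds a throwaway class whose bases are the mixins purely so
that Python computes their method resolution order, then copies attributes
while walking that MRO in reverse, so an earlier mixin overrides a later
one exactly as normal lookup would. A small plain-Python sketch of the
trick (not RPython; the classes are illustrative):

    class A(object):
        def foo(self): return 1
    class B(A):
        def foo(self): return 2
    class C(A):
        pass

    mixins = (B, C)
    tmp = type('tmp', mixins + (object,), {})
    mro = tmp.__mro__[1:-1]            # drop the temporary class and object
    assert mro == (B, C, A)

    attrs = {}
    for base in reversed(mro):         # A first, then C, then B
        attrs.update(base.__dict__)
    assert attrs['foo'] is B.__dict__['foo']   # the earliest mixin wins
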
diff --git a/pypy/annotation/model.py b/pypy/annotation/model.py
--- a/pypy/annotation/model.py
+++ b/pypy/annotation/model.py
@@ -786,12 +786,15 @@
 #
 # safety check that no-one is trying to make annotation and translation
 # faster by providing the -O option to Python.
-try:
-    assert False
-except AssertionError:
-    pass   # fine
-else:
-    raise RuntimeError("The annotator relies on 'assert' statements from the\n"
+import os
+if "WINGDB_PYTHON" not in os.environ:
+    # ...but avoiding this boring check in the IDE
+    try:
+        assert False
+    except AssertionError:
+        pass   # fine
+    else:
+        raise RuntimeError("The annotator relies on 'assert' statements from the\n"
                      "\tannotated program: you cannot run it with 'python -O'.")
 
 # this has the side-effect of registering the unary and binary operations
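
The model.py hunk keeps the sanity check that the annotator's own assert
statements were not compiled away with python -O, but skips it when the
Wing IDE debugger is detected via WINGDB_PYTHON. The detection itself is
just "does an assert that must fail actually raise?"; a minimal sketch:

    def asserts_enabled():
        try:
            assert False
        except AssertionError:
            return True
        return False

    # under -O the assert is stripped and __debug__ is False
    assert asserts_enabled() == __debug__
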
diff --git a/pypy/annotation/test/test_annrpython.py b/pypy/annotation/test/test_annrpython.py
--- a/pypy/annotation/test/test_annrpython.py
+++ b/pypy/annotation/test/test_annrpython.py
@@ -1,15 +1,12 @@
 from __future__ import with_statement
-import autopath
 import py.test
 import sys
 from pypy import conftest
-from pypy.tool.udir import udir
 
 from pypy.annotation import model as annmodel
 from pypy.annotation.annrpython import RPythonAnnotator as _RPythonAnnotator
 from pypy.translator.translator import graphof as tgraphof
 from pypy.annotation import policy
-from pypy.annotation import specialize
 from pypy.annotation.listdef import ListDef, ListChangeUnallowed
 from pypy.annotation.dictdef import DictDef
 from pypy.objspace.flow.model import *
@@ -2431,6 +2428,93 @@
         assert isinstance(s.items[1], annmodel.SomeChar)
         assert isinstance(s.items[2], annmodel.SomeChar)
 
+    def test_mixin_first(self):
+        class Mixin(object):
+            _mixin_ = True
+            def foo(self): return 4
+        class Base(object):
+            def foo(self): return 5
+        class Concrete(Mixin, Base):
+            pass
+        def f():
+            return Concrete().foo()
+
+        assert f() == 4
+        a = self.RPythonAnnotator()
+        s = a.build_types(f, [])
+        assert s.const == 4
+
+    def test_mixin_last(self):
+        class Mixin(object):
+            _mixin_ = True
+            def foo(self): return 4
+        class Base(object):
+            def foo(self): return 5
+        class Concrete(Base, Mixin):
+            pass
+        def f():
+            return Concrete().foo()
+
+        assert f() == 5
+        a = self.RPythonAnnotator()
+        s = a.build_types(f, [])
+        assert s.const == 5
+
+    def test_mixin_concrete(self):
+        class Mixin(object):
+            _mixin_ = True
+            def foo(self): return 4
+        class Concrete(Mixin):
+            def foo(self): return 5
+        def f():
+            return Concrete().foo()
+
+        assert f() == 5
+        a = self.RPythonAnnotator()
+        s = a.build_types(f, [])
+        assert s.const == 5
+
+    def test_multiple_mixins_mro(self):
+        # an obscure situation, but it occurred in module/micronumpy/types.py
+        class A(object):
+            _mixin_ = True
+            def foo(self): return 1
+        class B(A):
+            _mixin_ = True
+            def foo(self): return 2
+        class C(A):
+            _mixin_ = True
+        class D(B, C):
+            _mixin_ = True
+        class Concrete(D):
+            pass
+        def f():
+            return Concrete().foo()
+
+        assert f() == 2
+        a = self.RPythonAnnotator()
+        s = a.build_types(f, [])
+        assert s.const == 2
+
+    def test_multiple_mixins_mro_2(self):
+        class A(object):
+            _mixin_ = True
+            def foo(self): return 1
+        class B(A):
+            _mixin_ = True
+            def foo(self): return 2
+        class C(A):
+            _mixin_ = True
+        class Concrete(C, B):
+            pass
+        def f():
+            return Concrete().foo()
+
+        assert f() == 2
+        a = self.RPythonAnnotator()
+        s = a.build_types(f, [])
+        assert s.const == 2
+
     def test___class___attribute(self):
         class Base(object): pass
         class A(Base): pass
@@ -2469,6 +2553,26 @@
         s = a.build_types(f, [int])
         assert s.knowntype == int
 
+    def test_slots_reads(self):
+        class A(object):
+            __slots__ = ()
+        class B(A):
+            def __init__(self, x):
+                self.x = x
+        def f(x):
+            if x:
+                a = A()
+            else:
+                a = B(x)
+            return a.x   # should explode here
+
+        a = self.RPythonAnnotator()
+        e = py.test.raises(Exception, a.build_types, f, [int])
+        # this should explode on reading the attribute 'a.x', but it can
+        # sometimes explode on 'self.x = x', which does not make much sense.
+        # But it looks hard to fix in general: we don't know yet during 'a.x'
+        # if the attribute x will be read-only or read-write.
+
     def test_unboxed_value(self):
         class A(object):
             __slots__ = ()
diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py
--- a/pypy/config/pypyoption.py
+++ b/pypy/config/pypyoption.py
@@ -13,7 +13,7 @@
                and not p.basename.startswith('test')]
 
 essential_modules = dict.fromkeys(
-    ["exceptions", "_file", "sys", "__builtin__", "posix"]
+    ["exceptions", "_file", "sys", "__builtin__", "posix", "_warnings"]
 )
 
 default_modules = essential_modules.copy()
diff --git a/pypy/interpreter/astcompiler/assemble.py b/pypy/interpreter/astcompiler/assemble.py
--- a/pypy/interpreter/astcompiler/assemble.py
+++ b/pypy/interpreter/astcompiler/assemble.py
@@ -610,6 +610,8 @@
     ops.JUMP_IF_FALSE_OR_POP : 0,
     ops.POP_JUMP_IF_TRUE : -1,
     ops.POP_JUMP_IF_FALSE : -1,
+
+    ops.BUILD_LIST_FROM_ARG: 1,
 }
 
 
diff --git a/pypy/interpreter/astcompiler/codegen.py b/pypy/interpreter/astcompiler/codegen.py
--- a/pypy/interpreter/astcompiler/codegen.py
+++ b/pypy/interpreter/astcompiler/codegen.py
@@ -965,7 +965,7 @@
         self.emit_op_arg(ops.CALL_METHOD, (kwarg_count << 8) | arg_count)
         return True
 
-    def _listcomp_generator(self, gens, gen_index, elt):
+    def _listcomp_generator(self, gens, gen_index, elt, single=False):
         start = self.new_block()
         skip = self.new_block()
         if_cleanup = self.new_block()
@@ -973,6 +973,8 @@
         gen = gens[gen_index]
         assert isinstance(gen, ast.comprehension)
         gen.iter.walkabout(self)
+        if single:
+            self.emit_op_arg(ops.BUILD_LIST_FROM_ARG, 0)
         self.emit_op(ops.GET_ITER)
         self.use_next_block(start)
         self.emit_jump(ops.FOR_ITER, anchor)
@@ -998,8 +1000,12 @@
 
     def visit_ListComp(self, lc):
         self.update_position(lc.lineno)
-        self.emit_op_arg(ops.BUILD_LIST, 0)
-        self._listcomp_generator(lc.generators, 0, lc.elt)
+        if len(lc.generators) != 1 or lc.generators[0].ifs:
+            single = False
+            self.emit_op_arg(ops.BUILD_LIST, 0)
+        else:
+            single = True
+        self._listcomp_generator(lc.generators, 0, lc.elt, single=single)
 
     def _comp_generator(self, node, generators, gen_index):
         start = self.new_block()
diff --git a/pypy/interpreter/astcompiler/test/test_compiler.py b/pypy/interpreter/astcompiler/test/test_compiler.py
--- a/pypy/interpreter/astcompiler/test/test_compiler.py
+++ b/pypy/interpreter/astcompiler/test/test_compiler.py
@@ -58,7 +58,8 @@
         w_res = pyco_expr.exec_host_bytecode(w_dict, w_dict)
         res = space.str_w(space.repr(w_res))
         if not isinstance(expected, float):
-            assert res == repr(expected)
+            noL = lambda expr: expr.replace('L', '')
+            assert noL(res) == noL(repr(expected))
         else:
             # Float representation can vary a bit between interpreter
             # versions, compare the numbers instead.
@@ -908,3 +909,17 @@
             return d['f'](5)
         """)
         assert 'generator' in space.str_w(space.repr(w_generator))
+        
+    def test_list_comprehension(self):
+        source = "def f(): [i for i in l]"
+        source2 = "def f(): [i for i in l for j in l]"
+        source3 = "def f(): [i for i in l if i]"
+        counts = self.count_instructions(source)
+        assert ops.BUILD_LIST not in counts
+        assert counts[ops.BUILD_LIST_FROM_ARG] == 1
+        counts = self.count_instructions(source2)
+        assert counts[ops.BUILD_LIST] == 1
+        assert ops.BUILD_LIST_FROM_ARG not in counts
+        counts = self.count_instructions(source3)
+        assert counts[ops.BUILD_LIST] == 1
+        assert ops.BUILD_LIST_FROM_ARG not in counts
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -7,7 +7,8 @@
 from pypy.interpreter.miscutils import ThreadLocals
 from pypy.tool.cache import Cache
 from pypy.tool.uid import HUGEVAL_BYTES
-from pypy.rlib.objectmodel import we_are_translated, newlist, compute_unique_id
+from pypy.rlib.objectmodel import we_are_translated, newlist_hint,\
+     compute_unique_id
 from pypy.rlib.debug import make_sure_not_resized
 from pypy.rlib.timer import DummyTimer, Timer
 from pypy.rlib.rarithmetic import r_uint
@@ -328,7 +329,7 @@
                 raise
             modname = self.str_w(w_modname)
             mod = self.interpclass_w(w_mod)
-            if isinstance(mod, Module):
+            if isinstance(mod, Module) and not mod.startup_called:
                 self.timer.start("startup " + modname)
                 mod.init(self)
                 self.timer.stop("startup " + modname)
@@ -833,7 +834,7 @@
             items = []
         else:
             try:
-                items = newlist(lgt_estimate)
+                items = newlist_hint(lgt_estimate)
             except MemoryError:
                 items = [] # it might have lied
         #
@@ -1471,8 +1472,8 @@
 
     def warn(self, msg, w_warningcls):
         self.appexec([self.wrap(msg), w_warningcls], """(msg, warningcls):
-            import warnings
-            warnings.warn(msg, warningcls, stacklevel=2)
+            import _warnings
+            _warnings.warn(msg, warningcls, stacklevel=2)
         """)
 
     def resolve_target(self, w_obj):
diff --git a/pypy/interpreter/buffer.py b/pypy/interpreter/buffer.py
--- a/pypy/interpreter/buffer.py
+++ b/pypy/interpreter/buffer.py
@@ -20,6 +20,7 @@
 from pypy.interpreter.gateway import interp2app, unwrap_spec
 from pypy.interpreter.error import OperationError
 from pypy.rlib.objectmodel import compute_hash
+from pypy.rlib.rstring import StringBuilder
 
 
 class Buffer(Wrappable):
@@ -152,12 +153,13 @@
     if space.isinstance_w(w_object, space.w_unicode):
         # unicode objects support the old buffer interface
         # but not the new buffer interface (change in python  2.7)
-        from pypy.rlib.rstruct.unichar import pack_unichar
-        charlist = []
-        for unich in space.unicode_w(w_object):
-            pack_unichar(unich, charlist)
+        from pypy.rlib.rstruct.unichar import pack_unichar, UNICODE_SIZE
+        unistr = space.unicode_w(w_object)
+        builder = StringBuilder(len(unistr) * UNICODE_SIZE)
+        for unich in unistr:
+            pack_unichar(unich, builder)
         from pypy.interpreter.buffer import StringBuffer
-        w_buffer = space.wrap(StringBuffer(''.join(charlist)))
+        w_buffer = space.wrap(StringBuffer(builder.build()))
     else:
         w_buffer = space.buffer(w_object)
 
diff --git a/pypy/interpreter/pyopcode.py b/pypy/interpreter/pyopcode.py
--- a/pypy/interpreter/pyopcode.py
+++ b/pypy/interpreter/pyopcode.py
@@ -15,9 +15,8 @@
 from pypy.rlib.rarithmetic import r_uint, intmask
 from pypy.rlib.unroll import unrolling_iterable
 from pypy.rlib.debug import check_nonneg
-from pypy.tool.stdlib_opcode import (bytecode_spec, host_bytecode_spec,
-                                     unrolling_all_opcode_descs, opmap,
-                                     host_opmap)
+from pypy.tool.stdlib_opcode import (bytecode_spec,
+                                     unrolling_all_opcode_descs)
 
 def unaryoperation(operationname):
     """NOT_RPYTHON"""
@@ -713,6 +712,19 @@
         w_list = self.space.newlist(items)
         self.pushvalue(w_list)
 
+    def BUILD_LIST_FROM_ARG(self, _, next_instr):
+        # this is a little dance, because the new list has to sit below
+        # the iterable on the value stack
+        last_val = self.popvalue()
+        try:
+            lgt = self.space.len_w(last_val)
+        except OperationError, e:
+            if e.async(self.space):
+                raise
+            lgt = 0 # oh well
+        self.pushvalue(self.space.newlist([], sizehint=lgt))
+        self.pushvalue(last_val)
+
     def LOAD_ATTR(self, nameindex, next_instr):
         "obj.attributename"
         w_obj = self.popvalue()
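
BUILD_LIST_FROM_ARG pops the iterable, pushes a fresh empty list created
with a size hint taken from len() of that iterable (falling back to 0 when
the length is not available), and pushes the iterable back on top, which is
the +1 stack effect declared in assemble.py. A plain-Python analogue of the
stack manipulation (illustrative only; the real pre-sizing happens inside
space.newlist):

    def build_list_from_arg(stack):
        # ... iterable  ->  ... new_list iterable
        iterable = stack.pop()
        try:
            hint = len(iterable)        # cheap for lists, tuples, dicts, ...
        except TypeError:
            hint = 0                    # no useful hint, e.g. a generator
        new_list = []                   # PyPy pre-sizes this list to hint
        stack.append(new_list)
        stack.append(iterable)

    stack = [(1, 2, 3)]
    build_list_from_arg(stack)
    assert stack == [[], (1, 2, 3)]
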
diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py
--- a/pypy/interpreter/pyparser/parsestring.py
+++ b/pypy/interpreter/pyparser/parsestring.py
@@ -1,5 +1,6 @@
 from pypy.interpreter.error import OperationError
 from pypy.interpreter import unicodehelper
+from pypy.rlib.rstring import StringBuilder
 
 def parsestr(space, encoding, s, unicode_literals=False):
     # compiler.transformer.Transformer.decode_literal depends on what 
@@ -115,21 +116,23 @@
     the string is UTF-8 encoded and should be re-encoded in the
     specified encoding.
     """
-    lis = []
+    builder = StringBuilder(len(s))
     ps = 0
     end = len(s)
-    while ps < end:
-        if s[ps] != '\\':
-            # note that the C code has a label here.
-            # the logic is the same.
+    while 1:
+        ps2 = ps
+        while ps < end and s[ps] != '\\':
             if recode_encoding and ord(s[ps]) & 0x80:
                 w, ps = decode_utf8(space, s, ps, end, recode_encoding)
-                # Append bytes to output buffer.
-                lis.append(w)
+                builder.append(w)
+                ps2 = ps
             else:
-                lis.append(s[ps])
                 ps += 1
-            continue
+        if ps > ps2:
+            builder.append_slice(s, ps2, ps)
+        if ps == end:
+            break
+
         ps += 1
         if ps == end:
             raise_app_valueerror(space, 'Trailing \\ in string')
@@ -140,25 +143,25 @@
         if ch == '\n':
             pass
         elif ch == '\\':
-            lis.append('\\')
+            builder.append('\\')
         elif ch == "'":
-            lis.append("'")
+            builder.append("'")
         elif ch == '"':
-            lis.append('"')
+            builder.append('"')
         elif ch == 'b':
-            lis.append("\010")
+            builder.append("\010")
         elif ch == 'f':
-            lis.append('\014') # FF
+            builder.append('\014') # FF
         elif ch == 't':
-            lis.append('\t')
+            builder.append('\t')
         elif ch == 'n':
-            lis.append('\n')
+            builder.append('\n')
         elif ch == 'r':
-            lis.append('\r')
+            builder.append('\r')
         elif ch == 'v':
-            lis.append('\013') # VT
+            builder.append('\013') # VT
         elif ch == 'a':
-            lis.append('\007') # BEL, not classic C
+            builder.append('\007') # BEL, not classic C
         elif ch in '01234567':
             # Look for up to two more octal digits
             span = ps
@@ -168,13 +171,13 @@
             # emulate a strange wrap-around behavior of CPython:
             # \400 is the same as \000 because 0400 == 256
             num = int(octal, 8) & 0xFF
-            lis.append(chr(num))
+            builder.append(chr(num))
             ps = span
         elif ch == 'x':
             if ps+2 <= end and isxdigit(s[ps]) and isxdigit(s[ps + 1]):
                 hexa = s[ps : ps + 2]
                 num = int(hexa, 16)
-                lis.append(chr(num))
+                builder.append(chr(num))
                 ps += 2
             else:
                 raise_app_valueerror(space, 'invalid \\x escape')
@@ -184,13 +187,13 @@
             # this was not an escape, so the backslash
             # has to be added, and we start over in
             # non-escape mode.
-            lis.append('\\')
+            builder.append('\\')
             ps -= 1
             assert ps >= 0
             continue
             # an arbitrary number of unescaped UTF-8 bytes may follow.
 
-    buf = ''.join(lis)
+    buf = builder.build()
     return buf
 
 
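The rewritten loop above copies whole runs of non-backslash characters with a single append_slice() instead of one append per character. A simplified, runnable sketch of just that run-copying structure (escape handling elided):

    def copy_plain_runs(s):
        out = []
        ps, end = 0, len(s)
        while ps < end:
            ps2 = ps
            while ps < end and s[ps] != '\\':
                ps += 1
            if ps > ps2:
                out.append(s[ps2:ps])   # one append per run, not per character
            if ps == end:
                break
            ps += 2                     # skip the backslash + escaped character
        return ''.join(out)

    assert copy_plain_runs(r'ab\ncd') == 'abcd'
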
diff --git a/pypy/interpreter/streamutil.py b/pypy/interpreter/streamutil.py
new file mode 100644
--- /dev/null
+++ b/pypy/interpreter/streamutil.py
@@ -0,0 +1,17 @@
+from pypy.rlib.streamio import StreamError
+from pypy.interpreter.error import OperationError, wrap_oserror2
+
+def wrap_streamerror(space, e, w_filename=None):
+    if isinstance(e, StreamError):
+        return OperationError(space.w_ValueError,
+                              space.wrap(e.message))
+    elif isinstance(e, OSError):
+        return wrap_oserror_as_ioerror(space, e, w_filename)
+    else:
+        # should not happen: wrap_streamerror() is only called for
+        # exceptions in StreamErrors == (OSError, StreamError)
+        return OperationError(space.w_IOError, space.w_None)
+
+def wrap_oserror_as_ioerror(space, e, w_filename=None):
+    return wrap_oserror2(space, e, w_filename,
+                         w_exception_class=space.w_IOError)
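
A small sketch of how these helpers might be used from interp-level code; direct_read() is a hypothetical caller, and StreamErrors is assumed to be the (OSError, StreamError) tuple referred to in the comment above:

    from pypy.rlib.streamio import StreamErrors
    from pypy.interpreter.streamutil import wrap_streamerror

    def direct_read(space, stream, n, w_filename=None):
        try:
            return space.wrap(stream.read(n))
        except StreamErrors, e:
            raise wrap_streamerror(space, e, w_filename)
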
diff --git a/pypy/interpreter/test/test_compiler.py b/pypy/interpreter/test/test_compiler.py
--- a/pypy/interpreter/test/test_compiler.py
+++ b/pypy/interpreter/test/test_compiler.py
@@ -794,7 +794,7 @@
     def test_tuple_constants(self):
         ns = {}
         exec "x = (1, 0); y = (1L, 0L)" in ns
-        assert isinstance(ns["x"][0], int)
+        assert isinstance(ns["x"][0], (int, long))
         assert isinstance(ns["y"][0], long)
 
     def test_division_folding(self):
diff --git a/pypy/interpreter/test/test_objspace.py b/pypy/interpreter/test/test_objspace.py
--- a/pypy/interpreter/test/test_objspace.py
+++ b/pypy/interpreter/test/test_objspace.py
@@ -322,3 +322,14 @@
             space.ALL_BUILTIN_MODULES.pop()
             del space._builtinmodule_list
             mods = space.get_builtinmodule_to_install()
+
+    def test_dont_reload_builtin_mods_on_startup(self):
+        from pypy.tool.option import make_config, make_objspace
+        config = make_config(None)
+        space = make_objspace(config)
+        w_executable = space.wrap('executable')
+        assert space.str_w(space.getattr(space.sys, w_executable)) == 'py.py'
+        space.setattr(space.sys, w_executable, space.wrap('foobar'))
+        assert space.str_w(space.getattr(space.sys, w_executable)) == 'foobar'
+        space.startup()
+        assert space.str_w(space.getattr(space.sys, w_executable)) == 'foobar'
diff --git a/pypy/interpreter/test/test_typedef.py b/pypy/interpreter/test/test_typedef.py
--- a/pypy/interpreter/test/test_typedef.py
+++ b/pypy/interpreter/test/test_typedef.py
@@ -304,6 +304,42 @@
         assert_method(w_o1, "c", True)
         assert_method(w_o2, "c", False)
 
+    def test_total_ordering(self):
+        class W_SomeType(Wrappable):
+            def __init__(self, space, x):
+                self.space = space
+                self.x = x
+
+            def descr__lt(self, w_other):
+                assert isinstance(w_other, W_SomeType)
+                return self.space.wrap(self.x < w_other.x)
+
+            def descr__eq(self, w_other):
+                assert isinstance(w_other, W_SomeType)
+                return self.space.wrap(self.x == w_other.x)
+
+        W_SomeType.typedef = typedef.TypeDef(
+            'some_type',
+            __total_ordering__ = 'auto',
+            __lt__ = interp2app(W_SomeType.descr__lt),
+            __eq__ = interp2app(W_SomeType.descr__eq),
+            )
+        space = self.space
+        w_b = space.wrap(W_SomeType(space, 2))
+        w_c = space.wrap(W_SomeType(space, 2))
+        w_a = space.wrap(W_SomeType(space, 1))
+        # explicitly defined
+        assert space.is_true(space.lt(w_a, w_b))
+        assert not space.is_true(space.eq(w_a, w_b))
+        assert space.is_true(space.eq(w_b, w_c))
+        # automatically defined
+        assert space.is_true(space.le(w_a, w_b))
+        assert space.is_true(space.le(w_b, w_c))
+        assert space.is_true(space.gt(w_b, w_a))
+        assert space.is_true(space.ge(w_b, w_a))
+        assert space.is_true(space.ge(w_b, w_c))
+        assert space.is_true(space.ne(w_a, w_b))
+        assert not space.is_true(space.ne(w_b, w_c))
 
 class AppTestTypeDef:
 
diff --git a/pypy/interpreter/test/test_zpy.py b/pypy/interpreter/test/test_zpy.py
--- a/pypy/interpreter/test/test_zpy.py
+++ b/pypy/interpreter/test/test_zpy.py
@@ -17,14 +17,14 @@
 def test_executable():
     """Ensures sys.executable points to the py.py script"""
     # TODO : watch out for spaces/special chars in pypypath
-    output = run(sys.executable, pypypath,
+    output = run(sys.executable, pypypath, '-S',
                  "-c", "import sys;print sys.executable")
     assert output.splitlines()[-1] == pypypath
 
 def test_special_names():
     """Test the __name__ and __file__ special global names"""
     cmd = "print __name__; print '__file__' in globals()"
-    output = run(sys.executable, pypypath, '-c', cmd)
+    output = run(sys.executable, pypypath, '-S', '-c', cmd)
     assert output.splitlines()[-2] == '__main__'
     assert output.splitlines()[-1] == 'False'
 
@@ -33,24 +33,24 @@
     tmpfile.write("print __name__; print __file__\n")
     tmpfile.close()
 
-    output = run(sys.executable, pypypath, tmpfilepath)
+    output = run(sys.executable, pypypath, '-S', tmpfilepath)
     assert output.splitlines()[-2] == '__main__'
     assert output.splitlines()[-1] == str(tmpfilepath)
 
 def test_argv_command():
     """Some tests on argv"""
     # test 1 : no arguments
-    output = run(sys.executable, pypypath,
+    output = run(sys.executable, pypypath, '-S',
                  "-c", "import sys;print sys.argv")
     assert output.splitlines()[-1] == str(['-c'])
 
     # test 2 : some arguments after
-    output = run(sys.executable, pypypath,
+    output = run(sys.executable, pypypath, '-S',
                  "-c", "import sys;print sys.argv", "hello")
     assert output.splitlines()[-1] == str(['-c','hello'])
     
     # test 3 : additional pypy parameters
-    output = run(sys.executable, pypypath,
+    output = run(sys.executable, pypypath, '-S',
                  "-O", "-c", "import sys;print sys.argv", "hello")
     assert output.splitlines()[-1] == str(['-c','hello'])
 
@@ -65,15 +65,15 @@
     tmpfile.close()
 
     # test 1 : no arguments
-    output = run(sys.executable, pypypath, tmpfilepath)
+    output = run(sys.executable, pypypath, '-S', tmpfilepath)
     assert output.splitlines()[-1] == str([tmpfilepath])
     
     # test 2 : some arguments after
-    output = run(sys.executable, pypypath, tmpfilepath, "hello")
+    output = run(sys.executable, pypypath, '-S', tmpfilepath, "hello")
     assert output.splitlines()[-1] == str([tmpfilepath,'hello'])
     
     # test 3 : additional pypy parameters
-    output = run(sys.executable, pypypath, "-O", tmpfilepath, "hello")
+    output = run(sys.executable, pypypath, '-S', "-O", tmpfilepath, "hello")
     assert output.splitlines()[-1] == str([tmpfilepath,'hello'])
     
 
@@ -95,7 +95,7 @@
     tmpfile.write(TB_NORMALIZATION_CHK)
     tmpfile.close()
 
-    popen = subprocess.Popen([sys.executable, str(pypypath), tmpfilepath],
+    popen = subprocess.Popen([sys.executable, str(pypypath), '-S', tmpfilepath],
                              stderr=subprocess.PIPE)
     _, stderr = popen.communicate()
     assert stderr.endswith('KeyError: <normalized>\n')
diff --git a/pypy/interpreter/typedef.py b/pypy/interpreter/typedef.py
--- a/pypy/interpreter/typedef.py
+++ b/pypy/interpreter/typedef.py
@@ -12,7 +12,7 @@
 from pypy.rlib.jit import promote
 
 class TypeDef:
-    def __init__(self, __name, __base=None, **rawdict):
+    def __init__(self, __name, __base=None, __total_ordering__=None, **rawdict):
         "NOT_RPYTHON: initialization-time only"
         self.name = __name
         if __base is None:
@@ -34,6 +34,9 @@
         # xxx used by faking
         self.fakedcpytype = None
         self.add_entries(**rawdict)
+        assert __total_ordering__ in (None, 'auto'), "Unknown value for __total_ordering__"
+        if __total_ordering__ == 'auto':
+            self.auto_total_ordering()
     
     def add_entries(self, **rawdict):
         # xxx fix the names of the methods to match what app-level expects
@@ -41,7 +44,15 @@
             if isinstance(value, (interp2app, GetSetProperty)):
                 value.name = key
         self.rawdict.update(rawdict)
-    
+
+    def auto_total_ordering(self):
+        assert '__lt__' in self.rawdict, "__total_ordering__='auto' requires __lt__"
+        assert '__eq__' in self.rawdict, "__total_ordering__='auto' requires __eq__"
+        self.add_entries(__le__ = auto__le__,
+                         __gt__ = auto__gt__,
+                         __ge__ = auto__ge__,
+                         __ne__ = auto__ne__)
+
     def _freeze_(self):
         # hint for the annotator: track individual constant instances of TypeDef
         return True
@@ -50,6 +61,26 @@
         return "<%s name=%r>" % (self.__class__.__name__, self.name)
 
 
+# generic special cmp methods defined on top of __lt__ and __eq__, used by
+# automatic total ordering
+
+ at interp2app
+def auto__le__(space, w_self, w_other):
+    return space.not_(space.lt(w_other, w_self))
+
+ at interp2app
+def auto__gt__(space, w_self, w_other):
+    return space.lt(w_other, w_self)
+
+ at interp2app
+def auto__ge__(space, w_self, w_other):
+    return space.not_(space.lt(w_self, w_other))
+
+ at interp2app
+def auto__ne__(space, w_self, w_other):
+    return space.not_(space.eq(w_self, w_other))
+
+
 # ____________________________________________________________
 #  Hash support
 
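The generated comparisons only assume a working __lt__ and __eq__. The identities they rely on, checked here with plain ints as a quick sanity sketch:

    def auto_le(a, b): return not (b < a)
    def auto_gt(a, b): return b < a
    def auto_ge(a, b): return not (a < b)
    def auto_ne(a, b): return not (a == b)

    for a in range(3):
        for b in range(3):
            assert auto_le(a, b) == (a <= b)
            assert auto_gt(a, b) == (a > b)
            assert auto_ge(a, b) == (a >= b)
            assert auto_ne(a, b) == (a != b)
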
diff --git a/pypy/jit/backend/llgraph/llimpl.py b/pypy/jit/backend/llgraph/llimpl.py
--- a/pypy/jit/backend/llgraph/llimpl.py
+++ b/pypy/jit/backend/llgraph/llimpl.py
@@ -171,7 +171,7 @@
     'unicodesetitem'  : (('ref', 'int', 'int'), 'int'),
     'cast_ptr_to_int' : (('ref',), 'int'),
     'cast_int_to_ptr' : (('int',), 'ref'),
-    'debug_merge_point': (('ref', 'int'), None),
+    'debug_merge_point': (('ref', 'int', 'int'), None),
     'force_token'     : ((), 'int'),
     'call_may_force'  : (('int', 'varargs'), 'intorptr'),
     'guard_not_forced': ((), None),
diff --git a/pypy/jit/backend/llsupport/gc.py b/pypy/jit/backend/llsupport/gc.py
--- a/pypy/jit/backend/llsupport/gc.py
+++ b/pypy/jit/backend/llsupport/gc.py
@@ -208,6 +208,7 @@
     This is the class supporting --gcrootfinder=asmgcc.
     """
     is_shadow_stack = False
+    is_64_bit = (WORD == 8)
 
     LOC_REG       = 0
     LOC_ESP_PLUS  = 1
@@ -336,17 +337,17 @@
             self._gcmap_deadentries += 1
             item += asmgcroot.arrayitemsize
 
-    def get_basic_shape(self, is_64_bit=False):
+    def get_basic_shape(self):
         # XXX: Should this code even really know about stack frame layout of
         # the JIT?
-        if is_64_bit:
-            return [chr(self.LOC_EBP_PLUS  | 8),
-                    chr(self.LOC_EBP_MINUS | 8),
-                    chr(self.LOC_EBP_MINUS | 16),
-                    chr(self.LOC_EBP_MINUS | 24),
-                    chr(self.LOC_EBP_MINUS | 32),
-                    chr(self.LOC_EBP_MINUS | 40),
-                    chr(self.LOC_EBP_PLUS  | 0),
+        if self.is_64_bit:
+            return [chr(self.LOC_EBP_PLUS  | 4),    # return addr: at   8(%rbp)
+                    chr(self.LOC_EBP_MINUS | 4),    # saved %rbx:  at  -8(%rbp)
+                    chr(self.LOC_EBP_MINUS | 8),    # saved %r12:  at -16(%rbp)
+                    chr(self.LOC_EBP_MINUS | 12),   # saved %r13:  at -24(%rbp)
+                    chr(self.LOC_EBP_MINUS | 16),   # saved %r14:  at -32(%rbp)
+                    chr(self.LOC_EBP_MINUS | 20),   # saved %r15:  at -40(%rbp)
+                    chr(self.LOC_EBP_PLUS  | 0),    # saved %rbp:  at    (%rbp)
                     chr(0)]
         else:
             return [chr(self.LOC_EBP_PLUS  | 4),    # return addr: at   4(%ebp)
@@ -366,7 +367,11 @@
         shape.append(chr(number | flag))
 
     def add_frame_offset(self, shape, offset):
-        assert (offset & 3) == 0
+        if self.is_64_bit:
+            assert (offset & 7) == 0
+            offset >>= 1
+        else:
+            assert (offset & 3) == 0
         if offset >= 0:
             num = self.LOC_EBP_PLUS | offset
         else:
@@ -518,7 +523,7 @@
     def initialize(self):
         pass
 
-    def get_basic_shape(self, is_64_bit=False):
+    def get_basic_shape(self):
         return []
 
     def add_frame_offset(self, shape, offset):
@@ -594,7 +599,7 @@
         # if convenient for the backend, we compute the info about
         # the flag as (byte-offset, single-byte-flag).
         import struct
-        value = struct.pack("l", flag_word)
+        value = struct.pack(lltype.SignedFmt, flag_word)
         assert value.count('\x00') == len(value) - 1    # only one byte is != 0
         i = 0
         while value[i] == '\x00': i += 1
@@ -769,11 +774,19 @@
         self.generate_function('malloc_unicode', malloc_unicode,
                                [lltype.Signed])
 
-        # Rarely called: allocate a fixed-size amount of bytes, but
-        # not in the nursery, because it is too big.  Implemented like
-        # malloc_nursery_slowpath() above.
-        self.generate_function('malloc_fixedsize', malloc_nursery_slowpath,
-                               [lltype.Signed])
+        # Never called as far as I can tell, but there for completeness:
+        # allocate a fixed-size object, but not in the nursery, because
+        # it is too big.
+        def malloc_big_fixedsize(size, tid):
+            if self.DEBUG:
+                self._random_usage_of_xmm_registers()
+            type_id = llop.extract_ushort(llgroup.HALFWORD, tid)
+            check_typeid(type_id)
+            return llop1.do_malloc_fixedsize_clear(llmemory.GCREF,
+                                                   type_id, size,
+                                                   False, False, False)
+        self.generate_function('malloc_big_fixedsize', malloc_big_fixedsize,
+                               [lltype.Signed] * 2)
 
     def _bh_malloc(self, sizedescr):
         from pypy.rpython.memory.gctypelayout import check_typeid
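
A standalone sketch of the frame-offset encoding that add_frame_offset() now uses; the tag values are assumed to continue the LOC_* sequence shown above, and the negative branch is reconstructed from the -8(%rbp)/-16(%rbp) comments in get_basic_shape():

    LOC_EBP_PLUS, LOC_EBP_MINUS = 2, 3   # assumption: next values after LOC_ESP_PLUS

    def encode_frame_offset(offset, is_64_bit):
        if is_64_bit:
            assert (offset & 7) == 0     # %rbp-relative slots are 8-byte aligned
            offset >>= 1                 # halve it, keeping the low tag bits free
        else:
            assert (offset & 3) == 0
        if offset >= 0:
            return LOC_EBP_PLUS | offset
        return LOC_EBP_MINUS | (-offset)

    # matches the 64-bit entries of get_basic_shape() above:
    assert encode_frame_offset(8, True) == (LOC_EBP_PLUS | 4)     #   8(%rbp)
    assert encode_frame_offset(-16, True) == (LOC_EBP_MINUS | 8)  # -16(%rbp)
    # on 32-bit the offset is kept as-is:
    assert encode_frame_offset(8, False) == (LOC_EBP_PLUS | 8)
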
diff --git a/pypy/jit/backend/llsupport/rewrite.py b/pypy/jit/backend/llsupport/rewrite.py
--- a/pypy/jit/backend/llsupport/rewrite.py
+++ b/pypy/jit/backend/llsupport/rewrite.py
@@ -96,8 +96,10 @@
     def handle_new_fixedsize(self, descr, op):
         assert isinstance(descr, SizeDescr)
         size = descr.size
-        self.gen_malloc_nursery(size, op.result)
-        self.gen_initialize_tid(op.result, descr.tid)
+        if self.gen_malloc_nursery(size, op.result):
+            self.gen_initialize_tid(op.result, descr.tid)
+        else:
+            self.gen_malloc_fixedsize(size, descr.tid, op.result)
 
     def handle_new_array(self, arraydescr, op):
         v_length = op.getarg(0)
@@ -112,8 +114,8 @@
                 pass    # total_size is still -1
         elif arraydescr.itemsize == 0:
             total_size = arraydescr.basesize
-        if 0 <= total_size <= 0xffffff:     # up to 16MB, arbitrarily
-            self.gen_malloc_nursery(total_size, op.result)
+        if (total_size >= 0 and
+                self.gen_malloc_nursery(total_size, op.result)):
             self.gen_initialize_tid(op.result, arraydescr.tid)
             self.gen_initialize_len(op.result, v_length, arraydescr.lendescr)
         elif self.gc_ll_descr.kind == 'boehm':
@@ -147,13 +149,22 @@
         # mark 'v_result' as freshly malloced
         self.recent_mallocs[v_result] = None
 
-    def gen_malloc_fixedsize(self, size, v_result):
-        """Generate a CALL_MALLOC_GC(malloc_fixedsize_fn, Const(size)).
-        Note that with the framework GC, this should be called very rarely.
+    def gen_malloc_fixedsize(self, size, typeid, v_result):
+        """Generate a CALL_MALLOC_GC(malloc_fixedsize_fn, ...).
+        Used on Boehm, and on the framework GC for large fixed-size
+        mallocs.  (For all I know this latter case never occurs in
+        practice, but better safe than sorry.)
         """
-        addr = self.gc_ll_descr.get_malloc_fn_addr('malloc_fixedsize')
-        self._gen_call_malloc_gc([ConstInt(addr), ConstInt(size)], v_result,
-                                 self.gc_ll_descr.malloc_fixedsize_descr)
+        if self.gc_ll_descr.fielddescr_tid is not None:  # framework GC
+            assert (size & (WORD-1)) == 0, "size not aligned?"
+            addr = self.gc_ll_descr.get_malloc_fn_addr('malloc_big_fixedsize')
+            args = [ConstInt(addr), ConstInt(size), ConstInt(typeid)]
+            descr = self.gc_ll_descr.malloc_big_fixedsize_descr
+        else:                                            # Boehm
+            addr = self.gc_ll_descr.get_malloc_fn_addr('malloc_fixedsize')
+            args = [ConstInt(addr), ConstInt(size)]
+            descr = self.gc_ll_descr.malloc_fixedsize_descr
+        self._gen_call_malloc_gc(args, v_result, descr)
 
     def gen_boehm_malloc_array(self, arraydescr, v_num_elem, v_result):
         """Generate a CALL_MALLOC_GC(malloc_array_fn, ...) for Boehm."""
@@ -211,8 +222,7 @@
         """
         size = self.round_up_for_allocation(size)
         if not self.gc_ll_descr.can_use_nursery_malloc(size):
-            self.gen_malloc_fixedsize(size, v_result)
-            return
+            return False
         #
         op = None
         if self._op_malloc_nursery is not None:
@@ -238,6 +248,7 @@
         self._previous_size = size
         self._v_last_malloced_nursery = v_result
         self.recent_mallocs[v_result] = None
+        return True
 
     def gen_initialize_tid(self, v_newgcobj, tid):
         if self.gc_ll_descr.fielddescr_tid is not None:
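
The toy rewriter below exercises the new contract: gen_malloc_nursery() returns False when it cannot emit a nursery allocation, and the caller then falls back to the out-of-line fixed-size path. The class and its nursery_limit are invented for illustration only:

    class FakeRewriter(object):
        def __init__(self, nursery_limit):
            self.nursery_limit = nursery_limit
            self.ops = []
        def gen_malloc_nursery(self, size, v_result):
            if size > self.nursery_limit:
                return False             # too big: caller must fall back
            self.ops.append(('malloc_nursery', size))
            return True
        def gen_initialize_tid(self, v_result, tid):
            self.ops.append(('init_tid', tid))
        def gen_malloc_fixedsize(self, size, tid, v_result):
            self.ops.append(('malloc_big_fixedsize', size, tid))
        def handle_new_fixedsize(self, size, tid, v_result):
            if self.gen_malloc_nursery(size, v_result):
                self.gen_initialize_tid(v_result, tid)
            else:
                self.gen_malloc_fixedsize(size, tid, v_result)

    r = FakeRewriter(nursery_limit=128)
    r.handle_new_fixedsize(64, 1234, 'p0')      # small: nursery + tid init
    r.handle_new_fixedsize(4096, 5678, 'p1')    # large: out-of-line helper
    assert r.ops == [('malloc_nursery', 64), ('init_tid', 1234),
                     ('malloc_big_fixedsize', 4096, 5678)]
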
diff --git a/pypy/jit/backend/llsupport/test/test_descr.py b/pypy/jit/backend/llsupport/test/test_descr.py
--- a/pypy/jit/backend/llsupport/test/test_descr.py
+++ b/pypy/jit/backend/llsupport/test/test_descr.py
@@ -148,7 +148,7 @@
     #
     def get_alignment(code):
         # Retrieve default alignment for the compiler/platform
-        return struct.calcsize('l' + code) - struct.calcsize(code)
+        return struct.calcsize(lltype.SignedFmt + code) - struct.calcsize(code)
     assert descr1.basesize == get_alignment('c')
     assert descr2.basesize == get_alignment('p')
     assert descr3.basesize == get_alignment('p')
diff --git a/pypy/jit/backend/llsupport/test/test_ffisupport.py b/pypy/jit/backend/llsupport/test/test_ffisupport.py
--- a/pypy/jit/backend/llsupport/test/test_ffisupport.py
+++ b/pypy/jit/backend/llsupport/test/test_ffisupport.py
@@ -2,6 +2,7 @@
 from pypy.jit.codewriter.longlong import is_64_bit
 from pypy.jit.backend.llsupport.descr import *
 from pypy.jit.backend.llsupport.ffisupport import *
+from pypy.rlib.rarithmetic import is_emulated_long
 
 
 class FakeCPU:
@@ -43,7 +44,7 @@
     assert descr.result_flag == FLAG_UNSIGNED
     assert descr.is_result_signed() == False
 
-    if not is_64_bit:
+    if not is_64_bit or is_emulated_long:
         descr = get_call_descr_dynamic(FakeCPU(), [], types.slonglong,
                                        None, 42)
         assert descr is None   # missing longlongs
diff --git a/pypy/jit/backend/llsupport/test/test_gc.py b/pypy/jit/backend/llsupport/test/test_gc.py
--- a/pypy/jit/backend/llsupport/test/test_gc.py
+++ b/pypy/jit/backend/llsupport/test/test_gc.py
@@ -57,6 +57,7 @@
         def frame_pos(n):
             return -4*(4+n)
         gcrootmap = GcRootMap_asmgcc()
+        gcrootmap.is_64_bit = False
         num1 = frame_pos(-5)
         num1a = num1|2
         num2 = frame_pos(55)
diff --git a/pypy/jit/backend/llsupport/test/test_rewrite.py b/pypy/jit/backend/llsupport/test/test_rewrite.py
--- a/pypy/jit/backend/llsupport/test/test_rewrite.py
+++ b/pypy/jit/backend/llsupport/test/test_rewrite.py
@@ -119,12 +119,19 @@
             jump()
         """, """
             []
-            p0 = call_malloc_gc(ConstClass(malloc_fixedsize), \
-                                %(adescr.basesize + 10 * adescr.itemsize)d, \
-                                descr=malloc_fixedsize_descr)
-            setfield_gc(p0, 10, descr=alendescr)
+            p0 = call_malloc_gc(ConstClass(malloc_array),   \
+                                %(adescr.basesize)d,        \
+                                10,                         \
+                                %(adescr.itemsize)d,        \
+                                %(adescr.lendescr.offset)d, \
+                                descr=malloc_array_descr)
             jump()
         """)
+##      should ideally be:
+##            p0 = call_malloc_gc(ConstClass(malloc_fixedsize), \
+##                                %(adescr.basesize + 10 * adescr.itemsize)d, \
+##                                descr=malloc_fixedsize_descr)
+##            setfield_gc(p0, 10, descr=alendescr)
 
     def test_new_array_variable(self):
         self.check_rewrite("""
@@ -178,13 +185,20 @@
             jump()
         """, """
             [i1]
-            p0 = call_malloc_gc(ConstClass(malloc_fixedsize),   \
-                                %(unicodedescr.basesize +       \
-                                  10 * unicodedescr.itemsize)d, \
-                                descr=malloc_fixedsize_descr)
-            setfield_gc(p0, 10, descr=unicodelendescr)
+            p0 = call_malloc_gc(ConstClass(malloc_array),   \
+                                %(unicodedescr.basesize)d,  \
+                                10,                         \
+                                %(unicodedescr.itemsize)d,  \
+                                %(unicodelendescr.offset)d, \
+                                descr=malloc_array_descr)
             jump()
         """)
+##      should ideally be:
+##            p0 = call_malloc_gc(ConstClass(malloc_fixedsize),   \
+##                                %(unicodedescr.basesize +       \
+##                                  10 * unicodedescr.itemsize)d, \
+##                                descr=malloc_fixedsize_descr)
+##            setfield_gc(p0, 10, descr=unicodelendescr)
 
 
 class TestFramework(RewriteTests):
@@ -203,7 +217,7 @@
         #
         class FakeCPU(object):
             def sizeof(self, STRUCT):
-                descr = SizeDescrWithVTable(102)
+                descr = SizeDescrWithVTable(104)
                 descr.tid = 9315
                 return descr
         self.cpu = FakeCPU()
@@ -368,11 +382,9 @@
             jump()
         """, """
             []
-            p0 = call_malloc_gc(ConstClass(malloc_fixedsize), \
-                                %(bdescr.basesize + 104)d,    \
-                                descr=malloc_fixedsize_descr)
-            setfield_gc(p0, 8765, descr=tiddescr)
-            setfield_gc(p0, 103, descr=blendescr)
+            p0 = call_malloc_gc(ConstClass(malloc_array), 1,  \
+                                %(bdescr.tid)d, 103,          \
+                                descr=malloc_array_descr)
             jump()
         """)
 
@@ -435,9 +447,8 @@
             jump()
         """, """
             [p1]
-            p0 = call_malloc_gc(ConstClass(malloc_fixedsize), 104, \
-                                descr=malloc_fixedsize_descr)
-            setfield_gc(p0, 9315, descr=tiddescr)
+            p0 = call_malloc_gc(ConstClass(malloc_big_fixedsize), 104, 9315, \
+                                descr=malloc_big_fixedsize_descr)
             setfield_gc(p0, ConstClass(o_vtable), descr=vtable_descr)
             jump()
         """)
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -266,6 +266,38 @@
         res = self.cpu.get_latest_value_int(0)
         assert res == 20
 
+    def test_compile_big_bridge_out_of_small_loop(self):
+        i0 = BoxInt()
+        faildescr1 = BasicFailDescr(1)
+        looptoken = JitCellToken()
+        operations = [
+            ResOperation(rop.GUARD_FALSE, [i0], None, descr=faildescr1),
+            ResOperation(rop.FINISH, [], None, descr=BasicFailDescr(2)),
+            ]
+        inputargs = [i0]
+        operations[0].setfailargs([i0])
+        self.cpu.compile_loop(inputargs, operations, looptoken)
+
+        i1list = [BoxInt() for i in range(1000)]
+        bridge = []
+        iprev = i0
+        for i1 in i1list:
+            bridge.append(ResOperation(rop.INT_ADD, [iprev, ConstInt(1)], i1))
+            iprev = i1
+        bridge.append(ResOperation(rop.GUARD_FALSE, [i0], None,
+                                   descr=BasicFailDescr(3)))
+        bridge.append(ResOperation(rop.FINISH, [], None,
+                                   descr=BasicFailDescr(4)))
+        bridge[-2].setfailargs(i1list)
+
+        self.cpu.compile_bridge(faildescr1, [i0], bridge, looptoken)
+
+        fail = self.cpu.execute_token(looptoken, 1)
+        assert fail.identifier == 3
+        for i in range(1000):
+            res = self.cpu.get_latest_value_int(i)
+            assert res == 2 + i
+
     def test_get_latest_value_count(self):
         i0 = BoxInt()
         i1 = BoxInt()
@@ -572,7 +604,7 @@
                                          [funcbox, BoxInt(arg1), BoxInt(arg2)],
                                          'int', descr=calldescr)
             assert res.getint() == f(arg1, arg2)
-        
+
     def test_call_stack_alignment(self):
         # test stack alignment issues, notably for Mac OS/X.
         # also test the ordering of the arguments.
@@ -1458,7 +1490,8 @@
     def test_noops(self):
         c_box = self.alloc_string("hi there").constbox()
         c_nest = ConstInt(0)
-        self.execute_operation(rop.DEBUG_MERGE_POINT, [c_box, c_nest], 'void')
+        c_id = ConstInt(0)
+        self.execute_operation(rop.DEBUG_MERGE_POINT, [c_box, c_nest, c_id], 'void')
         self.execute_operation(rop.JIT_DEBUG, [c_box, c_nest, c_nest,
                                                c_nest, c_nest], 'void')
 
@@ -3029,7 +3062,7 @@
             ResOperation(rop.JUMP, [i2], None, descr=targettoken2),
             ]
         self.cpu.compile_bridge(faildescr, inputargs, operations, looptoken)
-        
+
         fail = self.cpu.execute_token(looptoken, 2)
         assert fail.identifier == 3
         res = self.cpu.get_latest_value_int(0)
@@ -3074,7 +3107,7 @@
             assert len(mc) == len(ops)
             for i in range(len(mc)):
                 assert mc[i].split("\t")[-1].startswith(ops[i])
-            
+
         data = ctypes.string_at(info.asmaddr, info.asmlen)
         mc = list(machine_code_dump(data, info.asmaddr, cpuname))
         lines = [line for line in mc if line.count('\t') == 2]
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -33,7 +33,7 @@
 from pypy.jit.backend.x86.support import values_array
 from pypy.jit.backend.x86 import support
 from pypy.rlib.debug import (debug_print, debug_start, debug_stop,
-                             have_debug_prints, fatalerror_notb)
+                             have_debug_prints)
 from pypy.rlib import rgc
 from pypy.rlib.clibffi import FFI_DEFAULT_ABI
 from pypy.jit.backend.x86.jump import remap_frame_layout
@@ -88,7 +88,6 @@
         self._debug = False
         self.debug_counter_descr = cpu.fielddescrof(DEBUG_COUNTER, 'i')
         self.fail_boxes_count = 0
-        self._current_depths_cache = (0, 0)
         self.datablockwrapper = None
         self.stack_check_slowpath = 0
         self.propagate_exception_path = 0
@@ -104,7 +103,6 @@
         self._debug = v
 
     def setup_once(self):
-        self._check_sse2()
         # the address of the function called by 'new'
         gc_ll_descr = self.cpu.gc_ll_descr
         gc_ll_descr.initialize()
@@ -162,28 +160,6 @@
                 debug_print(prefix + ':' + str(struct.i))
             debug_stop('jit-backend-counts')
 
-    _CHECK_SSE2_FUNC_PTR = lltype.Ptr(lltype.FuncType([], lltype.Signed))
-
-    def _check_sse2(self):
-        if WORD == 8:
-            return     # all x86-64 CPUs support SSE2
-        if not self.cpu.supports_floats:
-            return     # the CPU doesn't support float, so we don't need SSE2
-        #
-        from pypy.jit.backend.x86.detect_sse2 import INSNS
-        mc = codebuf.MachineCodeBlockWrapper()
-        for c in INSNS:
-            mc.writechar(c)
-        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
-        fnptr = rffi.cast(self._CHECK_SSE2_FUNC_PTR, rawstart)
-        features = fnptr()
-        if bool(features & (1<<25)) and bool(features & (1<<26)):
-            return     # CPU supports SSE2
-        fatalerror_notb(
-          "This version of PyPy was compiled for a x86 CPU supporting SSE2.\n"
-          "Your CPU is too old.  Please translate a PyPy with the option:\n"
-          "--jit-backend=x86-without-sse2")
-
     def _build_float_constants(self):
         datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr, [])
         float_constants = datablockwrapper.malloc_aligned(32, alignment=16)
@@ -465,10 +441,8 @@
         looppos = self.mc.get_relative_pos()
         looptoken._x86_loop_code = looppos
         clt.frame_depth = -1     # temporarily
-        clt.param_depth = -1     # temporarily
-        frame_depth, param_depth = self._assemble(regalloc, operations)
+        frame_depth = self._assemble(regalloc, operations)
         clt.frame_depth = frame_depth
-        clt.param_depth = param_depth
         #
         size_excluding_failure_stuff = self.mc.get_relative_pos()
         self.write_pending_failure_recoveries()
@@ -482,8 +456,7 @@
             rawstart + size_excluding_failure_stuff,
             rawstart))
         debug_stop("jit-backend-addr")
-        self._patch_stackadjust(rawstart + stackadjustpos,
-                                frame_depth + param_depth)
+        self._patch_stackadjust(rawstart + stackadjustpos, frame_depth)
         self.patch_pending_failure_recoveries(rawstart)
         #
         ops_offset = self.mc.ops_offset
@@ -523,14 +496,13 @@
             assert ([loc.assembler() for loc in arglocs] ==
                     [loc.assembler() for loc in faildescr._x86_debug_faillocs])
         regalloc = RegAlloc(self, self.cpu.translate_support_code)
-        fail_depths = faildescr._x86_current_depths
         startpos = self.mc.get_relative_pos()
-        operations = regalloc.prepare_bridge(fail_depths, inputargs, arglocs,
+        operations = regalloc.prepare_bridge(inputargs, arglocs,
                                              operations,
                                              self.current_clt.allgcrefs)
 
         stackadjustpos = self._patchable_stackadjust()
-        frame_depth, param_depth = self._assemble(regalloc, operations)
+        frame_depth = self._assemble(regalloc, operations)
         codeendpos = self.mc.get_relative_pos()
         self.write_pending_failure_recoveries()
         fullsize = self.mc.get_relative_pos()
@@ -540,19 +512,16 @@
         debug_print("bridge out of Guard %d has address %x to %x" %
                     (descr_number, rawstart, rawstart + codeendpos))
         debug_stop("jit-backend-addr")
-        self._patch_stackadjust(rawstart + stackadjustpos,
-                                frame_depth + param_depth)
+        self._patch_stackadjust(rawstart + stackadjustpos, frame_depth)
         self.patch_pending_failure_recoveries(rawstart)
         if not we_are_translated():
             # for the benefit of tests
             faildescr._x86_bridge_frame_depth = frame_depth
-            faildescr._x86_bridge_param_depth = param_depth
         # patch the jump from original guard
         self.patch_jump_for_descr(faildescr, rawstart)
         ops_offset = self.mc.ops_offset
         self.fixup_target_tokens(rawstart)
         self.current_clt.frame_depth = max(self.current_clt.frame_depth, frame_depth)
-        self.current_clt.param_depth = max(self.current_clt.param_depth, param_depth)
         self.teardown()
         # oprofile support
         if self.cpu.profile_agent is not None:
@@ -723,15 +692,12 @@
         regalloc.walk_operations(operations)
         if we_are_translated() or self.cpu.dont_keepalive_stuff:
             self._regalloc = None   # else keep it around for debugging
-        frame_depth = regalloc.fm.get_frame_depth()
-        param_depth = regalloc.param_depth
+        frame_depth = regalloc.get_final_frame_depth()
         jump_target_descr = regalloc.jump_target_descr
         if jump_target_descr is not None:
             target_frame_depth = jump_target_descr._x86_clt.frame_depth
-            target_param_depth = jump_target_descr._x86_clt.param_depth
             frame_depth = max(frame_depth, target_frame_depth)
-            param_depth = max(param_depth, target_param_depth)
-        return frame_depth, param_depth
+        return frame_depth
 
     def _patchable_stackadjust(self):
         # stack adjustment LEA
@@ -915,10 +881,9 @@
         genop_math_list[oopspecindex](self, op, arglocs, resloc)
 
     def regalloc_perform_with_guard(self, op, guard_op, faillocs,
-                                    arglocs, resloc, current_depths):
+                                    arglocs, resloc):
         faildescr = guard_op.getdescr()
         assert isinstance(faildescr, AbstractFailDescr)
-        faildescr._x86_current_depths = current_depths
         failargs = guard_op.getfailargs()
         guard_opnum = guard_op.getopnum()
         guard_token = self.implement_guard_recovery(guard_opnum,
@@ -934,10 +899,9 @@
             # must be added by the genop_guard_list[]()
             assert guard_token is self.pending_guard_tokens[-1]
 
-    def regalloc_perform_guard(self, guard_op, faillocs, arglocs, resloc,
-                               current_depths):
+    def regalloc_perform_guard(self, guard_op, faillocs, arglocs, resloc):
         self.regalloc_perform_with_guard(None, guard_op, faillocs, arglocs,
-                                         resloc, current_depths)
+                                         resloc)
 
     def load_effective_addr(self, sizereg, baseofs, scale, result, frm=imm0):
         self.mc.LEA(result, addr_add(frm, sizereg, baseofs, scale))
@@ -1061,13 +1025,14 @@
                     self.mc.MOV(tmp, loc)
                     self.mc.MOV_sr(p, tmp.value)
             p += loc.get_width()
-        self._regalloc.reserve_param(p//WORD)
         # x is a location
         self.mc.CALL(x)
         self.mark_gc_roots(force_index)
         #
         if callconv != FFI_DEFAULT_ABI:
             self._fix_stdcall(callconv, p)
+        #
+        self._regalloc.needed_extra_stack_locations(p//WORD)
 
     def _fix_stdcall(self, callconv, p):
         from pypy.rlib.clibffi import FFI_STDCALL
@@ -1150,9 +1115,9 @@
             x = r10
         remap_frame_layout(self, src_locs, dst_locs, X86_64_SCRATCH_REG)
 
-        self._regalloc.reserve_param(len(pass_on_stack))
         self.mc.CALL(x)
         self.mark_gc_roots(force_index)
+        self._regalloc.needed_extra_stack_locations(len(pass_on_stack))
 
     def call(self, addr, args, res):
         force_index = self.write_new_force_index()
@@ -2159,7 +2124,6 @@
             if reg in save_registers:
                 self.mc.MOV_sr(p, reg.value)
                 p += WORD
-        self._regalloc.reserve_param(p//WORD)
         #
         if gcrootmap.is_shadow_stack:
             args = []
@@ -2215,6 +2179,7 @@
             if reg in save_registers:
                 self.mc.MOV_rs(reg.value, p)
                 p += WORD
+        self._regalloc.needed_extra_stack_locations(p//WORD)
 
     def call_reacquire_gil(self, gcrootmap, save_loc):
         # save the previous result (eax/xmm0) into the stack temporarily.
@@ -2222,7 +2187,6 @@
         # to save xmm0 in this case.
         if isinstance(save_loc, RegLoc) and not save_loc.is_xmm:
             self.mc.MOV_sr(WORD, save_loc.value)
-            self._regalloc.reserve_param(2)
         # call the reopenstack() function (also reacquiring the GIL)
         if gcrootmap.is_shadow_stack:
             args = []
@@ -2242,6 +2206,7 @@
         # restore the result from the stack
         if isinstance(save_loc, RegLoc) and not save_loc.is_xmm:
             self.mc.MOV_rs(save_loc.value, WORD)
+            self._regalloc.needed_extra_stack_locations(2)
 
     def genop_guard_call_assembler(self, op, guard_op, guard_token,
                                    arglocs, result_loc):
@@ -2518,11 +2483,6 @@
         # copy of heap(nursery_free_adr), so that the final MOV below is
         # a no-op.
 
-        # reserve room for the argument to the real malloc and the
-        # saved XMM regs (on 32 bit: 8 * 2 words; on 64 bit: 16 * 1
-        # word)
-        self._regalloc.reserve_param(1+16)
-
         gcrootmap = self.cpu.gc_ll_descr.gcrootmap
         shadow_stack = (gcrootmap is not None and gcrootmap.is_shadow_stack)
         if not shadow_stack:
@@ -2533,6 +2493,11 @@
         slowpath_addr2 = self.malloc_slowpath2
         self.mc.CALL(imm(slowpath_addr2))
 
+        # reserve room for the argument to the real malloc and the
+        # saved XMM regs (on 32 bit: 8 * 2 words; on 64 bit: 16 * 1
+        # word)
+        self._regalloc.needed_extra_stack_locations(1+16)
+
         offset = self.mc.get_relative_pos() - jmp_adr
         assert 0 < offset <= 127
         self.mc.overwrite(jmp_adr-1, chr(offset))
diff --git a/pypy/jit/backend/x86/codebuf.py b/pypy/jit/backend/x86/codebuf.py
--- a/pypy/jit/backend/x86/codebuf.py
+++ b/pypy/jit/backend/x86/codebuf.py
@@ -19,8 +19,8 @@
 
 
 class MachineCodeBlockWrapper(BlockBuilderMixin,
-                              codebuilder_cls,
-                              LocationCodeBuilder):
+                              LocationCodeBuilder,
+                              codebuilder_cls):
     def __init__(self):
         self.init_block_builder()
         # a list of relative positions; for each position p, the bytes
diff --git a/pypy/jit/backend/x86/detect_sse2.py b/pypy/jit/backend/x86/detect_sse2.py
--- a/pypy/jit/backend/x86/detect_sse2.py
+++ b/pypy/jit/backend/x86/detect_sse2.py
@@ -1,18 +1,17 @@
 import autopath
+from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.rlib.rmmap import alloc, free
 
-INSNS = ("\xB8\x01\x00\x00\x00"     # MOV EAX, 1
-         "\x53"                     # PUSH EBX
-         "\x0F\xA2"                 # CPUID
-         "\x5B"                     # POP EBX
-         "\x92"                     # XCHG EAX, EDX
-         "\xC3")                    # RET
 
 def detect_sse2():
-    from pypy.rpython.lltypesystem import lltype, rffi
-    from pypy.rlib.rmmap import alloc, free
     data = alloc(4096)
     pos = 0
-    for c in INSNS:
+    for c in ("\xB8\x01\x00\x00\x00"     # MOV EAX, 1
+              "\x53"                     # PUSH EBX
+              "\x0F\xA2"                 # CPUID
+              "\x5B"                     # POP EBX
+              "\x92"                     # XCHG EAX, EDX
+              "\xC3"):                   # RET
         data[pos] = c
         pos += 1
     fnptr = rffi.cast(lltype.Ptr(lltype.FuncType([], lltype.Signed)), data)
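
The EDX word returned by this CPUID stub is interpreted the same way the removed assembler check did (bit 25 = SSE, bit 26 = SSE2); a tiny sketch of that test:

    def has_sse2(features):
        # both the SSE and SSE2 feature bits must be set
        return bool(features & (1 << 25)) and bool(features & (1 << 26))

    assert has_sse2((1 << 25) | (1 << 26))
    assert not has_sse2(1 << 25)
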
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -168,7 +168,7 @@
 
     def _prepare(self, inputargs, operations, allgcrefs):
         self.fm = X86FrameManager()
-        self.param_depth = 0
+        self.min_frame_depth = 0
         cpu = self.assembler.cpu
         operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations,
                                                        allgcrefs)
@@ -193,11 +193,9 @@
             self.min_bytes_before_label = 13
         return operations
 
-    def prepare_bridge(self, prev_depths, inputargs, arglocs, operations,
-                       allgcrefs):
+    def prepare_bridge(self, inputargs, arglocs, operations, allgcrefs):
         operations = self._prepare(inputargs, operations, allgcrefs)
         self._update_bindings(arglocs, inputargs)
-        self.param_depth = prev_depths[1]
         self.min_bytes_before_label = 0
         return operations
 
@@ -205,8 +203,15 @@
         self.min_bytes_before_label = max(self.min_bytes_before_label,
                                           at_least_position)
 
-    def reserve_param(self, n):
-        self.param_depth = max(self.param_depth, n)
+    def needed_extra_stack_locations(self, n):
+        # call this *after* having used extra stack locations: (%esp), (%esp+4)...
+        min_frame_depth = self.fm.get_frame_depth() + n
+        if min_frame_depth > self.min_frame_depth:
+            self.min_frame_depth = min_frame_depth
+
+    def get_final_frame_depth(self):
+        self.needed_extra_stack_locations(0)  # update min_frame_depth
+        return self.min_frame_depth
 
     def _set_initial_bindings(self, inputargs):
         if IS_X86_64:
@@ -376,25 +381,12 @@
     def locs_for_fail(self, guard_op):
         return [self.loc(v) for v in guard_op.getfailargs()]
 
-    def get_current_depth(self):
-        # return (self.fm.frame_depth, self.param_depth), but trying to share
-        # the resulting tuple among several calls
-        arg0 = self.fm.get_frame_depth()
-        arg1 = self.param_depth
-        result = self.assembler._current_depths_cache
-        if result[0] != arg0 or result[1] != arg1:
-            result = (arg0, arg1)
-            self.assembler._current_depths_cache = result
-        return result
-
     def perform_with_guard(self, op, guard_op, arglocs, result_loc):
         faillocs = self.locs_for_fail(guard_op)
         self.rm.position += 1
         self.xrm.position += 1
-        current_depths = self.get_current_depth()
         self.assembler.regalloc_perform_with_guard(op, guard_op, faillocs,
-                                                   arglocs, result_loc,
-                                                   current_depths)
+                                                   arglocs, result_loc)
         if op.result is not None:
             self.possibly_free_var(op.result)
         self.possibly_free_vars(guard_op.getfailargs())
@@ -407,10 +399,8 @@
                                                       arglocs))
             else:
                 self.assembler.dump('%s(%s)' % (guard_op, arglocs))
-        current_depths = self.get_current_depth()
         self.assembler.regalloc_perform_guard(guard_op, faillocs, arglocs,
-                                              result_loc,
-                                              current_depths)
+                                              result_loc)
         self.possibly_free_vars(guard_op.getfailargs())
 
     def PerformDiscard(self, op, arglocs):
@@ -1393,7 +1383,7 @@
         self.force_spill_var(op.getarg(0))
 
     def get_mark_gc_roots(self, gcrootmap, use_copy_area=False):
-        shape = gcrootmap.get_basic_shape(IS_X86_64)
+        shape = gcrootmap.get_basic_shape()
         for v, val in self.fm.bindings.items():
             if (isinstance(v, BoxPtr) and self.rm.stays_alive(v)):
                 assert isinstance(val, StackLoc)
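
With param_depth gone, a single frame depth now has to cover both spill slots and the extra (%esp)-relative locations used around calls. A stand-in regalloc sketches how needed_extra_stack_locations() and get_final_frame_depth() combine the two:

    class FakeFrameManager(object):
        def __init__(self):
            self.depth = 0
        def get_frame_depth(self):
            return self.depth

    class FakeRegAlloc(object):
        def __init__(self):
            self.fm = FakeFrameManager()
            self.min_frame_depth = 0
        def needed_extra_stack_locations(self, n):
            # same logic as the real method above
            min_frame_depth = self.fm.get_frame_depth() + n
            if min_frame_depth > self.min_frame_depth:
                self.min_frame_depth = min_frame_depth
        def get_final_frame_depth(self):
            self.needed_extra_stack_locations(0)
            return self.min_frame_depth

    ra = FakeRegAlloc()
    ra.fm.depth = 3                      # three spill slots in use so far
    ra.needed_extra_stack_locations(6)   # a call needed six outgoing arg slots
    ra.fm.depth = 5                      # a few more spills later on
    assert ra.get_final_frame_depth() == 9   # max(3 + 6, 5 + 0)
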
diff --git a/pypy/jit/backend/x86/support.py b/pypy/jit/backend/x86/support.py
--- a/pypy/jit/backend/x86/support.py
+++ b/pypy/jit/backend/x86/support.py
@@ -1,6 +1,7 @@
 import sys
 from pypy.rpython.lltypesystem import lltype, rffi, llmemory
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
+from pypy.jit.backend.x86.arch import WORD
 
 
 def values_array(TP, size):
@@ -37,8 +38,13 @@
 
 if sys.platform == 'win32':
     ensure_sse2_floats = lambda : None
+    # XXX check for SSE2 on win32 too
 else:
+    if WORD == 4:
+        extra = ['-DPYPY_X86_CHECK_SSE2']
+    else:
+        extra = []
     ensure_sse2_floats = rffi.llexternal_use_eci(ExternalCompilationInfo(
         compile_extra = ['-msse2', '-mfpmath=sse',
-                         '-DPYPY_CPU_HAS_STANDARD_PRECISION'],
+                         '-DPYPY_CPU_HAS_STANDARD_PRECISION'] + extra,
         ))
diff --git a/pypy/jit/backend/x86/test/test_gc_integration.py b/pypy/jit/backend/x86/test/test_gc_integration.py
--- a/pypy/jit/backend/x86/test/test_gc_integration.py
+++ b/pypy/jit/backend/x86/test/test_gc_integration.py
@@ -28,7 +28,7 @@
 
 class MockGcRootMap(object):
     is_shadow_stack = False
-    def get_basic_shape(self, is_64_bit):
+    def get_basic_shape(self):
         return ['shape']
     def add_frame_offset(self, shape, offset):
         shape.append(offset)
@@ -184,6 +184,8 @@
         self.addrs[1] = self.addrs[0] + 64
         self.calls = []
         def malloc_slowpath(size):
+            if self.gcrootmap is not None:   # hook
+                self.gcrootmap.hook_malloc_slowpath()
             self.calls.append(size)
             # reset the nursery
             nadr = rffi.cast(lltype.Signed, self.nursery)
@@ -257,3 +259,218 @@
         assert gc_ll_descr.addrs[0] == nurs_adr + 24
         # this should call slow path once
         assert gc_ll_descr.calls == [24]
+
+    def test_save_regs_around_malloc(self):
+        S1 = lltype.GcStruct('S1')
+        S2 = lltype.GcStruct('S2', ('s0', lltype.Ptr(S1)),
+                                   ('s1', lltype.Ptr(S1)),
+                                   ('s2', lltype.Ptr(S1)),
+                                   ('s3', lltype.Ptr(S1)),
+                                   ('s4', lltype.Ptr(S1)),
+                                   ('s5', lltype.Ptr(S1)),
+                                   ('s6', lltype.Ptr(S1)),
+                                   ('s7', lltype.Ptr(S1)),
+                                   ('s8', lltype.Ptr(S1)),
+                                   ('s9', lltype.Ptr(S1)),
+                                   ('s10', lltype.Ptr(S1)),
+                                   ('s11', lltype.Ptr(S1)),
+                                   ('s12', lltype.Ptr(S1)),
+                                   ('s13', lltype.Ptr(S1)),
+                                   ('s14', lltype.Ptr(S1)),
+                                   ('s15', lltype.Ptr(S1)))
+        cpu = self.cpu
+        self.namespace = self.namespace.copy()
+        for i in range(16):
+            self.namespace['ds%i' % i] = cpu.fielddescrof(S2, 's%d' % i)
+        ops = '''
+        [p0]
+        p1 = getfield_gc(p0, descr=ds0)
+        p2 = getfield_gc(p0, descr=ds1)
+        p3 = getfield_gc(p0, descr=ds2)
+        p4 = getfield_gc(p0, descr=ds3)
+        p5 = getfield_gc(p0, descr=ds4)
+        p6 = getfield_gc(p0, descr=ds5)
+        p7 = getfield_gc(p0, descr=ds6)
+        p8 = getfield_gc(p0, descr=ds7)
+        p9 = getfield_gc(p0, descr=ds8)
+        p10 = getfield_gc(p0, descr=ds9)
+        p11 = getfield_gc(p0, descr=ds10)
+        p12 = getfield_gc(p0, descr=ds11)
+        p13 = getfield_gc(p0, descr=ds12)
+        p14 = getfield_gc(p0, descr=ds13)
+        p15 = getfield_gc(p0, descr=ds14)
+        p16 = getfield_gc(p0, descr=ds15)
+        #
+        # now all registers are in use
+        p17 = call_malloc_nursery(40)
+        p18 = call_malloc_nursery(40)     # overflow
+        #
+        finish(p1, p2, p3, p4, p5, p6, p7, p8,         \
+               p9, p10, p11, p12, p13, p14, p15, p16)
+        '''
+        s2 = lltype.malloc(S2)
+        for i in range(16):
+            setattr(s2, 's%d' % i, lltype.malloc(S1))
+        s2ref = lltype.cast_opaque_ptr(llmemory.GCREF, s2)
+        #
+        self.interpret(ops, [s2ref])
+        gc_ll_descr = cpu.gc_ll_descr
+        gc_ll_descr.check_nothing_in_nursery()
+        assert gc_ll_descr.calls == [40]
+        # check the returned pointers
+        for i in range(16):
+            s1ref = self.cpu.get_latest_value_ref(i)
+            s1 = lltype.cast_opaque_ptr(lltype.Ptr(S1), s1ref)
+            assert s1 == getattr(s2, 's%d' % i)
+
+
+class MockShadowStackRootMap(MockGcRootMap):
+    is_shadow_stack = True
+    MARKER_FRAME = 88       # this marker follows the frame addr
+    S1 = lltype.GcStruct('S1')
+
+    def __init__(self):
+        self.addrs = lltype.malloc(rffi.CArray(lltype.Signed), 20,
+                                   flavor='raw')
+        # root_stack_top
+        self.addrs[0] = rffi.cast(lltype.Signed, self.addrs) + 3*WORD
+        # random stuff
+        self.addrs[1] = 123456
+        self.addrs[2] = 654321
+        self.check_initial_and_final_state()
+        self.callshapes = {}
+        self.should_see = []
+
+    def check_initial_and_final_state(self):
+        assert self.addrs[0] == rffi.cast(lltype.Signed, self.addrs) + 3*WORD
+        assert self.addrs[1] == 123456
+        assert self.addrs[2] == 654321
+
+    def get_root_stack_top_addr(self):
+        return rffi.cast(lltype.Signed, self.addrs)
+
+    def compress_callshape(self, shape, datablockwrapper):
+        assert shape[0] == 'shape'
+        return ['compressed'] + shape[1:]
+
+    def write_callshape(self, mark, force_index):
+        assert mark[0] == 'compressed'
+        assert force_index not in self.callshapes
+        assert force_index == 42 + len(self.callshapes)
+        self.callshapes[force_index] = mark
+
+    def hook_malloc_slowpath(self):
+        num_entries = self.addrs[0] - rffi.cast(lltype.Signed, self.addrs)
+        assert num_entries == 5*WORD    # 3 initially, plus 2 by the asm frame
+        assert self.addrs[1] == 123456  # unchanged
+        assert self.addrs[2] == 654321  # unchanged
+        frame_addr = self.addrs[3]                   # pushed by the asm frame
+        assert self.addrs[4] == self.MARKER_FRAME    # pushed by the asm frame
+        #
+        from pypy.jit.backend.x86.arch import FORCE_INDEX_OFS
+        addr = rffi.cast(rffi.CArrayPtr(lltype.Signed),
+                         frame_addr + FORCE_INDEX_OFS)
+        force_index = addr[0]
+        assert force_index == 43    # in this test: the 2nd call_malloc_nursery
+        #
+        # The callshapes[43] saved above should list addresses both in the
+        # COPY_AREA and in the "normal" stack, where all the 16 values p1-p16
+        # of test_save_regs_at_correct_place should have been stored.  Here
+        # we replace them with new addresses, to emulate a moving GC.
+        shape = self.callshapes[force_index]
+        assert len(shape[1:]) == len(self.should_see)
+        new_objects = [None] * len(self.should_see)
+        for ofs in shape[1:]:
+            assert isinstance(ofs, int)    # not a register at all here
+            addr = rffi.cast(rffi.CArrayPtr(lltype.Signed), frame_addr + ofs)
+            contains = addr[0]
+            for j in range(len(self.should_see)):
+                obj = self.should_see[j]
+                if contains == rffi.cast(lltype.Signed, obj):
+                    assert new_objects[j] is None   # duplicate?
+                    break
+            else:
+                assert 0   # the value read from the stack looks random?
+            new_objects[j] = lltype.malloc(self.S1)
+            addr[0] = rffi.cast(lltype.Signed, new_objects[j])
+        self.should_see[:] = new_objects
+
+
+class TestMallocShadowStack(BaseTestRegalloc):
+
+    def setup_method(self, method):
+        cpu = CPU(None, None)
+        cpu.gc_ll_descr = GCDescrFastpathMalloc()
+        cpu.gc_ll_descr.gcrootmap = MockShadowStackRootMap()
+        cpu.setup_once()
+        for i in range(42):
+            cpu.reserve_some_free_fail_descr_number()
+        self.cpu = cpu
+
+    def test_save_regs_at_correct_place(self):
+        cpu = self.cpu
+        gc_ll_descr = cpu.gc_ll_descr
+        S1 = gc_ll_descr.gcrootmap.S1
+        S2 = lltype.GcStruct('S2', ('s0', lltype.Ptr(S1)),
+                                   ('s1', lltype.Ptr(S1)),
+                                   ('s2', lltype.Ptr(S1)),
+                                   ('s3', lltype.Ptr(S1)),
+                                   ('s4', lltype.Ptr(S1)),
+                                   ('s5', lltype.Ptr(S1)),
+                                   ('s6', lltype.Ptr(S1)),
+                                   ('s7', lltype.Ptr(S1)),
+                                   ('s8', lltype.Ptr(S1)),
+                                   ('s9', lltype.Ptr(S1)),
+                                   ('s10', lltype.Ptr(S1)),
+                                   ('s11', lltype.Ptr(S1)),
+                                   ('s12', lltype.Ptr(S1)),
+                                   ('s13', lltype.Ptr(S1)),
+                                   ('s14', lltype.Ptr(S1)),
+                                   ('s15', lltype.Ptr(S1)))
+        self.namespace = self.namespace.copy()
+        for i in range(16):
+            self.namespace['ds%i' % i] = cpu.fielddescrof(S2, 's%d' % i)
+        ops = '''
+        [p0]
+        p1 = getfield_gc(p0, descr=ds0)
+        p2 = getfield_gc(p0, descr=ds1)
+        p3 = getfield_gc(p0, descr=ds2)
+        p4 = getfield_gc(p0, descr=ds3)
+        p5 = getfield_gc(p0, descr=ds4)
+        p6 = getfield_gc(p0, descr=ds5)
+        p7 = getfield_gc(p0, descr=ds6)
+        p8 = getfield_gc(p0, descr=ds7)
+        p9 = getfield_gc(p0, descr=ds8)
+        p10 = getfield_gc(p0, descr=ds9)
+        p11 = getfield_gc(p0, descr=ds10)
+        p12 = getfield_gc(p0, descr=ds11)
+        p13 = getfield_gc(p0, descr=ds12)
+        p14 = getfield_gc(p0, descr=ds13)
+        p15 = getfield_gc(p0, descr=ds14)
+        p16 = getfield_gc(p0, descr=ds15)
+        #
+        # now all registers are in use
+        p17 = call_malloc_nursery(40)
+        p18 = call_malloc_nursery(40)     # overflow
+        #
+        finish(p1, p2, p3, p4, p5, p6, p7, p8,         \
+               p9, p10, p11, p12, p13, p14, p15, p16)
+        '''
+        s2 = lltype.malloc(S2)
+        for i in range(16):
+            s1 = lltype.malloc(S1)
+            setattr(s2, 's%d' % i, s1)
+            gc_ll_descr.gcrootmap.should_see.append(s1)
+        s2ref = lltype.cast_opaque_ptr(llmemory.GCREF, s2)
+        #
+        self.interpret(ops, [s2ref])
+        gc_ll_descr.check_nothing_in_nursery()
+        assert gc_ll_descr.calls == [40]
+        gc_ll_descr.gcrootmap.check_initial_and_final_state()
+        # check the returned pointers
+        for i in range(16):
+            s1ref = self.cpu.get_latest_value_ref(i)
+            s1 = lltype.cast_opaque_ptr(lltype.Ptr(S1), s1ref)
+            for j in range(16):
+                assert s1 != getattr(s2, 's%d' % j)
+            assert s1 == gc_ll_descr.gcrootmap.should_see[i]
diff --git a/pypy/jit/backend/x86/test/test_recompilation.py b/pypy/jit/backend/x86/test/test_recompilation.py
--- a/pypy/jit/backend/x86/test/test_recompilation.py
+++ b/pypy/jit/backend/x86/test/test_recompilation.py
@@ -34,7 +34,6 @@
         '''
         loop = self.interpret(ops, [0])
         previous = loop._jitcelltoken.compiled_loop_token.frame_depth
-        assert loop._jitcelltoken.compiled_loop_token.param_depth == 0
         assert self.getint(0) == 20
         ops = '''
         [i1]
@@ -51,7 +50,6 @@
         bridge = self.attach_bridge(ops, loop, -2)
         descr = loop.operations[3].getdescr()
         new = descr._x86_bridge_frame_depth
-        assert descr._x86_bridge_param_depth == 0
         # the force_spill() forces the stack to grow
         assert new > previous
         fail = self.run(loop, 0)
@@ -116,10 +114,8 @@
         loop_frame_depth = loop._jitcelltoken.compiled_loop_token.frame_depth
         bridge = self.attach_bridge(ops, loop, 6)
         guard_op = loop.operations[6]
-        assert loop._jitcelltoken.compiled_loop_token.param_depth == 0
         # the force_spill() forces the stack to grow
         assert guard_op.getdescr()._x86_bridge_frame_depth > loop_frame_depth
-        assert guard_op.getdescr()._x86_bridge_param_depth == 0
         self.run(loop, 0, 0, 0, 0, 0, 0)
         assert self.getint(0) == 1
         assert self.getint(1) == 20
diff --git a/pypy/jit/backend/x86/test/test_regalloc.py b/pypy/jit/backend/x86/test/test_regalloc.py
--- a/pypy/jit/backend/x86/test/test_regalloc.py
+++ b/pypy/jit/backend/x86/test/test_regalloc.py
@@ -606,23 +606,37 @@
         assert self.getints(9) == [0, 1, 1, 1, 1, 1, 1, 1, 1]
 
 class TestRegAllocCallAndStackDepth(BaseTestRegalloc):
-    def expected_param_depth(self, num_args):
+    def expected_frame_depth(self, num_call_args, num_pushed_input_args=0):
         # Assumes the arguments are all non-float
         if IS_X86_32:
-            return num_args
+            extra_esp = num_call_args
+            return extra_esp
         elif IS_X86_64:
-            return max(num_args - 6, 0)
+            # 'num_pushed_input_args' is for X86_64 only
+            extra_esp = max(num_call_args - 6, 0)
+            return num_pushed_input_args + extra_esp
 
     def test_one_call(self):
         ops = '''
-        [i0, i1, i2, i3, i4, i5, i6, i7, i8, i9]
+        [i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i9b]
         i10 = call(ConstClass(f1ptr), i0, descr=f1_calldescr)
-        finish(i10, i1, i2, i3, i4, i5, i6, i7, i8, i9)
+        finish(i10, i1, i2, i3, i4, i5, i6, i7, i8, i9, i9b)
         '''
-        loop = self.interpret(ops, [4, 7, 9, 9 ,9, 9, 9, 9, 9, 9])
-        assert self.getints(10) == [5, 7, 9, 9, 9, 9, 9, 9, 9, 9]
+        loop = self.interpret(ops, [4, 7, 9, 9 ,9, 9, 9, 9, 9, 9, 8])
+        assert self.getints(11) == [5, 7, 9, 9, 9, 9, 9, 9, 9, 9, 8]
         clt = loop._jitcelltoken.compiled_loop_token
-        assert clt.param_depth == self.expected_param_depth(1)
+        assert clt.frame_depth == self.expected_frame_depth(1, 5)
+
+    def test_one_call_reverse(self):
+        ops = '''
+        [i1, i2, i3, i4, i5, i6, i7, i8, i9, i9b, i0]
+        i10 = call(ConstClass(f1ptr), i0, descr=f1_calldescr)
+        finish(i10, i1, i2, i3, i4, i5, i6, i7, i8, i9, i9b)
+        '''
+        loop = self.interpret(ops, [7, 9, 9 ,9, 9, 9, 9, 9, 9, 8, 4])
+        assert self.getints(11) == [5, 7, 9, 9, 9, 9, 9, 9, 9, 9, 8]
+        clt = loop._jitcelltoken.compiled_loop_token
+        assert clt.frame_depth == self.expected_frame_depth(1, 6)
 
     def test_two_calls(self):
         ops = '''
@@ -634,7 +648,7 @@
         loop = self.interpret(ops, [4, 7, 9, 9 ,9, 9, 9, 9, 9, 9])
         assert self.getints(10) == [5*7, 7, 9, 9, 9, 9, 9, 9, 9, 9]
         clt = loop._jitcelltoken.compiled_loop_token
-        assert clt.param_depth == self.expected_param_depth(2)
+        assert clt.frame_depth == self.expected_frame_depth(2, 5)
 
     def test_call_many_arguments(self):
         # NB: The first and last arguments in the call are constants. This
@@ -648,25 +662,31 @@
         loop = self.interpret(ops, [2, 3, 4, 5, 6, 7, 8, 9])
         assert self.getint(0) == 55
         clt = loop._jitcelltoken.compiled_loop_token
-        assert clt.param_depth == self.expected_param_depth(10)
+        assert clt.frame_depth == self.expected_frame_depth(10)
 
     def test_bridge_calls_1(self):
         ops = '''
         [i0, i1]
         i2 = call(ConstClass(f1ptr), i0, descr=f1_calldescr)
-        guard_value(i2, 0, descr=fdescr1) [i2, i1]
+        guard_value(i2, 0, descr=fdescr1) [i2, i0, i1]
         finish(i1)
         '''
         loop = self.interpret(ops, [4, 7])
         assert self.getint(0) == 5
+        clt = loop._jitcelltoken.compiled_loop_token
+        orgdepth = clt.frame_depth
+        assert orgdepth == self.expected_frame_depth(1, 2)
+
         ops = '''
-        [i2, i1]
+        [i2, i0, i1]
         i3 = call(ConstClass(f2ptr), i2, i1, descr=f2_calldescr)        
-        finish(i3, descr=fdescr2)        
+        finish(i3, i0, descr=fdescr2)
         '''
         bridge = self.attach_bridge(ops, loop, -2)
 
-        assert loop.operations[-2].getdescr()._x86_bridge_param_depth == self.expected_param_depth(2)
+        assert clt.frame_depth == max(orgdepth, self.expected_frame_depth(2, 2))
+        assert loop.operations[-2].getdescr()._x86_bridge_frame_depth == \
+            self.expected_frame_depth(2, 2)
 
         self.run(loop, 4, 7)
         assert self.getint(0) == 5*7
@@ -676,10 +696,14 @@
         [i0, i1]
         i2 = call(ConstClass(f2ptr), i0, i1, descr=f2_calldescr)
         guard_value(i2, 0, descr=fdescr1) [i2]
-        finish(i1)
+        finish(i2)
         '''
         loop = self.interpret(ops, [4, 7])
         assert self.getint(0) == 4*7
+        clt = loop._jitcelltoken.compiled_loop_token
+        orgdepth = clt.frame_depth
+        assert orgdepth == self.expected_frame_depth(2)
+
         ops = '''
         [i2]
         i3 = call(ConstClass(f1ptr), i2, descr=f1_calldescr)        
@@ -687,7 +711,9 @@
         '''
         bridge = self.attach_bridge(ops, loop, -2)
 
-        assert loop.operations[-2].getdescr()._x86_bridge_param_depth == self.expected_param_depth(2)
+        assert clt.frame_depth == max(orgdepth, self.expected_frame_depth(1))
+        assert loop.operations[-2].getdescr()._x86_bridge_frame_depth == \
+            self.expected_frame_depth(1)
 
         self.run(loop, 4, 7)
         assert self.getint(0) == 29
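
A standalone restatement of the expected_frame_depth() helper above, runnable outside the test harness; the is_x86_64 flag below is introduced only for this sketch and stands in for the IS_X86_32/IS_X86_64 module constants.

    def expected_frame_depth(num_call_args, num_pushed_input_args=0,
                             is_x86_64=True):
        # assumes all call arguments are non-float, as in the tests above;
        # is_x86_64 replaces the IS_X86_32/IS_X86_64 constants for this sketch
        if not is_x86_64:
            return num_call_args                  # x86-32: every argument on the stack
        extra_esp = max(num_call_args - 6, 0)     # x86-64: first 6 args ride in registers
        return num_pushed_input_args + extra_esp

    assert expected_frame_depth(1, 5) == 5        # as in test_one_call (x86-64)
    assert expected_frame_depth(2, 5) == 5        # as in test_two_calls (x86-64)
    assert expected_frame_depth(10) == 4          # as in test_call_many_arguments
    assert expected_frame_depth(10, is_x86_64=False) == 10
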
diff --git a/pypy/jit/backend/x86/test/test_runner.py b/pypy/jit/backend/x86/test/test_runner.py
--- a/pypy/jit/backend/x86/test/test_runner.py
+++ b/pypy/jit/backend/x86/test/test_runner.py
@@ -371,7 +371,7 @@
 
         operations = [
             ResOperation(rop.LABEL, [i0], None, descr=targettoken),
-            ResOperation(rop.DEBUG_MERGE_POINT, [FakeString("hello"), 0], None),
+            ResOperation(rop.DEBUG_MERGE_POINT, [FakeString("hello"), 0, 0], None),
             ResOperation(rop.INT_ADD, [i0, ConstInt(1)], i1),
             ResOperation(rop.INT_LE, [i1, ConstInt(9)], i2),
             ResOperation(rop.GUARD_TRUE, [i2], None, descr=faildescr1),
@@ -390,7 +390,7 @@
         bridge = [
             ResOperation(rop.INT_LE, [i1b, ConstInt(19)], i3),
             ResOperation(rop.GUARD_TRUE, [i3], None, descr=faildescr2),
-            ResOperation(rop.DEBUG_MERGE_POINT, [FakeString("bye"), 0], None),
+            ResOperation(rop.DEBUG_MERGE_POINT, [FakeString("bye"), 0, 0], None),
             ResOperation(rop.JUMP, [i1b], None, descr=targettoken),
         ]
         bridge[1].setfailargs([i1b])
@@ -531,12 +531,12 @@
         loop = """
         [i0]
         label(i0, descr=preambletoken)
-        debug_merge_point('xyz', 0)
+        debug_merge_point('xyz', 0, 0)
         i1 = int_add(i0, 1)
         i2 = int_ge(i1, 10)
         guard_false(i2) []
         label(i1, descr=targettoken)
-        debug_merge_point('xyz', 0)
+        debug_merge_point('xyz', 0, 0)
         i11 = int_add(i1, 1)
         i12 = int_ge(i11, 10)
         guard_false(i12) []
@@ -569,7 +569,7 @@
         loop = """
         [i0]
         label(i0, descr=targettoken)
-        debug_merge_point('xyz', 0)
+        debug_merge_point('xyz', 0, 0)
         i1 = int_add(i0, 1)
         i2 = int_ge(i1, 10)
         guard_false(i2) []
diff --git a/pypy/jit/backend/x86/test/test_ztranslation.py b/pypy/jit/backend/x86/test/test_ztranslation.py
--- a/pypy/jit/backend/x86/test/test_ztranslation.py
+++ b/pypy/jit/backend/x86/test/test_ztranslation.py
@@ -52,6 +52,7 @@
             set_param(jitdriver, "trace_eagerness", 2)
             total = 0
             frame = Frame(i)
+            j = float(j)
             while frame.i > 3:
                 jitdriver.can_enter_jit(frame=frame, total=total, j=j)
                 jitdriver.jit_merge_point(frame=frame, total=total, j=j)
diff --git a/pypy/jit/codewriter/jtransform.py b/pypy/jit/codewriter/jtransform.py
--- a/pypy/jit/codewriter/jtransform.py
+++ b/pypy/jit/codewriter/jtransform.py
@@ -365,7 +365,7 @@
     def handle_builtin_call(self, op):
         oopspec_name, args = support.decode_builtin_call(op)
         # dispatch to various implementations depending on the oopspec_name
-        if oopspec_name.startswith('list.') or oopspec_name == 'newlist':
+        if oopspec_name.startswith('list.') or oopspec_name.startswith('newlist'):
             prepare = self._handle_list_call
         elif oopspec_name.startswith('stroruni.'):
             prepare = self._handle_stroruni_call
@@ -1494,6 +1494,14 @@
                                arraydescr, v_length],
                               op.result)
 
+    def do_resizable_newlist_hint(self, op, args, arraydescr, lengthdescr,
+                                  itemsdescr, structdescr):
+        v_hint = self._get_initial_newlist_length(op, args)
+        return SpaceOperation('newlist_hint',
+                              [structdescr, lengthdescr, itemsdescr,
+                               arraydescr, v_hint],
+                              op.result)
+
     def do_resizable_list_getitem(self, op, args, arraydescr, lengthdescr,
                                   itemsdescr, structdescr):
         v_index, extraop = self._prepare_list_getset(op, lengthdescr, args,
diff --git a/pypy/jit/codewriter/support.py b/pypy/jit/codewriter/support.py
--- a/pypy/jit/codewriter/support.py
+++ b/pypy/jit/codewriter/support.py
@@ -144,6 +144,10 @@
 _ll_1_newlist.need_result_type = True
 _ll_2_newlist.need_result_type = True
 
+def _ll_1_newlist_hint(LIST, hint):
+    return LIST.ll_newlist_hint(hint)
+_ll_1_newlist_hint.need_result_type = True
+
 def _ll_1_list_len(l):
     return l.ll_length()
 def _ll_2_list_getitem(l, index):
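
A plain-Python sketch of what the newlist_hint lowering added here (and in jtransform.py above) builds: a list whose backing array is preallocated to the hint while the visible length starts at zero, matching test_newlist_hint further down, which asserts len(newlist_hint(i)) == 0. HintedList and its fields are invented for illustration.

    class HintedList(object):
        # invented illustration of a resizable list built with a capacity hint
        def __init__(self, sizehint):
            self.items = [None] * sizehint   # preallocated backing array
            self.length = 0                  # visible length starts at 0

        def append(self, value):
            if self.length == len(self.items):
                self.items.extend([None] * max(len(self.items), 1))  # grow
            self.items[self.length] = value
            self.length += 1

        def __len__(self):
            return self.length

    l = HintedList(5)
    assert len(l) == 0          # the hint reserves space but adds no elements
    l.append(1)
    assert len(l) == 1 and l.items[0] == 1
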
diff --git a/pypy/jit/metainterp/blackhole.py b/pypy/jit/metainterp/blackhole.py
--- a/pypy/jit/metainterp/blackhole.py
+++ b/pypy/jit/metainterp/blackhole.py
@@ -982,6 +982,15 @@
         cpu.bh_setfield_gc_r(result, itemsdescr, items)
         return result
 
+    @arguments("cpu", "d", "d", "d", "d", "i", returns="r")
+    def bhimpl_newlist_hint(cpu, structdescr, lengthdescr, itemsdescr,
+                            arraydescr, lengthhint):
+        result = cpu.bh_new(structdescr)
+        cpu.bh_setfield_gc_i(result, lengthdescr, 0)
+        items = cpu.bh_new_array(arraydescr, lengthhint)
+        cpu.bh_setfield_gc_r(result, itemsdescr, items)
+        return result
+
     @arguments("cpu", "r", "d", "d", "i", returns="i")
     def bhimpl_getlistitem_gc_i(cpu, lst, itemsdescr, arraydescr, index):
         items = cpu.bh_getfield_gc_r(lst, itemsdescr)
@@ -1379,7 +1388,8 @@
         elif opnum == rop.GUARD_NO_OVERFLOW:
             # Produced by int_xxx_ovf().  The pc is just after the opcode.
             # We get here because it did not overflow before, but now it does.

-            return get_llexception(self.cpu, OverflowError())
+            if not dont_change_position:
+                return get_llexception(self.cpu, OverflowError())
         #
         elif opnum == rop.GUARD_OVERFLOW:
             # Produced by int_xxx_ovf().  The pc is just after the opcode.
diff --git a/pypy/jit/metainterp/compile.py b/pypy/jit/metainterp/compile.py
--- a/pypy/jit/metainterp/compile.py
+++ b/pypy/jit/metainterp/compile.py
@@ -289,8 +289,21 @@
             assert isinstance(token, TargetToken)
             assert token.original_jitcell_token is None
             token.original_jitcell_token = trace.original_jitcell_token
-            
-    
+
+
+def do_compile_loop(metainterp_sd, inputargs, operations, looptoken,
+                    log=True, name=''):
+    metainterp_sd.logger_ops.log_loop(inputargs, operations, -2,
+                                      'compiling', name=name)
+    return metainterp_sd.cpu.compile_loop(inputargs, operations, looptoken,
+                                          log=log, name=name)
+
+def do_compile_bridge(metainterp_sd, faildescr, inputargs, operations,
+                      original_loop_token, log=True):
+    metainterp_sd.logger_ops.log_bridge(inputargs, operations, -2)
+    return metainterp_sd.cpu.compile_bridge(faildescr, inputargs, operations,
+                                            original_loop_token, log=log)
+
 def send_loop_to_backend(greenkey, jitdriver_sd, metainterp_sd, loop, type):
     vinfo = jitdriver_sd.virtualizable_info
     if vinfo is not None:
@@ -319,9 +332,9 @@
     metainterp_sd.profiler.start_backend()
     debug_start("jit-backend")
     try:
-        asminfo = metainterp_sd.cpu.compile_loop(loop.inputargs, operations,
-                                                  original_jitcell_token,
-                                                  name=loopname)
+        asminfo = do_compile_loop(metainterp_sd, loop.inputargs,
+                                  operations, original_jitcell_token,
+                                  name=loopname)
     finally:
         debug_stop("jit-backend")
     metainterp_sd.profiler.end_backend()
@@ -333,7 +346,6 @@
         metainterp_sd.stats.compiled()
     metainterp_sd.log("compiled new " + type)
     #
-    loopname = jitdriver_sd.warmstate.get_location_str(greenkey)
     if asminfo is not None:
         ops_offset = asminfo.ops_offset
     else:
@@ -365,9 +377,9 @@
     metainterp_sd.profiler.start_backend()
     debug_start("jit-backend")
     try:
-        asminfo = metainterp_sd.cpu.compile_bridge(faildescr, inputargs,
-                                                   operations,
-                                                   original_loop_token)
+        asminfo = do_compile_bridge(metainterp_sd, faildescr, inputargs,
+                                    operations,
+                                    original_loop_token)
     finally:
         debug_stop("jit-backend")
     metainterp_sd.profiler.end_backend()
diff --git a/pypy/jit/metainterp/graphpage.py b/pypy/jit/metainterp/graphpage.py
--- a/pypy/jit/metainterp/graphpage.py
+++ b/pypy/jit/metainterp/graphpage.py
@@ -169,9 +169,9 @@
             if op.getopnum() == rop.DEBUG_MERGE_POINT:
                 jd_sd = self.metainterp_sd.jitdrivers_sd[op.getarg(0).getint()]
                 if jd_sd._get_printable_location_ptr:
-                    s = jd_sd.warmstate.get_location_str(op.getarglist()[2:])
+                    s = jd_sd.warmstate.get_location_str(op.getarglist()[3:])
                     s = s.replace(',', '.') # we use comma for argument splitting
-                    op_repr = "debug_merge_point(%d, '%s')" % (op.getarg(1).getint(), s)
+                    op_repr = "debug_merge_point(%d, %d, '%s')" % (op.getarg(1).getint(), op.getarg(2).getint(), s)
             lines.append(op_repr)
             if is_interesting_guard(op):
                 tgt = op.getdescr()._debug_suboperations[0]
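
A minimal sketch of the DEBUG_MERGE_POINT argument layout this change relies on: the arguments are [jitdriver index, portal call depth, call id, greenkey...], so the printable location now comes from args[3:] and the repr shows both the depth and the new call id. The helper name and the toy arguments below are illustrative only; get_location_str stands in for jd_sd.warmstate.get_location_str.

    def repr_debug_merge_point(args, get_location_str):
        # args[0]: jitdriver index, args[1]: portal call depth,
        # args[2]: call id, args[3:]: greenkey (invented helper for illustration)
        s = get_location_str(args[3:])
        s = s.replace(',', '.')    # commas are reserved for argument splitting
        return "debug_merge_point(%d, %d, '%s')" % (args[1], args[2], s)

    args = [0, 2, 5, 'green1', 'green2']
    out = repr_debug_merge_point(args, lambda greenkey: "-".join(greenkey))
    assert out == "debug_merge_point(2, 5, 'green1-green2')"
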
diff --git a/pypy/jit/metainterp/history.py b/pypy/jit/metainterp/history.py
--- a/pypy/jit/metainterp/history.py
+++ b/pypy/jit/metainterp/history.py
@@ -4,7 +4,7 @@
 from pypy.rpython.ootypesystem import ootype
 from pypy.rlib.objectmodel import we_are_translated, Symbolic
 from pypy.rlib.objectmodel import compute_unique_id
-from pypy.rlib.rarithmetic import r_int64
+from pypy.rlib.rarithmetic import r_int64, is_valid_int
 from pypy.conftest import option
 
 from pypy.jit.metainterp.resoperation import ResOperation, rop
@@ -213,7 +213,7 @@
 
     def __init__(self, value):
         if not we_are_translated():
-            if isinstance(value, int):
+            if is_valid_int(value):
                 value = int(value)    # bool -> int
             else:
                 assert isinstance(value, Symbolic)
diff --git a/pypy/jit/metainterp/logger.py b/pypy/jit/metainterp/logger.py
--- a/pypy/jit/metainterp/logger.py
+++ b/pypy/jit/metainterp/logger.py
@@ -18,6 +18,10 @@
             debug_start("jit-log-noopt-loop")
             logops = self._log_operations(inputargs, operations, ops_offset)
             debug_stop("jit-log-noopt-loop")
+        elif number == -2:
+            debug_start("jit-log-compiling-loop")
+            logops = self._log_operations(inputargs, operations, ops_offset)
+            debug_stop("jit-log-compiling-loop")
         else:
             debug_start("jit-log-opt-loop")
             debug_print("# Loop", number, '(%s)' % name , ":", type,
@@ -31,6 +35,10 @@
             debug_start("jit-log-noopt-bridge")
             logops = self._log_operations(inputargs, operations, ops_offset)
             debug_stop("jit-log-noopt-bridge")
+        elif number == -2:
+            debug_start("jit-log-compiling-bridge")
+            logops = self._log_operations(inputargs, operations, ops_offset)
+            debug_stop("jit-log-compiling-bridge")
         else:
             debug_start("jit-log-opt-bridge")
             debug_print("# bridge out of Guard", number,
@@ -102,9 +110,9 @@
     def repr_of_resop(self, op, ops_offset=None):
         if op.getopnum() == rop.DEBUG_MERGE_POINT:
             jd_sd = self.metainterp_sd.jitdrivers_sd[op.getarg(0).getint()]
-            s = jd_sd.warmstate.get_location_str(op.getarglist()[2:])
+            s = jd_sd.warmstate.get_location_str(op.getarglist()[3:])
             s = s.replace(',', '.') # we use comma for argument splitting
-            return "debug_merge_point(%d, '%s')" % (op.getarg(1).getint(), s)
+            return "debug_merge_point(%d, %d, '%s')" % (op.getarg(1).getint(), op.getarg(2).getint(), s)
         if ops_offset is None:
             offset = -1
         else:
@@ -141,7 +149,7 @@
             if target_token.exported_state:
                 for op in target_token.exported_state.inputarg_setup_ops:
                     debug_print('    ' + self.repr_of_resop(op))
-        
+
     def _log_operations(self, inputargs, operations, ops_offset):
         if not have_debug_prints():
             return
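
A minimal sketch of the log-section dispatch introduced above, including the new -2 case: -1 keeps selecting the "noopt" sections, -2 selects the new "compiling" sections that do_compile_loop()/do_compile_bridge() in compile.py now log under, and any other number falls through to the "opt" sections. The helper below is invented for illustration.

    def loop_log_section(number):
        # illustrative only: maps the 'number' argument to a debug section name
        if number == -1:
            return "jit-log-noopt-loop"
        elif number == -2:
            return "jit-log-compiling-loop"      # new: logged before compiling
        else:
            return "jit-log-opt-loop"

    assert loop_log_section(-1) == "jit-log-noopt-loop"
    assert loop_log_section(-2) == "jit-log-compiling-loop"
    assert loop_log_section(7) == "jit-log-opt-loop"
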
diff --git a/pypy/jit/metainterp/optimizeopt/__init__.py b/pypy/jit/metainterp/optimizeopt/__init__.py
--- a/pypy/jit/metainterp/optimizeopt/__init__.py
+++ b/pypy/jit/metainterp/optimizeopt/__init__.py
@@ -9,7 +9,7 @@
 from pypy.jit.metainterp.optimizeopt.simplify import OptSimplify
 from pypy.jit.metainterp.optimizeopt.pure import OptPure
 from pypy.jit.metainterp.optimizeopt.earlyforce import OptEarlyForce
-from pypy.rlib.jit import PARAMETERS
+from pypy.rlib.jit import PARAMETERS, ENABLE_ALL_OPTS
 from pypy.rlib.unroll import unrolling_iterable
 from pypy.rlib.debug import debug_start, debug_stop, debug_print
 
@@ -30,6 +30,9 @@
 ALL_OPTS_LIST = [name for name, _ in ALL_OPTS]
 ALL_OPTS_NAMES = ':'.join([name for name, _ in ALL_OPTS])
 
+assert ENABLE_ALL_OPTS == ALL_OPTS_NAMES, (
+    'please fix rlib/jit.py to say ENABLE_ALL_OPTS = %r' % (ALL_OPTS_NAMES,))
+
 def build_opt_chain(metainterp_sd, enable_opts):
     config = metainterp_sd.config
     optimizations = []
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_multilabel.py b/pypy/jit/metainterp/optimizeopt/test/test_multilabel.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_multilabel.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_multilabel.py
@@ -398,6 +398,40 @@
         with raises(InvalidLoop):
             self.optimize_loop(ops, ops)
 
+    def test_issue1045(self):
+        ops = """
+        [i55]
+        i73 = int_mod(i55, 2)
+        i75 = int_rshift(i73, 63)
+        i76 = int_and(2, i75)
+        i77 = int_add(i73, i76)
+        i81 = int_eq(i77, 1)
+        i0 = int_ge(i55, 1)
+        guard_true(i0) []
+        label(i55)
+        i3 = int_mod(i55, 2)
+        i5 = int_rshift(i3, 63)
+        i6 = int_and(2, i5)
+        i7 = int_add(i3, i6)
+        i8 = int_eq(i7, 1)
+        escape(i8)
+        jump(i55)
+        """
+        expected = """
+        [i55]
+        i73 = int_mod(i55, 2)
+        i75 = int_rshift(i73, 63)
+        i76 = int_and(2, i75)
+        i77 = int_add(i73, i76)
+        i81 = int_eq(i77, 1)
+        i0 = int_ge(i55, 1)
+        guard_true(i0) []
+        label(i55, i81)
+        escape(i81)
+        jump(i55, i81)
+        """
+        self.optimize_loop(ops, expected)
+        
 class OptRenameStrlen(Optimization):
     def propagate_forward(self, op):
         dispatch_opt(self, op)
@@ -423,7 +457,7 @@
         metainterp_sd = FakeMetaInterpStaticData(self.cpu)
         optimize_unroll(metainterp_sd, loop, [OptRenameStrlen(), OptPure()], True)
 
-    def test_optimizer_renaming_boxes(self):
+    def test_optimizer_renaming_boxes1(self):
         ops = """
         [p1]
         i1 = strlen(p1)
@@ -457,7 +491,6 @@
         jump(p1, i11)
         """
         self.optimize_loop(ops, expected)
-
         
 
 class TestLLtype(OptimizeoptTestMultiLabel, LLtypeMixin):
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
@@ -5031,6 +5031,42 @@
         """
         self.optimize_loop(ops, expected)
 
+    def test_str_copy_virtual(self):
+        ops = """
+        [i0]
+        p0 = newstr(8)
+        strsetitem(p0, 0, i0)
+        strsetitem(p0, 1, i0)
+        strsetitem(p0, 2, i0)
+        strsetitem(p0, 3, i0)
+        strsetitem(p0, 4, i0)
+        strsetitem(p0, 5, i0)
+        strsetitem(p0, 6, i0)
+        strsetitem(p0, 7, i0)
+        p1 = newstr(12)
+        copystrcontent(p0, p1, 0, 0, 8)
+        strsetitem(p1, 8, 3)
+        strsetitem(p1, 9, 0)
+        strsetitem(p1, 10, 0)
+        strsetitem(p1, 11, 0)
+        finish(p1)
+        """
+        expected = """
+        [i0]
+        p1 = newstr(12)
+        strsetitem(p1, 0, i0)
+        strsetitem(p1, 1, i0)
+        strsetitem(p1, 2, i0)
+        strsetitem(p1, 3, i0)
+        strsetitem(p1, 4, i0)
+        strsetitem(p1, 5, i0)
+        strsetitem(p1, 6, i0)
+        strsetitem(p1, 7, i0)
+        strsetitem(p1, 8, 3)
+        finish(p1)
+        """
+        self.optimize_strunicode_loop(ops, expected)
+
 
 class TestLLtype(BaseTestOptimizeBasic, LLtypeMixin):
     pass
diff --git a/pypy/jit/metainterp/optimizeopt/unroll.py b/pypy/jit/metainterp/optimizeopt/unroll.py
--- a/pypy/jit/metainterp/optimizeopt/unroll.py
+++ b/pypy/jit/metainterp/optimizeopt/unroll.py
@@ -9,7 +9,6 @@
 from pypy.jit.metainterp.inliner import Inliner
 from pypy.jit.metainterp.resoperation import rop, ResOperation
 from pypy.jit.metainterp.resume import Snapshot
-from pypy.rlib.debug import debug_print
 import sys, os
 
 # FIXME: Introduce some VirtualOptimizer super class instead
@@ -121,9 +120,9 @@
                 limit = self.optimizer.metainterp_sd.warmrunnerdesc.memory_manager.retrace_limit
                 if cell_token.retraced_count < limit:
                     cell_token.retraced_count += 1
-                    debug_print('Retracing (%d/%d)' % (cell_token.retraced_count, limit))
+                    #debug_print('Retracing (%d/%d)' % (cell_token.retraced_count, limit))
                 else:
-                    debug_print("Retrace count reached, jumping to preamble")
+                    #debug_print("Retrace count reached, jumping to preamble")
                     assert cell_token.target_tokens[0].virtual_state is None
                     jumpop.setdescr(cell_token.target_tokens[0])
                     self.optimizer.send_extra_operation(jumpop)
@@ -260,7 +259,7 @@
             if op and op.result:
                 preamble_value = exported_state.exported_values[op.result]
                 value = self.optimizer.getvalue(op.result)
-                if not value.is_virtual():
+                if not value.is_virtual() and not value.is_constant():
                     imp = ValueImporter(self, preamble_value, op)
                     self.optimizer.importable_values[value] = imp
                 newvalue = self.optimizer.getvalue(op.result)
@@ -268,12 +267,14 @@
                 # note that emitting here SAME_AS should not happen, but
                 # in case it does, we would prefer to be suboptimal in asm
                 # to a fatal RPython exception.
-                if newresult is not op.result and not newvalue.is_constant():
+                if newresult is not op.result and \
+                   not self.short_boxes.has_producer(newresult) and \
+                   not newvalue.is_constant():
                     op = ResOperation(rop.SAME_AS, [op.result], newresult)
                     self.optimizer._newoperations.append(op)
-                    if self.optimizer.loop.logops:
-                        debug_print('  Falling back to add extra: ' +
-                                    self.optimizer.loop.logops.repr_of_resop(op))
+                    #if self.optimizer.loop.logops:
+                    #    debug_print('  Falling back to add extra: ' +
+                    #                self.optimizer.loop.logops.repr_of_resop(op))
                     
         self.optimizer.flush()
         self.optimizer.emitting_dissabled = False
@@ -339,8 +340,8 @@
             if i == len(newoperations):
                 while j < len(jumpargs):
                     a = jumpargs[j]
-                    if self.optimizer.loop.logops:
-                        debug_print('J:  ' + self.optimizer.loop.logops.repr_of_arg(a))
+                    #if self.optimizer.loop.logops:
+                    #    debug_print('J:  ' + self.optimizer.loop.logops.repr_of_arg(a))
                     self.import_box(a, inputargs, short_jumpargs, jumpargs)
                     j += 1
             else:
@@ -351,11 +352,11 @@
                 if op.is_guard():
                     args = args + op.getfailargs()
 
-                if self.optimizer.loop.logops:
-                    debug_print('OP: ' + self.optimizer.loop.logops.repr_of_resop(op))
+                #if self.optimizer.loop.logops:
+                #    debug_print('OP: ' + self.optimizer.loop.logops.repr_of_resop(op))
                 for a in args:
-                    if self.optimizer.loop.logops:
-                        debug_print('A:  ' + self.optimizer.loop.logops.repr_of_arg(a))
+                    #if self.optimizer.loop.logops:
+                    #    debug_print('A:  ' + self.optimizer.loop.logops.repr_of_arg(a))
                     self.import_box(a, inputargs, short_jumpargs, jumpargs)
                 i += 1
             newoperations = self.optimizer.get_newoperations()
@@ -368,18 +369,18 @@
         # that is compatible with the virtual state at the start of the loop
         modifier = VirtualStateAdder(self.optimizer)
         final_virtual_state = modifier.get_virtual_state(original_jumpargs)
-        debug_start('jit-log-virtualstate')
-        virtual_state.debug_print('Closed loop with ')
+        #debug_start('jit-log-virtualstate')
+        #virtual_state.debug_print('Closed loop with ')
         bad = {}
         if not virtual_state.generalization_of(final_virtual_state, bad):
             # We ended up with a virtual state that is not compatible
             # and we are thus unable to jump to the start of the loop
-            final_virtual_state.debug_print("Bad virtual state at end of loop, ",
-                                            bad)
-            debug_stop('jit-log-virtualstate')
+            #final_virtual_state.debug_print("Bad virtual state at end of loop, ",
+            #                                bad)
+            #debug_stop('jit-log-virtualstate')
             raise InvalidLoop
             
-        debug_stop('jit-log-virtualstate')
+        #debug_stop('jit-log-virtualstate')
 
         maxguards = self.optimizer.metainterp_sd.warmrunnerdesc.memory_manager.max_retrace_guards
         if self.optimizer.emitted_guards > maxguards:
@@ -442,9 +443,9 @@
                 self.ensure_short_op_emitted(self.short_boxes.producer(a), optimizer,
                                              seen)
 
-        if self.optimizer.loop.logops:
-            debug_print('  Emitting short op: ' +
-                        self.optimizer.loop.logops.repr_of_resop(op))
+        #if self.optimizer.loop.logops:
+        #    debug_print('  Emitting short op: ' +
+        #                self.optimizer.loop.logops.repr_of_resop(op))
 
         optimizer.send_extra_operation(op)
         seen[op.result] = True
@@ -525,8 +526,8 @@
         args = jumpop.getarglist()
         modifier = VirtualStateAdder(self.optimizer)
         virtual_state = modifier.get_virtual_state(args)
-        debug_start('jit-log-virtualstate')
-        virtual_state.debug_print("Looking for ")
+        #debug_start('jit-log-virtualstate')
+        #virtual_state.debug_print("Looking for ")
 
         for target in cell_token.target_tokens:
             if not target.virtual_state:
@@ -535,10 +536,10 @@
             extra_guards = []
 
             bad = {}
-            debugmsg = 'Did not match '
+            #debugmsg = 'Did not match '
             if target.virtual_state.generalization_of(virtual_state, bad):
                 ok = True
-                debugmsg = 'Matched '
+                #debugmsg = 'Matched '
             else:
                 try:
                     cpu = self.optimizer.cpu
@@ -547,13 +548,13 @@
                                                          extra_guards)
 
                     ok = True
-                    debugmsg = 'Guarded to match '
+                    #debugmsg = 'Guarded to match '
                 except InvalidLoop:
                     pass
-            target.virtual_state.debug_print(debugmsg, bad)
+            #target.virtual_state.debug_print(debugmsg, bad)
 
             if ok:
-                debug_stop('jit-log-virtualstate')
+                #debug_stop('jit-log-virtualstate')
 
                 values = [self.getvalue(arg)
                           for arg in jumpop.getarglist()]
@@ -574,13 +575,13 @@
                         newop = inliner.inline_op(shop)
                         self.optimizer.send_extra_operation(newop)
                 except InvalidLoop:
-                    debug_print("Inlining failed unexpectedly",
-                                "jumping to preamble instead")
+                    #debug_print("Inlining failed unexpectedly",
+                    #            "jumping to preamble instead")
                     assert cell_token.target_tokens[0].virtual_state is None
                     jumpop.setdescr(cell_token.target_tokens[0])
                     self.optimizer.send_extra_operation(jumpop)
                 return True
-        debug_stop('jit-log-virtualstate')
+        #debug_stop('jit-log-virtualstate')
         return False
 
 class ValueImporter(object):
diff --git a/pypy/jit/metainterp/optimizeopt/virtualstate.py b/pypy/jit/metainterp/optimizeopt/virtualstate.py
--- a/pypy/jit/metainterp/optimizeopt/virtualstate.py
+++ b/pypy/jit/metainterp/optimizeopt/virtualstate.py
@@ -681,13 +681,14 @@
             self.synthetic[op] = True
 
     def debug_print(self, logops):
-        debug_start('jit-short-boxes')
-        for box, op in self.short_boxes.items():
-            if op:
-                debug_print(logops.repr_of_arg(box) + ': ' + logops.repr_of_resop(op))
-            else:
-                debug_print(logops.repr_of_arg(box) + ': None')
-        debug_stop('jit-short-boxes')
+        if 0:
+            debug_start('jit-short-boxes')
+            for box, op in self.short_boxes.items():
+                if op:
+                    debug_print(logops.repr_of_arg(box) + ': ' + logops.repr_of_resop(op))
+                else:
+                    debug_print(logops.repr_of_arg(box) + ': None')
+            debug_stop('jit-short-boxes')
 
     def operations(self):
         if not we_are_translated(): # For tests
diff --git a/pypy/jit/metainterp/optimizeopt/vstring.py b/pypy/jit/metainterp/optimizeopt/vstring.py
--- a/pypy/jit/metainterp/optimizeopt/vstring.py
+++ b/pypy/jit/metainterp/optimizeopt/vstring.py
@@ -505,14 +505,23 @@
 
         if length.is_constant() and length.box.getint() == 0:
             return
-        copy_str_content(self,
-            src.force_box(self),
-            dst.force_box(self),
-            srcstart.force_box(self),
-            dststart.force_box(self),
-            length.force_box(self),
-            mode, need_next_offset=False
-        )
+        elif (src.is_virtual() and dst.is_virtual() and srcstart.is_constant() and
+            dststart.is_constant() and length.is_constant()):
+
+            src_start = srcstart.force_box(self).getint()
+            dst_start = dststart.force_box(self).getint()
+            for index in range(length.force_box(self).getint()):
+                vresult = self.strgetitem(src, optimizer.ConstantValue(ConstInt(index + src_start)), mode)
+                dst.setitem(index + dst_start, vresult)
+        else:
+            copy_str_content(self,
+                src.force_box(self),
+                dst.force_box(self),
+                srcstart.force_box(self),
+                dststart.force_box(self),
+                length.force_box(self),
+                mode, need_next_offset=False
+            )
 
     def optimize_CALL(self, op):
         # dispatch based on 'oopspecindex' to a method that handles
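
A plain-Python sketch of the new branch above: when both strings are still virtual and the start offsets and the length are constants, copystrcontent is folded into per-character stores instead of forcing the boxes and emitting a real copy. The helper and the lists below only illustrate the index arithmetic.

    def fold_copystrcontent(src_chars, dst_chars, srcstart, dststart, length):
        # mirrors the constant-folded loop: one setitem per copied character
        for index in range(length):
            dst_chars[dststart + index] = src_chars[srcstart + index]

    src = list("abcdefgh")
    dst = [None] * 12
    fold_copystrcontent(src, dst, 0, 0, 8)
    assert dst[:8] == src    # matches the strsetitem sequence in test_str_copy_virtual
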
diff --git a/pypy/jit/metainterp/pyjitpl.py b/pypy/jit/metainterp/pyjitpl.py
--- a/pypy/jit/metainterp/pyjitpl.py
+++ b/pypy/jit/metainterp/pyjitpl.py
@@ -509,6 +509,15 @@
         self._opimpl_setfield_gc_any(sbox, itemsdescr, abox)
         return sbox
 
+    @arguments("descr", "descr", "descr", "descr", "box")
+    def opimpl_newlist_hint(self, structdescr, lengthdescr, itemsdescr,
+                            arraydescr, sizehintbox):
+        sbox = self.opimpl_new(structdescr)
+        self._opimpl_setfield_gc_any(sbox, lengthdescr, history.CONST_FALSE)
+        abox = self.opimpl_new_array(arraydescr, sizehintbox)
+        self._opimpl_setfield_gc_any(sbox, itemsdescr, abox)
+        return sbox
+
     @arguments("box", "descr", "descr", "box")
     def _opimpl_getlistitem_gc_any(self, listbox, itemsdescr, arraydescr,
                                    indexbox):
@@ -974,9 +983,11 @@
         any_operation = len(self.metainterp.history.operations) > 0
         jitdriver_sd = self.metainterp.staticdata.jitdrivers_sd[jdindex]
         self.verify_green_args(jitdriver_sd, greenboxes)
-        self.debug_merge_point(jitdriver_sd, jdindex, self.metainterp.portal_call_depth,
+        self.debug_merge_point(jitdriver_sd, jdindex,
+                               self.metainterp.portal_call_depth,
+                               self.metainterp.call_ids[-1],
                                greenboxes)
-        
+
         if self.metainterp.seen_loop_header_for_jdindex < 0:
             if not any_operation:
                 return
@@ -1028,11 +1039,11 @@
                                     assembler_call=True)
             raise ChangeFrame
 
-    def debug_merge_point(self, jitdriver_sd, jd_index, portal_call_depth, greenkey):
+    def debug_merge_point(self, jitdriver_sd, jd_index, portal_call_depth, current_call_id, greenkey):
         # debugging: produce a DEBUG_MERGE_POINT operation
         loc = jitdriver_sd.warmstate.get_location_str(greenkey)
         debug_print(loc)
-        args = [ConstInt(jd_index), ConstInt(portal_call_depth)] + greenkey
+        args = [ConstInt(jd_index), ConstInt(portal_call_depth), ConstInt(current_call_id)] + greenkey
         self.metainterp.history.record(rop.DEBUG_MERGE_POINT, args, None)
 
     @arguments("box", "label")
@@ -1574,11 +1585,14 @@
         self.call_pure_results = args_dict_box()
         self.heapcache = HeapCache()
 
+        self.call_ids = []
+        self.current_call_id = 0
+
     def retrace_needed(self, trace):
         self.partial_trace = trace
         self.retracing_from = len(self.history.operations) - 1
         self.heapcache.reset()
-        
+
 
     def perform_call(self, jitcode, boxes, greenkey=None):
         # causes the metainterp to enter the given subfunction
@@ -1592,6 +1606,8 @@
     def newframe(self, jitcode, greenkey=None):
         if jitcode.is_portal:
             self.portal_call_depth += 1
+            self.call_ids.append(self.current_call_id)
+            self.current_call_id += 1
         if greenkey is not None and self.is_main_jitcode(jitcode):
             self.portal_trace_positions.append(
                     (greenkey, len(self.history.operations)))
@@ -1608,6 +1624,7 @@
         jitcode = frame.jitcode
         if jitcode.is_portal:
             self.portal_call_depth -= 1
+            self.call_ids.pop()
         if frame.greenkey is not None and self.is_main_jitcode(jitcode):
             self.portal_trace_positions.append(
                     (None, len(self.history.operations)))
@@ -1976,7 +1993,7 @@
                 # Found!  Compile it as a loop.
                 # raises in case it works -- which is the common case
                 if self.partial_trace:
-                    if  start != self.retracing_from: 
+                    if  start != self.retracing_from:
                         raise SwitchToBlackhole(ABORT_BAD_LOOP) # For now
                 self.compile_loop(original_boxes, live_arg_boxes, start, resumedescr)
                 # creation of the loop was cancelled!
@@ -2064,11 +2081,12 @@
             pass # XXX we want to do something special in resume descr,
                  # but not now
         elif opnum == rop.GUARD_NO_OVERFLOW:   # an overflow now detected
-            self.execute_raised(OverflowError(), constant=True)
-            try:
-                self.finishframe_exception()
-            except ChangeFrame:
-                pass
+            if not dont_change_position:
+                self.execute_raised(OverflowError(), constant=True)
+                try:
+                    self.finishframe_exception()
+                except ChangeFrame:
+                    pass
         elif opnum == rop.GUARD_OVERFLOW:      # no longer overflowing
             self.clear_exception()
         else:
@@ -2084,7 +2102,7 @@
             if not token.target_tokens:
                 return None
         return token
-        
+
     def compile_loop(self, original_boxes, live_arg_boxes, start, resume_at_jump_descr):
         num_green_args = self.jitdriver_sd.num_green_args
         greenkey = original_boxes[:num_green_args]
@@ -2349,7 +2367,7 @@
             # warmstate.py.
             virtualizable_box = self.virtualizable_boxes[-1]
             virtualizable = vinfo.unwrap_virtualizable_box(virtualizable_box)
-            assert not vinfo.gettoken(virtualizable)
+            assert not vinfo.is_token_nonnull_gcref(virtualizable)
             # fill the virtualizable with the local boxes
             self.synchronize_virtualizable()
         #
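
A sketch of the call-id bookkeeping added above (CallIdTracker is an invented stand-in for the new MetaInterp fields): each portal frame pushed gets a fresh id from a monotonically increasing counter, the id is popped together with the frame, and debug_merge_point records call_ids[-1], the id of the innermost live portal call.

    class CallIdTracker(object):
        # invented illustration of the call_ids / current_call_id pair
        def __init__(self):
            self.call_ids = []          # stack of ids, one per live portal frame
            self.current_call_id = 0    # next id to hand out

        def newframe(self, is_portal):
            if is_portal:
                self.call_ids.append(self.current_call_id)
                self.current_call_id += 1

        def popframe(self, is_portal):
            if is_portal:
                self.call_ids.pop()

    t = CallIdTracker()
    t.newframe(True)                    # outer portal call gets id 0
    t.newframe(True)                    # nested portal call gets id 1
    assert t.call_ids[-1] == 1
    t.popframe(True)
    assert t.call_ids[-1] == 0
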
diff --git a/pypy/jit/metainterp/resume.py b/pypy/jit/metainterp/resume.py
--- a/pypy/jit/metainterp/resume.py
+++ b/pypy/jit/metainterp/resume.py
@@ -1101,14 +1101,14 @@
         virtualizable = self.decode_ref(numb.nums[index])
         if self.resume_after_guard_not_forced == 1:
             # in the middle of handle_async_forcing()
-            assert vinfo.gettoken(virtualizable)
-            vinfo.settoken(virtualizable, vinfo.TOKEN_NONE)
+            assert vinfo.is_token_nonnull_gcref(virtualizable)
+            vinfo.reset_token_gcref(virtualizable)
         else:
             # just jumped away from assembler (case 4 in the comment in
             # virtualizable.py) into tracing (case 2); check that vable_token
             # is and stays 0.  Note the call to reset_vable_token() in
             # warmstate.py.
-            assert not vinfo.gettoken(virtualizable)
+            assert not vinfo.is_token_nonnull_gcref(virtualizable)
         return vinfo.write_from_resume_data_partial(virtualizable, self, numb)
 
     def load_value_of_type(self, TYPE, tagged):
diff --git a/pypy/jit/metainterp/test/test_ajit.py b/pypy/jit/metainterp/test/test_ajit.py
--- a/pypy/jit/metainterp/test/test_ajit.py
+++ b/pypy/jit/metainterp/test/test_ajit.py
@@ -144,7 +144,7 @@
                            'int_mul': 1, 'guard_true': 2, 'int_sub': 2})
 
 
-    def test_loop_invariant_mul_ovf(self):
+    def test_loop_invariant_mul_ovf1(self):
         myjitdriver = JitDriver(greens = [], reds = ['y', 'res', 'x'])
         def f(x, y):
             res = 0
@@ -235,6 +235,65 @@
                            'guard_true': 4, 'int_sub': 4, 'jump': 3,
                            'int_mul': 3, 'int_add': 4})
 
+    def test_loop_invariant_mul_ovf2(self):
+        myjitdriver = JitDriver(greens = [], reds = ['y', 'res', 'x'])
+        def f(x, y):
+            res = 0
+            while y > 0:
+                myjitdriver.can_enter_jit(x=x, y=y, res=res)
+                myjitdriver.jit_merge_point(x=x, y=y, res=res)
+                b = y * 2
+                try:
+                    res += ovfcheck(x * x) + b
+                except OverflowError:
+                    res += 1
+                y -= 1
+            return res
+        res = self.meta_interp(f, [sys.maxint, 7])
+        assert res == f(sys.maxint, 7)
+        self.check_trace_count(1)
+        res = self.meta_interp(f, [6, 7])
+        assert res == 308
+
+    def test_loop_invariant_mul_bridge_ovf1(self):
+        myjitdriver = JitDriver(greens = [], reds = ['y', 'res', 'x1', 'x2'])
+        def f(x1, x2, y):
+            res = 0
+            while y > 0:
+                myjitdriver.can_enter_jit(x1=x1, x2=x2, y=y, res=res)
+                myjitdriver.jit_merge_point(x1=x1, x2=x2, y=y, res=res)
+                try:
+                    res += ovfcheck(x1 * x1)
+                except OverflowError:
+                    res += 1
+                if y<32 and (y>>2)&1==0:
+                    x1, x2 = x2, x1
+                y -= 1
+            return res
+        res = self.meta_interp(f, [6, sys.maxint, 48])
+        assert res == f(6, sys.maxint, 48)
+
+    def test_loop_invariant_mul_bridge_ovf2(self):
+        myjitdriver = JitDriver(greens = [], reds = ['y', 'res', 'x1', 'x2', 'n'])
+        def f(x1, x2, n, y):
+            res = 0
+            while y > 0:
+                myjitdriver.can_enter_jit(x1=x1, x2=x2, y=y, res=res, n=n)
+                myjitdriver.jit_merge_point(x1=x1, x2=x2, y=y, res=res, n=n)
+                try:
+                    res += ovfcheck(x1 * x1)
+                except OverflowError:
+                    res += 1
+                y -= 1
+                if y&4 == 0:
+                    x1, x2 = x2, x1
+            return res
+        res = self.meta_interp(f, [6, sys.maxint, 32, 48])
+        assert res == f(6, sys.maxint, 32, 48)
+        res = self.meta_interp(f, [sys.maxint, 6, 32, 48])
+        assert res == f(sys.maxint, 6, 32, 48)
+        
+
     def test_loop_invariant_intbox(self):
         myjitdriver = JitDriver(greens = [], reds = ['y', 'res', 'x'])
         class I:
@@ -2943,11 +3002,18 @@
         self.check_resops(arraylen_gc=3)
 
     def test_ulonglong_mod(self):
-        myjitdriver = JitDriver(greens = [], reds = ['n', 'sa', 'i'])
+        myjitdriver = JitDriver(greens = [], reds = ['n', 'a'])
+        class A:
+            pass
         def f(n):
             sa = i = rffi.cast(rffi.ULONGLONG, 1)
+            a = A()
             while i < rffi.cast(rffi.ULONGLONG, n):
-                myjitdriver.jit_merge_point(sa=sa, n=n, i=i)
+                a.sa = sa
+                a.i = i
+                myjitdriver.jit_merge_point(n=n, a=a)
+                sa = a.sa
+                i = a.i
                 sa += sa % i
                 i += 1
         res = self.meta_interp(f, [32])
diff --git a/pypy/jit/metainterp/test/test_compile.py b/pypy/jit/metainterp/test/test_compile.py
--- a/pypy/jit/metainterp/test/test_compile.py
+++ b/pypy/jit/metainterp/test/test_compile.py
@@ -14,7 +14,7 @@
     ts = typesystem.llhelper
     def __init__(self):
         self.seen = []
-    def compile_loop(self, inputargs, operations, token, name=''):
+    def compile_loop(self, inputargs, operations, token, log=True, name=''):
         self.seen.append((inputargs, operations, token))
 
 class FakeLogger(object):
diff --git a/pypy/jit/metainterp/test/test_list.py b/pypy/jit/metainterp/test/test_list.py
--- a/pypy/jit/metainterp/test/test_list.py
+++ b/pypy/jit/metainterp/test/test_list.py
@@ -1,4 +1,5 @@
 import py
+from pypy.rlib.objectmodel import newlist_hint
 from pypy.rlib.jit import JitDriver
 from pypy.jit.metainterp.test.support import LLJitMixin, OOJitMixin
 
@@ -228,6 +229,27 @@
         self.check_resops({'jump': 1, 'int_gt': 2, 'int_add': 2,
                            'guard_true': 2, 'int_sub': 2})
 
+    def test_newlist_hint(self):
+        def f(i):
+            l = newlist_hint(i)
+            return len(l)
+
+        r = self.interp_operations(f, [3])
+        assert r == 0
+
+    def test_newlist_hint_optimized(self):
+        driver = JitDriver(greens = [], reds = ['i'])
+
+        def f(i):
+            while i > 0:
+                driver.jit_merge_point(i=i)
+                l = newlist_hint(5)
+                l.append(1)
+                i -= l[0]
+
+        self.meta_interp(f, [10], listops=True)
+        self.check_resops(new_array=0, call=0)
+
 class TestOOtype(ListTests, OOJitMixin):
     pass
 
diff --git a/pypy/jit/metainterp/test/test_logger.py b/pypy/jit/metainterp/test/test_logger.py
--- a/pypy/jit/metainterp/test/test_logger.py
+++ b/pypy/jit/metainterp/test/test_logger.py
@@ -54,7 +54,7 @@
         class FakeJitDriver(object):
             class warmstate(object):
                 get_location_str = staticmethod(lambda args: "dupa")
-        
+
         class FakeMetaInterpSd:
             cpu = AbstractCPU()
             cpu.ts = self.ts
@@ -77,7 +77,7 @@
             equaloplists(loop.operations, oloop.operations)
             assert oloop.inputargs == loop.inputargs
         return logger, loop, oloop
-    
+
     def test_simple(self):
         inp = '''
         [i0, i1, i2, p3, p4, p5]
@@ -116,12 +116,13 @@
     def test_debug_merge_point(self):
         inp = '''
         []
-        debug_merge_point(0, 0)
+        debug_merge_point(0, 0, 0)
         '''
         _, loop, oloop = self.reparse(inp, check_equal=False)
         assert loop.operations[0].getarg(1).getint() == 0
-        assert oloop.operations[0].getarg(1)._get_str() == "dupa"
-        
+        assert loop.operations[0].getarg(2).getint() == 0
+        assert oloop.operations[0].getarg(2)._get_str() == "dupa"
+
     def test_floats(self):
         inp = '''
         [f0]
@@ -142,7 +143,7 @@
         output = logger.log_loop(loop)
         assert output.splitlines()[-1] == "jump(i0, descr=<Loop3>)"
         pure_parse(output)
-        
+
     def test_guard_descr(self):
         namespace = {'fdescr': BasicFailDescr()}
         inp = '''
@@ -154,7 +155,7 @@
         output = logger.log_loop(loop)
         assert output.splitlines()[-1] == "guard_true(i0, descr=<Guard0>) [i0]"
         pure_parse(output)
-        
+
         logger = Logger(self.make_metainterp_sd(), guard_number=False)
         output = logger.log_loop(loop)
         lastline = output.splitlines()[-1]
diff --git a/pypy/jit/metainterp/test/test_quasiimmut.py b/pypy/jit/metainterp/test/test_quasiimmut.py
--- a/pypy/jit/metainterp/test/test_quasiimmut.py
+++ b/pypy/jit/metainterp/test/test_quasiimmut.py
@@ -8,7 +8,7 @@
 from pypy.jit.metainterp.quasiimmut import get_current_qmut_instance
 from pypy.jit.metainterp.test.support import LLJitMixin
 from pypy.jit.codewriter.policy import StopAtXPolicy
-from pypy.rlib.jit import JitDriver, dont_look_inside
+from pypy.rlib.jit import JitDriver, dont_look_inside, unroll_safe
 
 
 def test_get_current_qmut_instance():
@@ -480,6 +480,32 @@
         assert res == 1
         self.check_jitcell_token_count(2)
 
+    def test_for_loop_array(self):
+        myjitdriver = JitDriver(greens=[], reds=["n", "i"])
+        class Foo(object):
+            _immutable_fields_ = ["x?[*]"]
+            def __init__(self, x):
+                self.x = x
+        f = Foo([1, 3, 5, 6])
+        @unroll_safe
+        def g(v):
+            for x in f.x:
+                if x & 1 == 0:
+                    v += 1
+            return v
+        def main(n):
+            i = 0
+            while i < n:
+                myjitdriver.jit_merge_point(n=n, i=i)
+                i = g(i)
+            return i
+        res = self.meta_interp(main, [10])
+        assert res == 10
+        self.check_resops({
+            "int_add": 2, "int_lt": 2, "jump": 1, "guard_true": 2,
+            "guard_not_invalidated": 2
+        })
+
 
 class TestLLtypeGreenFieldsTests(QuasiImmutTests, LLJitMixin):
     pass
diff --git a/pypy/jit/metainterp/test/test_warmspot.py b/pypy/jit/metainterp/test/test_warmspot.py
--- a/pypy/jit/metainterp/test/test_warmspot.py
+++ b/pypy/jit/metainterp/test/test_warmspot.py
@@ -13,7 +13,7 @@
 
 
 class WarmspotTests(object):
-    
+
     def test_basic(self):
         mydriver = JitDriver(reds=['a'],
                              greens=['i'])
@@ -77,16 +77,16 @@
         self.meta_interp(f, [123, 10])
         assert len(get_stats().locations) >= 4
         for loc in get_stats().locations:
-            assert loc == (0, 123)
+            assert loc == (0, 0, 123)
 
     def test_set_param_enable_opts(self):
         from pypy.rpython.annlowlevel import llstr, hlstr
-        
+
         myjitdriver = JitDriver(greens = [], reds = ['n'])
         class A(object):
             def m(self, n):
                 return n-1
-            
+
         def g(n):
             while n > 0:
                 myjitdriver.can_enter_jit(n=n)
@@ -332,7 +332,7 @@
             ts = llhelper
             translate_support_code = False
             stats = "stats"
-            
+
             def get_fail_descr_number(self, d):
                 return -1
 
@@ -352,7 +352,7 @@
                 return "not callable"
 
         driver = JitDriver(reds = ['red'], greens = ['green'])
-        
+
         def f(green):
             red = 0
             while red < 10:
diff --git a/pypy/jit/metainterp/virtualizable.py b/pypy/jit/metainterp/virtualizable.py
--- a/pypy/jit/metainterp/virtualizable.py
+++ b/pypy/jit/metainterp/virtualizable.py
@@ -262,15 +262,15 @@
         force_now._dont_inline_ = True
         self.force_now = force_now
 
-        def gettoken(virtualizable):
+        def is_token_nonnull_gcref(virtualizable):
             virtualizable = cast_gcref_to_vtype(virtualizable)
-            return virtualizable.vable_token
-        self.gettoken = gettoken
+            return bool(virtualizable.vable_token)
+        self.is_token_nonnull_gcref = is_token_nonnull_gcref
 
-        def settoken(virtualizable, token):
+        def reset_token_gcref(virtualizable):
             virtualizable = cast_gcref_to_vtype(virtualizable)
-            virtualizable.vable_token = token
-        self.settoken = settoken
+            virtualizable.vable_token = VirtualizableInfo.TOKEN_NONE
+        self.reset_token_gcref = reset_token_gcref
 
     def _freeze_(self):
         return True
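
A sketch of the renamed helpers above, acting on a plain stand-in object: the old getter/setter pair is narrowed to a boolean "is a token set?" test plus a reset-to-none operation, which is all the callers in pyjitpl.py and resume.py need. FakeVirtualizable is invented, and TOKEN_NONE = 0 is only an assumed stand-in for the real "no token" value.

    TOKEN_NONE = 0      # assumed stand-in for the real "no token" value

    class FakeVirtualizable(object):
        # invented stand-in for a virtualizable structure
        vable_token = TOKEN_NONE

    def is_token_nonnull(virtualizable):
        return bool(virtualizable.vable_token)

    def reset_token(virtualizable):
        virtualizable.vable_token = TOKEN_NONE

    v = FakeVirtualizable()
    v.vable_token = 42                  # pretend the assembler stored a token
    assert is_token_nonnull(v)
    reset_token(v)
    assert not is_token_nonnull(v)
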
diff --git a/pypy/jit/metainterp/warmspot.py b/pypy/jit/metainterp/warmspot.py
--- a/pypy/jit/metainterp/warmspot.py
+++ b/pypy/jit/metainterp/warmspot.py
@@ -100,7 +100,7 @@
     if not kwds.get('translate_support_code', False):
         warmrunnerdesc.metainterp_sd.profiler.finish()
         warmrunnerdesc.metainterp_sd.cpu.finish_once()
-    print '~~~ return value:', res
+    print '~~~ return value:', repr(res)
     while repeat > 1:
         print '~' * 79
         res1 = interp.eval_graph(graph, args)
diff --git a/pypy/jit/tl/tinyframe/tinyframe.py b/pypy/jit/tl/tinyframe/tinyframe.py
--- a/pypy/jit/tl/tinyframe/tinyframe.py
+++ b/pypy/jit/tl/tinyframe/tinyframe.py
@@ -210,7 +210,7 @@
     def repr(self):
         return "<function %s(%s)>" % (self.outer.repr(), self.inner.repr())
 
-driver = JitDriver(greens = ['code', 'i'], reds = ['self'],
+driver = JitDriver(greens = ['i', 'code'], reds = ['self'],
                    virtualizables = ['self'])
 
 class Frame(object):
diff --git a/pypy/jit/tool/test/test_oparser.py b/pypy/jit/tool/test/test_oparser.py
--- a/pypy/jit/tool/test/test_oparser.py
+++ b/pypy/jit/tool/test/test_oparser.py
@@ -146,16 +146,18 @@
     def test_debug_merge_point(self):
         x = '''
         []
-        debug_merge_point(0, "info")
-        debug_merge_point(0, 'info')
-        debug_merge_point(1, '<some ('other.')> info')
-        debug_merge_point(0, '(stuff) #1')
+        debug_merge_point(0, 0, "info")
+        debug_merge_point(0, 0, 'info')
+        debug_merge_point(1, 1, '<some ('other.')> info')
+        debug_merge_point(0, 0, '(stuff) #1')
         '''
         loop = self.parse(x)
-        assert loop.operations[0].getarg(1)._get_str() == 'info'
-        assert loop.operations[1].getarg(1)._get_str() == 'info'
-        assert loop.operations[2].getarg(1)._get_str() == "<some ('other.')> info"
-        assert loop.operations[3].getarg(1)._get_str() == "(stuff) #1"
+        assert loop.operations[0].getarg(2)._get_str() == 'info'
+        assert loop.operations[0].getarg(1).value == 0
+        assert loop.operations[1].getarg(2)._get_str() == 'info'
+        assert loop.operations[2].getarg(2)._get_str() == "<some ('other.')> info"
+        assert loop.operations[2].getarg(1).value == 1
+        assert loop.operations[3].getarg(2)._get_str() == "(stuff) #1"
 
 
     def test_descr_with_obj_print(self):
diff --git a/pypy/module/__pypy__/__init__.py b/pypy/module/__pypy__/__init__.py
--- a/pypy/module/__pypy__/__init__.py
+++ b/pypy/module/__pypy__/__init__.py
@@ -1,5 +1,5 @@
+import sys
 
-# Package initialisation
 from pypy.interpreter.mixedmodule import MixedModule
 from pypy.module.imp.importing import get_pyc_magic
 
@@ -12,6 +12,19 @@
         "UnicodeBuilder": "interp_builders.W_UnicodeBuilder",
     }
 
+class TimeModule(MixedModule):
+    appleveldefs = {}
+    interpleveldefs = {}
+    if sys.platform.startswith("linux"):
+        interpleveldefs["clock_gettime"] = "interp_time.clock_gettime"
+        interpleveldefs["clock_getres"] = "interp_time.clock_getres"
+        for name in [
+            "CLOCK_REALTIME", "CLOCK_MONOTONIC", "CLOCK_MONOTONIC_RAW",
+            "CLOCK_PROCESS_CPUTIME_ID", "CLOCK_THREAD_CPUTIME_ID"
+        ]:
+            interpleveldefs[name] = "space.wrap(interp_time.%s)" % name
+
+
 class Module(MixedModule):
     appleveldefs = {
     }
@@ -32,6 +45,7 @@
 
     submodules = {
         "builders": BuildersModule,
+        "time": TimeModule,
     }
 
     def setup_after_space_initialization(self):
diff --git a/pypy/module/__pypy__/interp_time.py b/pypy/module/__pypy__/interp_time.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/__pypy__/interp_time.py
@@ -0,0 +1,64 @@
+import sys
+
+from pypy.interpreter.error import exception_from_errno
+from pypy.interpreter.gateway import unwrap_spec
+from pypy.rpython.lltypesystem import rffi, lltype
+from pypy.rpython.tool import rffi_platform
+from pypy.translator.tool.cbuild import ExternalCompilationInfo
+
+
+class CConfig:
+    _compilation_info_ = ExternalCompilationInfo(
+        includes=["time.h"],
+        libraries=["rt"],
+    )
+
+    HAS_CLOCK_GETTIME = rffi_platform.Has('clock_gettime')
+
+    CLOCK_REALTIME = rffi_platform.DefinedConstantInteger("CLOCK_REALTIME")
+    CLOCK_MONOTONIC = rffi_platform.DefinedConstantInteger("CLOCK_MONOTONIC")
+    CLOCK_MONOTONIC_RAW = rffi_platform.DefinedConstantInteger("CLOCK_MONOTONIC_RAW")
+    CLOCK_PROCESS_CPUTIME_ID = rffi_platform.DefinedConstantInteger("CLOCK_PROCESS_CPUTIME_ID")
+    CLOCK_THREAD_CPUTIME_ID = rffi_platform.DefinedConstantInteger("CLOCK_THREAD_CPUTIME_ID")
+
+    TIMESPEC = rffi_platform.Struct("struct timespec", [
+        ("tv_sec", rffi.TIME_T),
+        ("tv_nsec", rffi.LONG),
+    ])
+
+cconfig = rffi_platform.configure(CConfig)
+
+HAS_CLOCK_GETTIME = cconfig["HAS_CLOCK_GETTIME"]
+
+CLOCK_REALTIME = cconfig["CLOCK_REALTIME"]
+CLOCK_MONOTONIC = cconfig["CLOCK_MONOTONIC"]
+CLOCK_MONOTONIC_RAW = cconfig["CLOCK_MONOTONIC_RAW"]
+CLOCK_PROCESS_CPUTIME_ID = cconfig["CLOCK_PROCESS_CPUTIME_ID"]
+CLOCK_THREAD_CPUTIME_ID = cconfig["CLOCK_THREAD_CPUTIME_ID"]
+
+TIMESPEC = cconfig["TIMESPEC"]
+
+c_clock_gettime = rffi.llexternal("clock_gettime",
+    [lltype.Signed, lltype.Ptr(TIMESPEC)], rffi.INT,
+    compilation_info=CConfig._compilation_info_, threadsafe=False
+)
+c_clock_getres = rffi.llexternal("clock_getres",
+    [lltype.Signed, lltype.Ptr(TIMESPEC)], rffi.INT,
+    compilation_info=CConfig._compilation_info_, threadsafe=False
+)
+
+@unwrap_spec(clk_id="c_int")
+def clock_gettime(space, clk_id):
+    with lltype.scoped_alloc(TIMESPEC) as tp:
+        ret = c_clock_gettime(clk_id, tp)
+        if ret != 0:
+            raise exception_from_errno(space, space.w_IOError)
+        return space.wrap(tp.c_tv_sec + tp.c_tv_nsec * 1e-9)
+
+@unwrap_spec(clk_id="c_int")
+def clock_getres(space, clk_id):
+    with lltype.scoped_alloc(TIMESPEC) as tp:
+        ret = c_clock_getres(clk_id, tp)
+        if ret != 0:
+            raise exception_from_errno(space, space.w_IOError)
+        return space.wrap(tp.c_tv_sec + tp.c_tv_nsec * 1e-9)
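
Both functions fold the C timespec into a single float, tv_sec + tv_nsec * 1e-9, and raise IOError from errno on failure. For comparison, a hedged ctypes sketch of the same call from CPython on Linux; the library lookup and the use of c_long for time_t are assumptions about the host libc, not part of this changeset:

    import ctypes, ctypes.util

    class timespec(ctypes.Structure):
        _fields_ = [("tv_sec", ctypes.c_long), ("tv_nsec", ctypes.c_long)]

    _libname = ctypes.util.find_library("rt") or ctypes.util.find_library("c")
    _librt = ctypes.CDLL(_libname, use_errno=True)

    def clock_gettime(clk_id=0):              # 0 == CLOCK_REALTIME on Linux
        ts = timespec()
        if _librt.clock_gettime(clk_id, ctypes.byref(ts)) != 0:
            raise OSError(ctypes.get_errno(), "clock_gettime failed")
        return ts.tv_sec + ts.tv_nsec * 1e-9  # same float as the code above
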
diff --git a/pypy/module/__pypy__/test/test_time.py b/pypy/module/__pypy__/test/test_time.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/__pypy__/test/test_time.py
@@ -0,0 +1,26 @@
+import py
+
+from pypy.module.__pypy__.interp_time import HAS_CLOCK_GETTIME
+
+
+class AppTestTime(object):
+    def setup_class(cls):
+        if not HAS_CLOCK_GETTIME:
+            py.test.skip("need time.clock_gettime")
+
+    def test_clock_realtime(self):
+        from __pypy__ import time
+        res = time.clock_gettime(time.CLOCK_REALTIME)
+        assert isinstance(res, float)
+
+    def test_clock_monotonic(self):
+        from __pypy__ import time
+        a = time.clock_gettime(time.CLOCK_MONOTONIC)
+        b = time.clock_gettime(time.CLOCK_MONOTONIC)
+        assert a <= b
+
+    def test_clock_getres(self):
+        from __pypy__ import time
+        res = time.clock_getres(time.CLOCK_REALTIME)
+        assert res > 0.0
+        assert res <= 1.0
diff --git a/pypy/module/_ffi/test/test__ffi.py b/pypy/module/_ffi/test/test__ffi.py
--- a/pypy/module/_ffi/test/test__ffi.py
+++ b/pypy/module/_ffi/test/test__ffi.py
@@ -100,7 +100,10 @@
         from _ffi import CDLL, types
         libm = CDLL(self.libm_name)
         pow_addr = libm.getaddressindll('pow')
-        assert pow_addr == self.pow_addr & (sys.maxint*2-1)
+        fff = sys.maxint*2-1
+        if sys.platform == 'win32':
+            fff = sys.maxint*2+1
+        assert pow_addr == self.pow_addr & fff
 
     def test_func_fromaddr(self):
         import sys
diff --git a/pypy/module/_file/interp_file.py b/pypy/module/_file/interp_file.py
--- a/pypy/module/_file/interp_file.py
+++ b/pypy/module/_file/interp_file.py
@@ -5,14 +5,13 @@
 from pypy.rlib import streamio
 from pypy.rlib.rarithmetic import r_longlong
 from pypy.rlib.rstring import StringBuilder
-from pypy.module._file.interp_stream import (W_AbstractStream, StreamErrors,
-    wrap_streamerror, wrap_oserror_as_ioerror)
+from pypy.module._file.interp_stream import W_AbstractStream, StreamErrors
 from pypy.module.posix.interp_posix import dispatch_filename
 from pypy.interpreter.error import OperationError, operationerrfmt
 from pypy.interpreter.typedef import (TypeDef, GetSetProperty,
     interp_attrproperty, make_weakref_descr, interp_attrproperty_w)
 from pypy.interpreter.gateway import interp2app, unwrap_spec
-
+from pypy.interpreter.streamutil import wrap_streamerror, wrap_oserror_as_ioerror
 
 class W_File(W_AbstractStream):
     """An interp-level file object.  This implements the same interface than
diff --git a/pypy/module/_file/interp_stream.py b/pypy/module/_file/interp_stream.py
--- a/pypy/module/_file/interp_stream.py
+++ b/pypy/module/_file/interp_stream.py
@@ -2,27 +2,13 @@
 from pypy.rlib import streamio
 from pypy.rlib.streamio import StreamErrors
 
-from pypy.interpreter.error import OperationError, wrap_oserror2
+from pypy.interpreter.error import OperationError
 from pypy.interpreter.baseobjspace import ObjSpace, Wrappable
 from pypy.interpreter.typedef import TypeDef
 from pypy.interpreter.gateway import interp2app
+from pypy.interpreter.streamutil import wrap_streamerror, wrap_oserror_as_ioerror
 
 
-def wrap_streamerror(space, e, w_filename=None):
-    if isinstance(e, streamio.StreamError):
-        return OperationError(space.w_ValueError,
-                              space.wrap(e.message))
-    elif isinstance(e, OSError):
-        return wrap_oserror_as_ioerror(space, e, w_filename)
-    else:
-        # should not happen: wrap_streamerror() is only called when
-        # StreamErrors = (OSError, StreamError) are raised
-        return OperationError(space.w_IOError, space.w_None)
-
-def wrap_oserror_as_ioerror(space, e, w_filename=None):
-    return wrap_oserror2(space, e, w_filename,
-                         w_exception_class=space.w_IOError)
-
 class W_AbstractStream(Wrappable):
     """Base class for interp-level objects that expose streams to app-level"""
     slock = None
diff --git a/pypy/module/_io/__init__.py b/pypy/module/_io/__init__.py
--- a/pypy/module/_io/__init__.py
+++ b/pypy/module/_io/__init__.py
@@ -28,6 +28,7 @@
         }
 
     def init(self, space):
+        MixedModule.init(self, space)
         w_UnsupportedOperation = space.call_function(
             space.w_type,
             space.wrap('UnsupportedOperation'),
@@ -35,3 +36,9 @@
             space.newdict())
         space.setattr(self, space.wrap('UnsupportedOperation'),
                       w_UnsupportedOperation)
+
+    def shutdown(self, space):
+        # at shutdown, flush all open streams.  Ignore I/O errors.
+        from pypy.module._io.interp_iobase import get_autoflusher
+        get_autoflusher(space).flush_all(space)
+
diff --git a/pypy/module/_io/interp_iobase.py b/pypy/module/_io/interp_iobase.py
--- a/pypy/module/_io/interp_iobase.py
+++ b/pypy/module/_io/interp_iobase.py
@@ -5,6 +5,8 @@
 from pypy.interpreter.gateway import interp2app
 from pypy.interpreter.error import OperationError, operationerrfmt
 from pypy.rlib.rstring import StringBuilder
+from pypy.rlib import rweakref
+
 
 DEFAULT_BUFFER_SIZE = 8192
 
@@ -43,6 +45,8 @@
         self.space = space
         self.w_dict = space.newdict()
         self.__IOBase_closed = False
+        self.streamholder = None # needed by AutoFlusher
+        get_autoflusher(space).add(self)
 
     def getdict(self, space):
         return self.w_dict
@@ -98,6 +102,7 @@
             space.call_method(self, "flush")
         finally:
             self.__IOBase_closed = True
+            get_autoflusher(space).remove(self)
 
     def flush_w(self, space):
         if self._CLOSED():
@@ -303,3 +308,60 @@
     read = interp2app(W_RawIOBase.read_w),
     readall = interp2app(W_RawIOBase.readall_w),
 )
+
+
+# ------------------------------------------------------------
+# functions to make sure that all streams are flushed on exit
+# ------------------------------------------------------------
+
+class StreamHolder(object):
+
+    def __init__(self, w_iobase):
+        self.w_iobase_ref = rweakref.ref(w_iobase)
+        w_iobase.autoflusher = self
+
+    def autoflush(self, space):
+        w_iobase = self.w_iobase_ref()
+        if w_iobase is not None:
+            try:
+                space.call_method(w_iobase, 'flush')
+            except OperationError, e:
+                # if it's an IOError or ValueError, ignore it (ValueError is
+                # raised if by chance we are trying to flush a file which has
+                # already been closed)
+                if not (e.match(space, space.w_IOError) or
+                        e.match(space, space.w_ValueError)):
+                    raise
+
+
+class AutoFlusher(object):
+
+    def __init__(self, space):
+        self.streams = {}
+
+    def add(self, w_iobase):
+        assert w_iobase.streamholder is None
+        holder = StreamHolder(w_iobase)
+        w_iobase.streamholder = holder
+        self.streams[holder] = None
+
+    def remove(self, w_iobase):
+        holder = w_iobase.streamholder
+        if holder is not None:
+            del self.streams[holder]
+
+    def flush_all(self, space):
+        while self.streams:
+            for streamholder in self.streams.keys():
+                try:
+                    del self.streams[streamholder]
+                except KeyError:
+                    pass    # key was removed in the meantime
+                else:
+                    streamholder.autoflush(space)
+
+
+def get_autoflusher(space):
+    return space.fromcache(AutoFlusher)
+
+
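
The new machinery above gives every W_IOBase a StreamHolder that only weakly references it; at shutdown flush_all() drains the registry and flushes whatever is still alive, swallowing IOError and ValueError. A pure-Python sketch of the same bookkeeping, for illustration only (the real code is RPython and uses rweakref):

    import weakref

    class StreamHolder(object):
        def __init__(self, stream):
            self.stream_ref = weakref.ref(stream)   # do not keep the stream alive

        def autoflush(self):
            stream = self.stream_ref()
            if stream is not None:
                try:
                    stream.flush()
                except (IOError, ValueError):
                    pass    # e.g. flushing a file that was already closed

    class AutoFlusher(object):
        def __init__(self):
            self.streams = {}

        def add(self, stream):
            self.streams[StreamHolder(stream)] = None

        def flush_all(self):
            while self.streams:
                holder, _ = self.streams.popitem()
                holder.autoflush()
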
diff --git a/pypy/module/_io/test/test_fileio.py b/pypy/module/_io/test/test_fileio.py
--- a/pypy/module/_io/test/test_fileio.py
+++ b/pypy/module/_io/test/test_fileio.py
@@ -160,3 +160,42 @@
         f.close()
         assert repr(f) == "<_io.FileIO [closed]>"
 
+def test_flush_at_exit():
+    from pypy import conftest
+    from pypy.tool.option import make_config, make_objspace
+    from pypy.tool.udir import udir
+
+    tmpfile = udir.join('test_flush_at_exit')
+    config = make_config(conftest.option)
+    space = make_objspace(config)
+    space.appexec([space.wrap(str(tmpfile))], """(tmpfile):
+        import io
+        f = io.open(tmpfile, 'w', encoding='ascii')
+        f.write('42')
+        # no flush() and no close()
+        import sys; sys._keepalivesomewhereobscure = f
+    """)
+    space.finish()
+    assert tmpfile.read() == '42'
+
+def test_flush_at_exit_IOError_and_ValueError():
+    from pypy import conftest
+    from pypy.tool.option import make_config, make_objspace
+
+    config = make_config(conftest.option)
+    space = make_objspace(config)
+    space.appexec([], """():
+        import io
+        class MyStream(io.IOBase):
+            def flush(self):
+                raise IOError
+
+        class MyStream2(io.IOBase):
+            def flush(self):
+                raise ValueError
+
+        s = MyStream()
+        s2 = MyStream2()
+        import sys; sys._keepalivesomewhereobscure = s
+    """)
+    space.finish() # the IOError has been ignored
diff --git a/pypy/module/_lsprof/interp_lsprof.py b/pypy/module/_lsprof/interp_lsprof.py
--- a/pypy/module/_lsprof/interp_lsprof.py
+++ b/pypy/module/_lsprof/interp_lsprof.py
@@ -22,7 +22,7 @@
 eci = ExternalCompilationInfo(
     separate_module_files=[srcdir.join('profiling.c')],
     export_symbols=['pypy_setup_profiling', 'pypy_teardown_profiling'])
-                                                     
+
 c_setup_profiling = rffi.llexternal('pypy_setup_profiling',
                                   [], lltype.Void,
                                   compilation_info = eci)
@@ -228,7 +228,7 @@
         if w_self.builtins:
             key = create_spec(space, w_arg)
             w_self._enter_builtin_call(key)
-    elif event == 'c_return':
+    elif event == 'c_return' or event == 'c_exception':
         if w_self.builtins:
             key = create_spec(space, w_arg)
             w_self._enter_builtin_return(key)
@@ -237,7 +237,7 @@
         pass
 
 class W_Profiler(Wrappable):
-    
+
     def __init__(self, space, w_callable, time_unit, subcalls, builtins):
         self.subcalls = subcalls
         self.builtins = builtins
diff --git a/pypy/module/_lsprof/test/test_cprofile.py b/pypy/module/_lsprof/test/test_cprofile.py
--- a/pypy/module/_lsprof/test/test_cprofile.py
+++ b/pypy/module/_lsprof/test/test_cprofile.py
@@ -117,6 +117,20 @@
             assert 0.9 < subentry.totaltime < 2.9
             #assert 0.9 < subentry.inlinetime < 2.9
 
+    def test_builtin_exception(self):
+        import math
+        import _lsprof
+
+        prof = _lsprof.Profiler()
+        prof.enable()
+        try:
+            math.sqrt("a")
+        except TypeError:
+            pass
+        prof.disable()
+        stats = prof.getstats()
+        assert len(stats) == 2
+
     def test_use_cprofile(self):
         import sys, os
         # XXX this is evil trickery to walk around the fact that we don't
diff --git a/pypy/module/_md5/test/test_md5.py b/pypy/module/_md5/test/test_md5.py
--- a/pypy/module/_md5/test/test_md5.py
+++ b/pypy/module/_md5/test/test_md5.py
@@ -28,7 +28,7 @@
         assert self.md5.digest_size == 16
         #assert self.md5.digestsize == 16        -- not on CPython
         assert self.md5.md5().digest_size == 16
-        if sys.version >= (2, 5):
+        if sys.version_info >= (2, 5):
             assert self.md5.blocksize == 1
             assert self.md5.md5().digestsize == 16
 
diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py
--- a/pypy/module/array/interp_array.py
+++ b/pypy/module/array/interp_array.py
@@ -11,6 +11,7 @@
 from pypy.objspace.std.register_all import register_all
 from pypy.rlib.rarithmetic import ovfcheck
 from pypy.rlib.unroll import unrolling_iterable
+from pypy.rlib.objectmodel import specialize
 from pypy.rpython.lltypesystem import lltype, rffi
 
 
@@ -159,13 +160,15 @@
 
 
 def make_array(mytype):
+    W_ArrayBase = globals()['W_ArrayBase']
+
     class W_Array(W_ArrayBase):
         itemsize = mytype.bytes
         typecode = mytype.typecode
 
         @staticmethod
         def register(typeorder):
-            typeorder[W_Array] = []
+            typeorder[W_Array] = [(W_ArrayBase, None)]
 
         def __init__(self, space):
             self.space = space
@@ -583,13 +586,29 @@
             raise OperationError(space.w_ValueError, space.wrap(msg))
 
     # Compare methods
-    def cmp__Array_ANY(space, self, other):
-        if isinstance(other, W_ArrayBase):
-            w_lst1 = array_tolist__Array(space, self)
-            w_lst2 = space.call_method(other, 'tolist')
-            return space.cmp(w_lst1, w_lst2)
-        else:
-            return space.w_NotImplemented
+    @specialize.arg(3)
+    def _cmp_impl(space, self, other, space_fn):
+        w_lst1 = array_tolist__Array(space, self)
+        w_lst2 = space.call_method(other, 'tolist')
+        return space_fn(w_lst1, w_lst2)
+
+    def eq__Array_ArrayBase(space, self, other):
+        return _cmp_impl(space, self, other, space.eq)
+
+    def ne__Array_ArrayBase(space, self, other):
+        return _cmp_impl(space, self, other, space.ne)
+
+    def lt__Array_ArrayBase(space, self, other):
+        return _cmp_impl(space, self, other, space.lt)
+
+    def le__Array_ArrayBase(space, self, other):
+        return _cmp_impl(space, self, other, space.le)
+
+    def gt__Array_ArrayBase(space, self, other):
+        return _cmp_impl(space, self, other, space.gt)
+
+    def ge__Array_ArrayBase(space, self, other):
+        return _cmp_impl(space, self, other, space.ge)
 
     # Misc methods
 
diff --git a/pypy/module/array/test/test_array.py b/pypy/module/array/test/test_array.py
--- a/pypy/module/array/test/test_array.py
+++ b/pypy/module/array/test/test_array.py
@@ -536,12 +536,6 @@
                 assert (a >= c) is False
                 assert (c >= a) is True
 
-                assert cmp(a, a) == 0
-                assert cmp(a, b) == 0
-                assert cmp(a, c) <  0
-                assert cmp(b, a) == 0
-                assert cmp(c, a) >  0
-
     def test_reduce(self):
         import pickle
         a = self.array('i', [1, 2, 3])
@@ -851,8 +845,11 @@
         cls.maxint = sys.maxint
 
 class AppTestArray(BaseArrayTests):
+    OPTIONS = {}
+
     def setup_class(cls):
-        cls.space = gettestobjspace(usemodules=('array', 'struct', '_rawffi'))
+        cls.space = gettestobjspace(usemodules=('array', 'struct', '_rawffi'),
+                                    **cls.OPTIONS)
         cls.w_array = cls.space.appexec([], """():
             import array
             return array.array
@@ -874,3 +871,7 @@
         a = self.array('b', range(4))
         a[::-1] = a
         assert a == self.array('b', [3, 2, 1, 0])
+
+
+class AppTestArrayBuiltinShortcut(AppTestArray):
+    OPTIONS = {'objspace.std.builtinshortcut': True}
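
The interp_array.py hunk above replaces the single cmp-based multimethod with six rich comparisons that all funnel through _cmp_impl(); the @specialize.arg(3) hint asks the RPython annotator to emit one specialized copy per space_fn, so each copy is compiled against a fixed comparison. A plain-Python sketch of that dispatch shape, with operator functions standing in for the space.* methods (illustration only, not RPython):

    import array, operator

    def _cmp_impl(a, b, op):           # op plays the role of space_fn
        return op(list(a), list(b))    # list(a) stands in for array.tolist()

    def make_comparison(op):
        def compare(a, b):
            return _cmp_impl(a, b, op)
        return compare

    eq = make_comparison(operator.eq)
    lt = make_comparison(operator.lt)

    a = array.array('i', [1, 2, 3])
    b = array.array('i', [1, 2, 4])
    assert eq(a, a) and lt(a, b)
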
diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py
--- a/pypy/module/cpyext/api.py
+++ b/pypy/module/cpyext/api.py
@@ -352,6 +352,9 @@
     'PyObject_AsReadBuffer', 'PyObject_AsWriteBuffer', 'PyObject_CheckReadBuffer',
 
     'PyOS_getsig', 'PyOS_setsig',
+    'PyThread_create_key', 'PyThread_delete_key', 'PyThread_set_key_value',
+    'PyThread_get_key_value', 'PyThread_delete_key_value',
+    'PyThread_ReInitTLS',
 
     'PyStructSequence_InitType', 'PyStructSequence_New',
 ]
@@ -385,6 +388,7 @@
         "Tuple": "space.w_tuple",
         "List": "space.w_list",
         "Set": "space.w_set",
+        "FrozenSet": "space.w_frozenset",
         "Int": "space.w_int",
         "Bool": "space.w_bool",
         "Float": "space.w_float",
@@ -406,7 +410,7 @@
         }.items():
         GLOBALS['Py%s_Type#' % (cpyname, )] = ('PyTypeObject*', pypyexpr)
 
-    for cpyname in 'Method List Int Long Dict Tuple Class'.split():
+    for cpyname in 'Method List Long Dict Tuple Class'.split():
         FORWARD_DECLS.append('typedef struct { PyObject_HEAD } '
                              'Py%sObject' % (cpyname, ))
 build_exported_objects()
@@ -616,6 +620,10 @@
         lambda space: init_pycobject(),
         lambda space: init_capsule(),
     ])
+    from pypy.module.posix.interp_posix import add_fork_hook
+    reinit_tls = rffi.llexternal('PyThread_ReInitTLS', [], lltype.Void,
+                                 compilation_info=eci)
+    add_fork_hook('child', reinit_tls)
 
 def init_function(func):
     INIT_FUNCTIONS.append(func)
@@ -925,6 +933,7 @@
                                source_dir / "structseq.c",
                                source_dir / "capsule.c",
                                source_dir / "pysignals.c",
+                               source_dir / "thread.c",
                                ],
         separate_module_sources=separate_module_sources,
         export_symbols=export_symbols_eci,
diff --git a/pypy/module/cpyext/eval.py b/pypy/module/cpyext/eval.py
--- a/pypy/module/cpyext/eval.py
+++ b/pypy/module/cpyext/eval.py
@@ -1,16 +1,24 @@
 from pypy.interpreter.error import OperationError
+from pypy.interpreter.astcompiler import consts
 from pypy.rpython.lltypesystem import rffi, lltype
 from pypy.module.cpyext.api import (
     cpython_api, CANNOT_FAIL, CONST_STRING, FILEP, fread, feof, Py_ssize_tP,
     cpython_struct)
 from pypy.module.cpyext.pyobject import PyObject, borrow_from
 from pypy.module.cpyext.pyerrors import PyErr_SetFromErrno
+from pypy.module.cpyext.funcobject import PyCodeObject
 from pypy.module.__builtin__ import compiling
 
 PyCompilerFlags = cpython_struct(
-    "PyCompilerFlags", ())
+    "PyCompilerFlags", (("cf_flags", rffi.INT),))
 PyCompilerFlagsPtr = lltype.Ptr(PyCompilerFlags)
 
+PyCF_MASK = (consts.CO_FUTURE_DIVISION |
+             consts.CO_FUTURE_ABSOLUTE_IMPORT |
+             consts.CO_FUTURE_WITH_STATEMENT |
+             consts.CO_FUTURE_PRINT_FUNCTION |
+             consts.CO_FUTURE_UNICODE_LITERALS)
+
 @cpython_api([PyObject, PyObject, PyObject], PyObject)
 def PyEval_CallObjectWithKeywords(space, w_obj, w_arg, w_kwds):
     return space.call(w_obj, w_arg, w_kwds)
@@ -48,6 +56,17 @@
         return None
     return borrow_from(None, caller.w_globals)
 
+@cpython_api([PyCodeObject, PyObject, PyObject], PyObject)
+def PyEval_EvalCode(space, w_code, w_globals, w_locals):
+    """This is a simplified interface to PyEval_EvalCodeEx(), with just
+    the code object, and the dictionaries of global and local variables.
+    The other arguments are set to NULL."""
+    if w_globals is None:
+        w_globals = space.w_None
+    if w_locals is None:
+        w_locals = space.w_None
+    return compiling.eval(space, w_code, w_globals, w_locals)
+
 @cpython_api([PyObject, PyObject], PyObject)
 def PyObject_CallObject(space, w_obj, w_arg):
     """
@@ -74,7 +93,7 @@
 Py_file_input = 257
 Py_eval_input = 258
 
-def compile_string(space, source, filename, start):
+def compile_string(space, source, filename, start, flags=0):
     w_source = space.wrap(source)
     start = rffi.cast(lltype.Signed, start)
     if start == Py_file_input:
@@ -86,7 +105,7 @@
     else:
         raise OperationError(space.w_ValueError, space.wrap(
             "invalid mode parameter for compilation"))
-    return compiling.compile(space, w_source, filename, mode)
+    return compiling.compile(space, w_source, filename, mode, flags)
 
 def run_string(space, source, filename, start, w_globals, w_locals):
     w_code = compile_string(space, source, filename, start)
@@ -109,6 +128,24 @@
     filename = "<string>"
     return run_string(space, source, filename, start, w_globals, w_locals)
 
+@cpython_api([rffi.CCHARP, rffi.INT_real, PyObject, PyObject,
+              PyCompilerFlagsPtr], PyObject)
+def PyRun_StringFlags(space, source, start, w_globals, w_locals, flagsptr):
+    """Execute Python source code from str in the context specified by the
+    dictionaries globals and locals with the compiler flags specified by
+    flags.  The parameter start specifies the start token that should be used to
+    parse the source code.
+
+    Returns the result of executing the code as a Python object, or NULL if an
+    exception was raised."""
+    source = rffi.charp2str(source)
+    if flagsptr:
+        flags = rffi.cast(lltype.Signed, flagsptr.c_cf_flags)
+    else:
+        flags = 0
+    w_code = compile_string(space, source, "<string>", start, flags)
+    return compiling.eval(space, w_code, w_globals, w_locals)
+
 @cpython_api([FILEP, CONST_STRING, rffi.INT_real, PyObject, PyObject], PyObject)
 def PyRun_File(space, fp, filename, start, w_globals, w_locals):
     """This is a simplified interface to PyRun_FileExFlags() below, leaving
@@ -150,7 +187,7 @@
 
 @cpython_api([rffi.CCHARP, rffi.CCHARP, rffi.INT_real, PyCompilerFlagsPtr],
              PyObject)
-def Py_CompileStringFlags(space, source, filename, start, flags):
+def Py_CompileStringFlags(space, source, filename, start, flagsptr):
     """Parse and compile the Python source code in str, returning the
     resulting code object.  The start token is given by start; this
     can be used to constrain the code which can be compiled and should
@@ -160,7 +197,30 @@
     returns NULL if the code cannot be parsed or compiled."""
     source = rffi.charp2str(source)
     filename = rffi.charp2str(filename)
-    if flags:
-        raise OperationError(space.w_NotImplementedError, space.wrap(
-                "cpyext Py_CompileStringFlags does not accept flags"))
-    return compile_string(space, source, filename, start)
+    if flagsptr:
+        flags = rffi.cast(lltype.Signed, flagsptr.c_cf_flags)
+    else:
+        flags = 0
+    return compile_string(space, source, filename, start, flags)
+
+@cpython_api([PyCompilerFlagsPtr], rffi.INT_real, error=CANNOT_FAIL)
+def PyEval_MergeCompilerFlags(space, cf):
+    """This function changes the flags of the current evaluation
+    frame, and returns true on success, false on failure."""
+    flags = rffi.cast(lltype.Signed, cf.c_cf_flags)
+    result = flags != 0
+    current_frame = space.getexecutioncontext().gettopframe_nohidden()
+    if current_frame:
+        codeflags = current_frame.pycode.co_flags
+        compilerflags = codeflags & PyCF_MASK
+        if compilerflags:
+            result = 1
+            flags |= compilerflags
+        # No future keyword at the moment
+        # if codeflags & CO_GENERATOR_ALLOWED:
+        #     result = 1
+        #     flags |= CO_GENERATOR_ALLOWED
+    cf.c_cf_flags = rffi.cast(rffi.INT, flags)
+    return result
+
+
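
PyEval_MergeCompilerFlags() ors the CO_FUTURE_* bits of the currently executing frame into the caller's PyCompilerFlags, and PyRun_StringFlags()/Py_CompileStringFlags() pass those bits down to the compiler. The effect is the same one that plain Python 2.7 exposes through the optional flags argument of compile(); a small illustration, not part of the changeset:

    import __future__

    flag = __future__.division.compiler_flag           # CO_FUTURE_DIVISION == 0x2000
    code_default = compile("1/2", "<flags demo>", "eval")
    code_future = compile("1/2", "<flags demo>", "eval", flag)
    assert eval(code_default) == 0      # classic integer division under Python 2.7
    assert eval(code_future) == 0.5     # future division forced by the flag
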
diff --git a/pypy/module/cpyext/funcobject.py b/pypy/module/cpyext/funcobject.py
--- a/pypy/module/cpyext/funcobject.py
+++ b/pypy/module/cpyext/funcobject.py
@@ -1,6 +1,6 @@
 from pypy.rpython.lltypesystem import rffi, lltype
 from pypy.module.cpyext.api import (
-    PyObjectFields, generic_cpy_call, CONST_STRING,
+    PyObjectFields, generic_cpy_call, CONST_STRING, CANNOT_FAIL,
     cpython_api, bootstrap_function, cpython_struct, build_type_checkers)
 from pypy.module.cpyext.pyobject import (
     PyObject, make_ref, from_ref, Py_DecRef, make_typedescr, borrow_from)
@@ -48,6 +48,7 @@
 
 PyFunction_Check, PyFunction_CheckExact = build_type_checkers("Function", Function)
 PyMethod_Check, PyMethod_CheckExact = build_type_checkers("Method", Method)
+PyCode_Check, PyCode_CheckExact = build_type_checkers("Code", PyCode)
 
 def function_attach(space, py_obj, w_obj):
     py_func = rffi.cast(PyFunctionObject, py_obj)
@@ -167,3 +168,9 @@
                              freevars=[],
                              cellvars=[]))
 
+@cpython_api([PyObject], rffi.INT_real, error=CANNOT_FAIL)
+def PyCode_GetNumFree(space, w_co):
+    """Return the number of free variables in co."""
+    co = space.interp_w(PyCode, w_co)
+    return len(co.co_freevars)
+
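
PyCode_GetNumFree() simply reports len(co_freevars) of the code object, i.e. how many closure variables it pulls in from enclosing scopes. The same count is visible from pure Python 2 (illustration only):

    def outer():
        a = 5
        def method(x):
            return a, x
        return method

    code = outer().func_code             # Python 2 spelling of __code__
    assert code.co_freevars == ('a',)    # 'a' is the one free (closure) variable
    assert len(code.co_freevars) == 1    # what PyCode_GetNumFree() returns
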
diff --git a/pypy/module/cpyext/include/Python.h b/pypy/module/cpyext/include/Python.h
--- a/pypy/module/cpyext/include/Python.h
+++ b/pypy/module/cpyext/include/Python.h
@@ -113,6 +113,7 @@
 #include "compile.h"
 #include "frameobject.h"
 #include "eval.h"
+#include "pymath.h"
 #include "pymem.h"
 #include "pycobject.h"
 #include "pycapsule.h"
diff --git a/pypy/module/cpyext/include/code.h b/pypy/module/cpyext/include/code.h
--- a/pypy/module/cpyext/include/code.h
+++ b/pypy/module/cpyext/include/code.h
@@ -13,13 +13,19 @@
 
 /* Masks for co_flags above */
 /* These values are also in funcobject.py */
-#define CO_OPTIMIZED	0x0001
-#define CO_NEWLOCALS	0x0002
-#define CO_VARARGS	0x0004
-#define CO_VARKEYWORDS	0x0008
+#define CO_OPTIMIZED    0x0001
+#define CO_NEWLOCALS    0x0002
+#define CO_VARARGS      0x0004
+#define CO_VARKEYWORDS  0x0008
 #define CO_NESTED       0x0010
 #define CO_GENERATOR    0x0020
 
+#define CO_FUTURE_DIVISION         0x02000
+#define CO_FUTURE_ABSOLUTE_IMPORT  0x04000
+#define CO_FUTURE_WITH_STATEMENT   0x08000
+#define CO_FUTURE_PRINT_FUNCTION   0x10000
+#define CO_FUTURE_UNICODE_LITERALS 0x20000
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/pypy/module/cpyext/include/intobject.h b/pypy/module/cpyext/include/intobject.h
--- a/pypy/module/cpyext/include/intobject.h
+++ b/pypy/module/cpyext/include/intobject.h
@@ -7,6 +7,11 @@
 extern "C" {
 #endif
 
+typedef struct {
+    PyObject_HEAD
+    long ob_ival;
+} PyIntObject;
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/pypy/module/cpyext/include/object.h b/pypy/module/cpyext/include/object.h
--- a/pypy/module/cpyext/include/object.h
+++ b/pypy/module/cpyext/include/object.h
@@ -56,6 +56,8 @@
 #define Py_TYPE(ob)		(((PyObject*)(ob))->ob_type)
 #define Py_SIZE(ob)		(((PyVarObject*)(ob))->ob_size)
 
+#define _Py_ForgetReference(ob) /* nothing */
+
 #define Py_None (&_Py_NoneStruct)
 
 /*
diff --git a/pypy/module/cpyext/include/pymath.h b/pypy/module/cpyext/include/pymath.h
new file mode 100644
--- /dev/null
+++ b/pypy/module/cpyext/include/pymath.h
@@ -0,0 +1,20 @@
+#ifndef Py_PYMATH_H
+#define Py_PYMATH_H
+
+/**************************************************************************
+Symbols and macros to supply platform-independent interfaces to mathematical
+functions and constants
+**************************************************************************/
+
+/* HUGE_VAL is supposed to expand to a positive double infinity.  Python
+ * uses Py_HUGE_VAL instead because some platforms are broken in this
+ * respect.  We used to embed code in pyport.h to try to worm around that,
+ * but different platforms are broken in conflicting ways.  If you're on
+ * a platform where HUGE_VAL is defined incorrectly, fiddle your Python
+ * config to #define Py_HUGE_VAL to something that works on your platform.
+ */
+#ifndef Py_HUGE_VAL
+#define Py_HUGE_VAL HUGE_VAL
+#endif
+
+#endif /* Py_PYMATH_H */
diff --git a/pypy/module/cpyext/include/pythonrun.h b/pypy/module/cpyext/include/pythonrun.h
--- a/pypy/module/cpyext/include/pythonrun.h
+++ b/pypy/module/cpyext/include/pythonrun.h
@@ -19,6 +19,14 @@
     int cf_flags;  /* bitmask of CO_xxx flags relevant to future */
 } PyCompilerFlags;
 
+#define PyCF_MASK (CO_FUTURE_DIVISION | CO_FUTURE_ABSOLUTE_IMPORT | \
+                   CO_FUTURE_WITH_STATEMENT | CO_FUTURE_PRINT_FUNCTION | \
+                   CO_FUTURE_UNICODE_LITERALS)
+#define PyCF_MASK_OBSOLETE (CO_NESTED)
+#define PyCF_SOURCE_IS_UTF8  0x0100
+#define PyCF_DONT_IMPLY_DEDENT 0x0200
+#define PyCF_ONLY_AST 0x0400
+
 #define Py_CompileString(str, filename, start) Py_CompileStringFlags(str, filename, start, NULL)
 
 #ifdef __cplusplus
diff --git a/pypy/module/cpyext/include/pythread.h b/pypy/module/cpyext/include/pythread.h
--- a/pypy/module/cpyext/include/pythread.h
+++ b/pypy/module/cpyext/include/pythread.h
@@ -3,8 +3,26 @@
 
 #define WITH_THREAD
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 typedef void *PyThread_type_lock;
 #define WAIT_LOCK	1
 #define NOWAIT_LOCK	0
 
+/* Thread Local Storage (TLS) API */
+PyAPI_FUNC(int) PyThread_create_key(void);
+PyAPI_FUNC(void) PyThread_delete_key(int);
+PyAPI_FUNC(int) PyThread_set_key_value(int, void *);
+PyAPI_FUNC(void *) PyThread_get_key_value(int);
+PyAPI_FUNC(void) PyThread_delete_key_value(int key);
+
+/* Cleanup after a fork */
+PyAPI_FUNC(void) PyThread_ReInitTLS(void);
+
+#ifdef __cplusplus
+}
 #endif
+
+#endif
diff --git a/pypy/module/cpyext/intobject.py b/pypy/module/cpyext/intobject.py
--- a/pypy/module/cpyext/intobject.py
+++ b/pypy/module/cpyext/intobject.py
@@ -2,11 +2,37 @@
 from pypy.rpython.lltypesystem import rffi, lltype
 from pypy.interpreter.error import OperationError
 from pypy.module.cpyext.api import (
-    cpython_api, build_type_checkers, PyObject,
-    CONST_STRING, CANNOT_FAIL, Py_ssize_t)
+    cpython_api, cpython_struct, build_type_checkers, bootstrap_function,
+    PyObject, PyObjectFields, CONST_STRING, CANNOT_FAIL, Py_ssize_t)
+from pypy.module.cpyext.pyobject import (
+    make_typedescr, track_reference, RefcountState, from_ref)
 from pypy.rlib.rarithmetic import r_uint, intmask, LONG_TEST
+from pypy.objspace.std.intobject import W_IntObject
 import sys
 
+PyIntObjectStruct = lltype.ForwardReference()
+PyIntObject = lltype.Ptr(PyIntObjectStruct)
+PyIntObjectFields = PyObjectFields + \
+    (("ob_ival", rffi.LONG),)
+cpython_struct("PyIntObject", PyIntObjectFields, PyIntObjectStruct)
+
+@bootstrap_function
+def init_intobject(space):
+    "Type description of PyIntObject"
+    make_typedescr(space.w_int.instancetypedef,
+                   basestruct=PyIntObject.TO,
+                   realize=int_realize)
+
+def int_realize(space, obj):
+    intval = rffi.cast(lltype.Signed, rffi.cast(PyIntObject, obj).c_ob_ival)
+    w_type = from_ref(space, rffi.cast(PyObject, obj.c_ob_type))
+    w_obj = space.allocate_instance(W_IntObject, w_type)
+    w_obj.__init__(intval)
+    track_reference(space, obj, w_obj)
+    state = space.fromcache(RefcountState)
+    state.set_lifeline(w_obj, obj)
+    return w_obj
+
 PyInt_Check, PyInt_CheckExact = build_type_checkers("Int")
 
 @cpython_api([], lltype.Signed, error=CANNOT_FAIL)
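
int_realize() is the "realize" direction of the cpyext mapping: given a C-level PyIntObject whose ob_ival field is already filled in, it creates the matching interp-level W_IntObject (respecting subtypes via allocate_instance) and records a lifeline so the two representations stay paired. A schematic plain-Python sketch of that flow; the names mirror the code above but nothing here is RPython:

    class FakePyIntObject(object):           # stands in for the lltype struct
        def __init__(self, ival):
            self.c_ob_ival = ival

    class W_IntObject(object):
        def __init__(self, intval):
            self.intval = intval

    lifelines = {}                           # stands in for RefcountState

    def int_realize(obj):
        w_obj = W_IntObject(obj.c_ob_ival)   # copy the C-level value across
        lifelines[id(obj)] = w_obj           # like state.set_lifeline(w_obj, obj)
        return w_obj

    assert int_realize(FakePyIntObject(42)).intval == 42
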
diff --git a/pypy/module/cpyext/object.py b/pypy/module/cpyext/object.py
--- a/pypy/module/cpyext/object.py
+++ b/pypy/module/cpyext/object.py
@@ -193,7 +193,7 @@
     if not obj:
         PyErr_NoMemory(space)
     obj.c_ob_type = type
-    _Py_NewReference(space, obj)
+    obj.c_ob_refcnt = 1
     return obj
 
 @cpython_api([PyVarObject, PyTypeObjectPtr, Py_ssize_t], PyObject)
diff --git a/pypy/module/cpyext/pyobject.py b/pypy/module/cpyext/pyobject.py
--- a/pypy/module/cpyext/pyobject.py
+++ b/pypy/module/cpyext/pyobject.py
@@ -17,6 +17,7 @@
 
 class BaseCpyTypedescr(object):
     basestruct = PyObject.TO
+    W_BaseObject = W_ObjectObject
 
     def get_dealloc(self, space):
         from pypy.module.cpyext.typeobject import subtype_dealloc
@@ -51,10 +52,14 @@
     def attach(self, space, pyobj, w_obj):
         pass
 
-    def realize(self, space, ref):
-        # For most types, a reference cannot exist without
-        # a real interpreter object
-        raise InvalidPointerException(str(ref))
+    def realize(self, space, obj):
+        w_type = from_ref(space, rffi.cast(PyObject, obj.c_ob_type))
+        w_obj = space.allocate_instance(self.W_BaseObject, w_type)
+        track_reference(space, obj, w_obj)
+        if w_type is not space.gettypefor(self.W_BaseObject):
+            state = space.fromcache(RefcountState)
+            state.set_lifeline(w_obj, obj)
+        return w_obj
 
 typedescr_cache = {}
 
@@ -369,13 +374,7 @@
     obj.c_ob_refcnt = 1
     w_type = from_ref(space, rffi.cast(PyObject, obj.c_ob_type))
     assert isinstance(w_type, W_TypeObject)
-    if w_type.is_cpytype():
-        w_obj = space.allocate_instance(W_ObjectObject, w_type)
-        track_reference(space, obj, w_obj)
-        state = space.fromcache(RefcountState)
-        state.set_lifeline(w_obj, obj)
-    else:
-        assert False, "Please add more cases in _Py_NewReference()"
+    get_typedescr(w_type.instancetypedef).realize(space, obj)
 
 def _Py_Dealloc(space, obj):
     from pypy.module.cpyext.api import generic_cpy_call_dont_decref
diff --git a/pypy/module/cpyext/src/thread.c b/pypy/module/cpyext/src/thread.c
new file mode 100644
--- /dev/null
+++ b/pypy/module/cpyext/src/thread.c
@@ -0,0 +1,313 @@
+#include <Python.h>
+#include "pythread.h"
+
+/* ------------------------------------------------------------------------
+Per-thread data ("key") support.
+
+Use PyThread_create_key() to create a new key.  This is typically shared
+across threads.
+
+Use PyThread_set_key_value(thekey, value) to associate void* value with
+thekey in the current thread.  Each thread has a distinct mapping of thekey
+to a void* value.  Caution:  if the current thread already has a mapping
+for thekey, value is ignored.
+
+Use PyThread_get_key_value(thekey) to retrieve the void* value associated
+with thekey in the current thread.  This returns NULL if no value is
+associated with thekey in the current thread.
+
+Use PyThread_delete_key_value(thekey) to forget the current thread's associated
+value for thekey.  PyThread_delete_key(thekey) forgets the values associated
+with thekey across *all* threads.
+
+While some of these functions have error-return values, none set any
+Python exception.
+
+None of the functions does memory management on behalf of the void* values.
+You need to allocate and deallocate them yourself.  If the void* values
+happen to be PyObject*, these functions don't do refcount operations on
+them either.
+
+The GIL does not need to be held when calling these functions; they supply
+their own locking.  This isn't true of PyThread_create_key(), though (see
+next paragraph).
+
+There's a hidden assumption that PyThread_create_key() will be called before
+any of the other functions are called.  There's also a hidden assumption
+that calls to PyThread_create_key() are serialized externally.
+------------------------------------------------------------------------ */
+
+#ifdef MS_WINDOWS
+#include <windows.h>
+
+/* use native Windows TLS functions */
+#define Py_HAVE_NATIVE_TLS
+
+int
+PyThread_create_key(void)
+{
+    return (int) TlsAlloc();
+}
+
+void
+PyThread_delete_key(int key)
+{
+    TlsFree(key);
+}
+
+/* We must be careful to emulate the strange semantics implemented in thread.c,
+ * where the value is only set if it hasn't been set before.
+ */
+int
+PyThread_set_key_value(int key, void *value)
+{
+    BOOL ok;
+    void *oldvalue;
+
+    assert(value != NULL);
+    oldvalue = TlsGetValue(key);
+    if (oldvalue != NULL)
+        /* ignore value if already set */
+        return 0;
+    ok = TlsSetValue(key, value);
+    if (!ok)
+        return -1;
+    return 0;
+}
+
+void *
+PyThread_get_key_value(int key)
+{
+    /* because TLS is used in the Py_END_ALLOW_THREAD macro,
+     * it is necessary to preserve the windows error state, because
+     * it is assumed to be preserved across the call to the macro.
+     * Ideally, the macro should be fixed, but it is simpler to
+     * do it here.
+     */
+    DWORD error = GetLastError();
+    void *result = TlsGetValue(key);
+    SetLastError(error);
+    return result;
+}
+
+void
+PyThread_delete_key_value(int key)
+{
+    /* NULL is used as "key missing", and it is also the default
+     * given by TlsGetValue() if nothing has been set yet.
+     */
+    TlsSetValue(key, NULL);
+}
+
+/* reinitialization of TLS is not necessary after fork when using
+ * the native TLS functions.  And forking isn't supported on Windows either.
+ */
+void
+PyThread_ReInitTLS(void)
+{}
+
+#else  /* MS_WINDOWS */
+
+/* A singly-linked list of struct key objects remembers all the key->value
+ * associations.  File static keyhead heads the list.  keymutex is used
+ * to enforce exclusion internally.
+ */
+struct key {
+    /* Next record in the list, or NULL if this is the last record. */
+    struct key *next;
+
+    /* The thread id, according to PyThread_get_thread_ident(). */
+    long id;
+
+    /* The key and its associated value. */
+    int key;
+    void *value;
+};
+
+static struct key *keyhead = NULL;
+static PyThread_type_lock keymutex = NULL;
+static int nkeys = 0;  /* PyThread_create_key() hands out nkeys+1 next */
+
+/* Internal helper.
+ * If the current thread has a mapping for key, the appropriate struct key*
+ * is returned.  NB:  value is ignored in this case!
+ * If there is no mapping for key in the current thread, then:
+ *     If value is NULL, NULL is returned.
+ *     Else a mapping of key to value is created for the current thread,
+ *     and a pointer to a new struct key* is returned; except that if
+ *     malloc() can't find room for a new struct key*, NULL is returned.
+ * So when value==NULL, this acts like a pure lookup routine, and when
+ * value!=NULL, this acts like dict.setdefault(), returning an existing
+ * mapping if one exists, else creating a new mapping.
+ *
+ * Caution:  this used to be too clever, trying to hold keymutex only
+ * around the "p->next = keyhead; keyhead = p" pair.  That allowed
+ * another thread to mutate the list, via key deletion, concurrent with
+ * find_key() crawling over the list.  Hilarity ensued.  For example, when
+ * the for-loop here does "p = p->next", p could end up pointing at a
+ * record that PyThread_delete_key_value() was concurrently free()'ing.
+ * That could lead to anything, from failing to find a key that exists, to
+ * segfaults.  Now we lock the whole routine.
+ */
+static struct key *
+find_key(int key, void *value)
+{
+    struct key *p, *prev_p;
+    long id = PyThread_get_thread_ident();
+
+    if (!keymutex)
+        return NULL;
+    PyThread_acquire_lock(keymutex, 1);
+    prev_p = NULL;
+    for (p = keyhead; p != NULL; p = p->next) {
+        if (p->id == id && p->key == key)
+            goto Done;
+        /* Sanity check.  These states should never happen but if
+         * they do we must abort.  Otherwise we'll end up spinning in
+         * a tight loop with the lock held.  A similar check is done
+         * in pystate.c tstate_delete_common().  */
+        if (p == prev_p)
+            Py_FatalError("tls find_key: small circular list(!)");
+        prev_p = p;
+        if (p->next == keyhead)
+            Py_FatalError("tls find_key: circular list(!)");
+    }
+    if (value == NULL) {
+        assert(p == NULL);
+        goto Done;
+    }
+    p = (struct key *)malloc(sizeof(struct key));
+    if (p != NULL) {
+        p->id = id;
+        p->key = key;
+        p->value = value;
+        p->next = keyhead;
+        keyhead = p;
+    }
+ Done:
+    PyThread_release_lock(keymutex);
+    return p;
+}
+
+/* Return a new key.  This must be called before any other functions in
+ * this family, and callers must arrange to serialize calls to this
+ * function.  No violations are detected.
+ */
+int
+PyThread_create_key(void)
+{
+    /* All parts of this function are wrong if it's called by multiple
+     * threads simultaneously.
+     */
+    if (keymutex == NULL)
+        keymutex = PyThread_allocate_lock();
+    return ++nkeys;
+}
+
+/* Forget the associations for key across *all* threads. */
+void
+PyThread_delete_key(int key)
+{
+    struct key *p, **q;
+
+    PyThread_acquire_lock(keymutex, 1);
+    q = &keyhead;
+    while ((p = *q) != NULL) {
+        if (p->key == key) {
+            *q = p->next;
+            free((void *)p);
+            /* NB This does *not* free p->value! */
+        }
+        else
+            q = &p->next;
+    }
+    PyThread_release_lock(keymutex);
+}
+
+/* Confusing:  If the current thread has an association for key,
+ * value is ignored, and 0 is returned.  Else an attempt is made to create
+ * an association of key to value for the current thread.  0 is returned
+ * if that succeeds, but -1 is returned if there's not enough memory
+ * to create the association.  value must not be NULL.
+ */
+int
+PyThread_set_key_value(int key, void *value)
+{
+    struct key *p;
+
+    assert(value != NULL);
+    p = find_key(key, value);
+    if (p == NULL)
+        return -1;
+    else
+        return 0;
+}
+
+/* Retrieve the value associated with key in the current thread, or NULL
+ * if the current thread doesn't have an association for key.
+ */
+void *
+PyThread_get_key_value(int key)
+{
+    struct key *p = find_key(key, NULL);
+
+    if (p == NULL)
+        return NULL;
+    else
+        return p->value;
+}
+
+/* Forget the current thread's association for key, if any. */
+void
+PyThread_delete_key_value(int key)
+{
+    long id = PyThread_get_thread_ident();
+    struct key *p, **q;
+
+    PyThread_acquire_lock(keymutex, 1);
+    q = &keyhead;
+    while ((p = *q) != NULL) {
+        if (p->key == key && p->id == id) {
+            *q = p->next;
+            free((void *)p);
+            /* NB This does *not* free p->value! */
+            break;
+        }
+        else
+            q = &p->next;
+    }
+    PyThread_release_lock(keymutex);
+}
+
+/* Forget everything not associated with the current thread id.
+ * This function is called from PyOS_AfterFork().  It is necessary
+ * because other thread ids which were in use at the time of the fork
+ * may be reused for new threads created in the forked process.
+ */
+void
+PyThread_ReInitTLS(void)
+{
+    long id = PyThread_get_thread_ident();
+    struct key *p, **q;
+
+    if (!keymutex)
+        return;
+
+    /* As with interpreter_lock in PyEval_ReInitThreads()
+       we just create a new lock without freeing the old one */
+    keymutex = PyThread_allocate_lock();
+
+    /* Delete all keys which do not match the current thread id */
+    q = &keyhead;
+    while ((p = *q) != NULL) {
+        if (p->id != id) {
+            *q = p->next;
+            free((void *)p);
+            /* NB This does *not* free p->value! */
+        }
+        else
+            q = &p->next;
+    }
+}
+
+#endif  /* !MS_WINDOWS */
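
The non-Windows branch above stores one value per (key, thread id) pair in a singly-linked list guarded by a single mutex, with set-once semantics and no memory management of the stored pointers. A rough pure-Python analogue of those semantics, for illustration only (the real consumers are C extensions calling the PyThread_* functions):

    import threading

    _values = {}                    # maps (key, thread ident) -> opaque value
    _mutex = threading.Lock()
    _nkeys = [0]

    def create_key():
        _nkeys[0] += 1              # like PyThread_create_key(): hand out nkeys+1
        return _nkeys[0]

    def set_key_value(key, value):
        ident = threading.current_thread().ident
        with _mutex:
            # like PyThread_set_key_value(): keep the first value, ignore later ones
            _values.setdefault((key, ident), value)
        return 0

    def get_key_value(key):
        ident = threading.current_thread().ident
        with _mutex:
            return _values.get((key, ident))    # None plays the role of NULL

    def delete_key_value(key):
        ident = threading.current_thread().ident
        with _mutex:
            _values.pop((key, ident), None)
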
diff --git a/pypy/module/cpyext/stubs.py b/pypy/module/cpyext/stubs.py
--- a/pypy/module/cpyext/stubs.py
+++ b/pypy/module/cpyext/stubs.py
@@ -182,16 +182,6 @@
     used as the positional and keyword parameters to the object's constructor."""
     raise NotImplementedError
 
-@cpython_api([PyObject], rffi.INT_real, error=CANNOT_FAIL)
-def PyCode_Check(space, co):
-    """Return true if co is a code object"""
-    raise NotImplementedError
-
-@cpython_api([PyObject], rffi.INT_real, error=CANNOT_FAIL)
-def PyCode_GetNumFree(space, co):
-    """Return the number of free variables in co."""
-    raise NotImplementedError
-
 @cpython_api([PyObject], rffi.INT_real, error=-1)
 def PyCodec_Register(space, search_function):
     """Register a new codec search function.
@@ -1853,26 +1843,6 @@
     """
     raise NotImplementedError
 
-@cpython_api([Py_UNICODE], rffi.INT_real, error=CANNOT_FAIL)
-def Py_UNICODE_ISTITLE(space, ch):
-    """Return 1 or 0 depending on whether ch is a titlecase character."""
-    raise NotImplementedError
-
-@cpython_api([Py_UNICODE], rffi.INT_real, error=CANNOT_FAIL)
-def Py_UNICODE_ISDIGIT(space, ch):
-    """Return 1 or 0 depending on whether ch is a digit character."""
-    raise NotImplementedError
-
-@cpython_api([Py_UNICODE], rffi.INT_real, error=CANNOT_FAIL)
-def Py_UNICODE_ISNUMERIC(space, ch):
-    """Return 1 or 0 depending on whether ch is a numeric character."""
-    raise NotImplementedError
-
-@cpython_api([Py_UNICODE], rffi.INT_real, error=CANNOT_FAIL)
-def Py_UNICODE_ISALPHA(space, ch):
-    """Return 1 or 0 depending on whether ch is an alphabetic character."""
-    raise NotImplementedError
-
 @cpython_api([rffi.CCHARP], PyObject)
 def PyUnicode_FromFormat(space, format):
     """Take a C printf()-style format string and a variable number of
@@ -2317,17 +2287,6 @@
     use the default error handling."""
     raise NotImplementedError
 
-@cpython_api([PyObject, PyObject, Py_ssize_t, Py_ssize_t, rffi.INT_real], rffi.INT_real, error=-1)
-def PyUnicode_Tailmatch(space, str, substr, start, end, direction):
-    """Return 1 if substr matches str*[*start:end] at the given tail end
-    (direction == -1 means to do a prefix match, direction == 1 a suffix match),
-    0 otherwise. Return -1 if an error occurred.
-
-    This function used an int type for start and end. This
-    might require changes in your code for properly supporting 64-bit
-    systems."""
-    raise NotImplementedError
-
 @cpython_api([PyObject, PyObject, Py_ssize_t, Py_ssize_t, rffi.INT_real], Py_ssize_t, error=-2)
 def PyUnicode_Find(space, str, substr, start, end, direction):
     """Return the first position of substr in str*[*start:end] using the given
@@ -2524,17 +2483,6 @@
     source code is read from fp instead of an in-memory string."""
     raise NotImplementedError
 
-@cpython_api([rffi.CCHARP, rffi.INT_real, PyObject, PyObject, PyCompilerFlags], PyObject)
-def PyRun_StringFlags(space, str, start, globals, locals, flags):
-    """Execute Python source code from str in the context specified by the
-    dictionaries globals and locals with the compiler flags specified by
-    flags.  The parameter start specifies the start token that should be used to
-    parse the source code.
-
-    Returns the result of executing the code as a Python object, or NULL if an
-    exception was raised."""
-    raise NotImplementedError
-
 @cpython_api([FILE, rffi.CCHARP, rffi.INT_real, PyObject, PyObject, rffi.INT_real], PyObject)
 def PyRun_FileEx(space, fp, filename, start, globals, locals, closeit):
     """This is a simplified interface to PyRun_FileExFlags() below, leaving
@@ -2555,13 +2503,6 @@
     returns."""
     raise NotImplementedError
 
-@cpython_api([PyCodeObject, PyObject, PyObject], PyObject)
-def PyEval_EvalCode(space, co, globals, locals):
-    """This is a simplified interface to PyEval_EvalCodeEx(), with just
-    the code object, and the dictionaries of global and local variables.
-    The other arguments are set to NULL."""
-    raise NotImplementedError
-
 @cpython_api([PyCodeObject, PyObject, PyObject, PyObjectP, rffi.INT_real, PyObjectP, rffi.INT_real, PyObjectP, rffi.INT_real, PyObject], PyObject)
 def PyEval_EvalCodeEx(space, co, globals, locals, args, argcount, kws, kwcount, defs, defcount, closure):
     """Evaluate a precompiled code object, given a particular environment for its
@@ -2586,12 +2527,6 @@
     throw() methods of generator objects."""
     raise NotImplementedError
 
-@cpython_api([PyCompilerFlags], rffi.INT_real, error=CANNOT_FAIL)
-def PyEval_MergeCompilerFlags(space, cf):
-    """This function changes the flags of the current evaluation frame, and returns
-    true on success, false on failure."""
-    raise NotImplementedError
-
 @cpython_api([PyObject], rffi.INT_real, error=CANNOT_FAIL)
 def PyWeakref_Check(space, ob):
     """Return true if ob is either a reference or proxy object.
diff --git a/pypy/module/cpyext/test/test_eval.py b/pypy/module/cpyext/test/test_eval.py
--- a/pypy/module/cpyext/test/test_eval.py
+++ b/pypy/module/cpyext/test/test_eval.py
@@ -2,9 +2,10 @@
 from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase
 from pypy.module.cpyext.test.test_api import BaseApiTest
 from pypy.module.cpyext.eval import (
-    Py_single_input, Py_file_input, Py_eval_input)
+    Py_single_input, Py_file_input, Py_eval_input, PyCompilerFlags)
 from pypy.module.cpyext.api import fopen, fclose, fileno, Py_ssize_tP
 from pypy.interpreter.gateway import interp2app
+from pypy.interpreter.astcompiler import consts
 from pypy.tool.udir import udir
 import sys, os
 
@@ -63,6 +64,22 @@
 
         assert space.int_w(w_res) == 10
 
+    def test_evalcode(self, space, api):
+        w_f = space.appexec([], """():
+            def f(*args):
+                assert isinstance(args, tuple)
+                return len(args) + 8
+            return f
+            """)
+
+        w_t = space.newtuple([space.wrap(1), space.wrap(2)])
+        w_globals = space.newdict()
+        w_locals = space.newdict()
+        space.setitem(w_locals, space.wrap("args"), w_t)
+        w_res = api.PyEval_EvalCode(w_f.code, w_globals, w_locals)
+
+        assert space.int_w(w_res) == 10
+
     def test_run_simple_string(self, space, api):
         def run(code):
             buf = rffi.str2charp(code)
@@ -96,6 +113,20 @@
         assert 42 * 43 == space.unwrap(
             api.PyObject_GetItem(w_globals, space.wrap("a")))
 
+    def test_run_string_flags(self, space, api):
+        flags = lltype.malloc(PyCompilerFlags, flavor='raw')
+        flags.c_cf_flags = rffi.cast(rffi.INT, consts.PyCF_SOURCE_IS_UTF8)
+        w_globals = space.newdict()
+        buf = rffi.str2charp("a = u'caf\xc3\xa9'")
+        try:
+            api.PyRun_StringFlags(buf, Py_single_input,
+                                  w_globals, w_globals, flags)
+        finally:
+            rffi.free_charp(buf)
+        w_a = space.getitem(w_globals, space.wrap("a"))
+        assert space.unwrap(w_a) == u'caf\xe9'
+        lltype.free(flags, flavor='raw')
+
     def test_run_file(self, space, api):
         filepath = udir / "cpyext_test_runfile.py"
         filepath.write("raise ZeroDivisionError")
@@ -256,3 +287,21 @@
         print dir(mod)
         print mod.__dict__
         assert mod.f(42) == 47
+
+    def test_merge_compiler_flags(self):
+        module = self.import_extension('foo', [
+            ("get_flags", "METH_NOARGS",
+             """
+                PyCompilerFlags flags;
+                flags.cf_flags = 0;
+                int result = PyEval_MergeCompilerFlags(&flags);
+                return Py_BuildValue("ii", result, flags.cf_flags);
+             """),
+            ])
+        assert module.get_flags() == (0, 0)
+
+        ns = {'module':module}
+        exec """from __future__ import division    \nif 1:
+                def nested_flags():
+                    return module.get_flags()""" in ns
+        assert ns['nested_flags']() == (1, 0x2000)  # CO_FUTURE_DIVISION
diff --git a/pypy/module/cpyext/test/test_funcobject.py b/pypy/module/cpyext/test/test_funcobject.py
--- a/pypy/module/cpyext/test/test_funcobject.py
+++ b/pypy/module/cpyext/test/test_funcobject.py
@@ -81,6 +81,14 @@
         rffi.free_charp(filename)
         rffi.free_charp(funcname)
 
+    def test_getnumfree(self, space, api):
+        w_function = space.appexec([], """():
+            a = 5
+            def method(x): return a, x
+            return method
+        """)
+        assert api.PyCode_GetNumFree(w_function.code) == 1
+
     def test_classmethod(self, space, api):
         w_function = space.appexec([], """():
             def method(x): return x
diff --git a/pypy/module/cpyext/test/test_intobject.py b/pypy/module/cpyext/test/test_intobject.py
--- a/pypy/module/cpyext/test/test_intobject.py
+++ b/pypy/module/cpyext/test/test_intobject.py
@@ -65,4 +65,97 @@
         values = module.values()
         types = [type(x) for x in values]
         assert types == [int, long, int, int]
-        
+
+    def test_int_subtype(self):
+        module = self.import_extension(
+            'foo', [
+            ("newEnum", "METH_VARARGS",
+             """
+                EnumObject *enumObj;
+                long intval;
+                PyObject *name;
+
+                if (!PyArg_ParseTuple(args, "Oi", &name, &intval))
+                    return NULL;
+
+                PyType_Ready(&Enum_Type);
+                enumObj = PyObject_New(EnumObject, &Enum_Type);
+                if (!enumObj) {
+                    return NULL;
+                }
+
+                enumObj->ob_ival = intval;
+                Py_INCREF(name);
+                enumObj->ob_name = name;
+
+                return (PyObject *)enumObj;
+             """),
+            ],
+            prologue="""
+            typedef struct
+            {
+                PyObject_HEAD
+                long ob_ival;
+                PyObject* ob_name;
+            } EnumObject;
+
+            static void
+            enum_dealloc(EnumObject *op)
+            {
+                    Py_DECREF(op->ob_name);
+                    Py_TYPE(op)->tp_free((PyObject *)op);
+            }
+
+            static PyMemberDef enum_members[] = {
+                {"name", T_OBJECT, offsetof(EnumObject, ob_name), 0, NULL},
+                {NULL}  /* Sentinel */
+            };
+
+            PyTypeObject Enum_Type = {
+                PyObject_HEAD_INIT(0)
+                /*ob_size*/             0,
+                /*tp_name*/             "Enum",
+                /*tp_basicsize*/        sizeof(EnumObject),
+                /*tp_itemsize*/         0,
+                /*tp_dealloc*/          enum_dealloc,
+                /*tp_print*/            0,
+                /*tp_getattr*/          0,
+                /*tp_setattr*/          0,
+                /*tp_compare*/          0,
+                /*tp_repr*/             0,
+                /*tp_as_number*/        0,
+                /*tp_as_sequence*/      0,
+                /*tp_as_mapping*/       0,
+                /*tp_hash*/             0,
+                /*tp_call*/             0,
+                /*tp_str*/              0,
+                /*tp_getattro*/         0,
+                /*tp_setattro*/         0,
+                /*tp_as_buffer*/        0,
+                /*tp_flags*/            Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+                /*tp_doc*/              0,
+                /*tp_traverse*/         0,
+                /*tp_clear*/            0,
+                /*tp_richcompare*/      0,
+                /*tp_weaklistoffset*/   0,
+                /*tp_iter*/             0,
+                /*tp_iternext*/         0,
+                /*tp_methods*/          0,
+                /*tp_members*/          enum_members,
+                /*tp_getset*/           0,
+                /*tp_base*/             &PyInt_Type,
+                /*tp_dict*/             0,
+                /*tp_descr_get*/        0,
+                /*tp_descr_set*/        0,
+                /*tp_dictoffset*/       0,
+                /*tp_init*/             0,
+                /*tp_alloc*/            0,
+                /*tp_new*/              0
+            };
+            """)
+
+        a = module.newEnum("ULTIMATE_ANSWER", 42)
+        assert type(a).__name__ == "Enum"
+        assert isinstance(a, int)
+        assert a == int(a) == 42
+        assert a.name == "ULTIMATE_ANSWER"
diff --git a/pypy/module/cpyext/test/test_stringobject.py b/pypy/module/cpyext/test/test_stringobject.py
--- a/pypy/module/cpyext/test/test_stringobject.py
+++ b/pypy/module/cpyext/test/test_stringobject.py
@@ -105,6 +105,15 @@
             )])
         assert module.string_as_string("huheduwe") == "huhe"
 
+    def test_py_string_as_string_None(self):
+        module = self.import_extension('foo', [
+            ("string_None", "METH_VARARGS",
+             '''
+             return PyString_AsString(Py_None);
+             '''
+            )])
+        raises(TypeError, module.string_None)
+
     def test_AsStringAndSize(self):
         module = self.import_extension('foo', [
             ("getstring", "METH_NOARGS",
diff --git a/pypy/module/cpyext/test/test_thread.py b/pypy/module/cpyext/test/test_thread.py
--- a/pypy/module/cpyext/test/test_thread.py
+++ b/pypy/module/cpyext/test/test_thread.py
@@ -5,6 +5,7 @@
 
 from pypy.module.thread.ll_thread import allocate_ll_lock
 from pypy.module.cpyext.test.test_api import BaseApiTest
+from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase
 
 
 class TestPyThread(BaseApiTest):
@@ -38,3 +39,51 @@
         api.PyThread_release_lock(lock)
         assert api.PyThread_acquire_lock(lock, 0) == 1
         api.PyThread_free_lock(lock)
+
+
+class AppTestThread(AppTestCpythonExtensionBase):
+    def test_tls(self):
+        module = self.import_extension('foo', [
+            ("create_key", "METH_NOARGS",
+             """
+                 return PyInt_FromLong(PyThread_create_key());
+             """),
+            ("test_key", "METH_O",
+             """
+                 int key = PyInt_AsLong(args);
+                 if (PyThread_get_key_value(key) != NULL) {
+                     PyErr_SetNone(PyExc_ValueError);
+                     return NULL;
+                 }
+                 if (PyThread_set_key_value(key, (void*)123) < 0) {
+                     PyErr_SetNone(PyExc_ValueError);
+                     return NULL;
+                 }
+                 if (PyThread_get_key_value(key) != (void*)123) {
+                     PyErr_SetNone(PyExc_ValueError);
+                     return NULL;
+                 }
+                 Py_RETURN_NONE;
+             """),
+            ])
+        key = module.create_key()
+        assert key > 0
+        # Test value in main thread.
+        module.test_key(key)
+        raises(ValueError, module.test_key, key)
+        # Same test, in another thread.
+        result = []
+        import thread, time
+        def in_thread():
+            try:
+                module.test_key(key)
+                raises(ValueError, module.test_key, key)
+            except Exception, e:
+                result.append(e)
+            else:
+                result.append(True)
+        thread.start_new_thread(in_thread, ())
+        while not result:
+            print "."
+            time.sleep(.5)
+        assert result == [True]
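Aside (not part of the patch): the key/value calls exercised above, PyThread_create_key / PyThread_set_key_value / PyThread_get_key_value, give per-thread storage; threading.local offers the same isolation at the Python level. A rough analogue, for orientation only:

    import threading, thread, time

    local = threading.local()          # plays the role of one TLS key
    results = []

    def in_thread():
        # a fresh thread starts with nothing stored under the key
        results.append(not hasattr(local, "value"))
        local.value = 123              # visible only in this thread

    thread.start_new_thread(in_thread, ())
    local.value = 456                  # the main thread's slot is independent
    time.sleep(0.5)
    print results, local.value         # -> [True] 456
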
diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py
--- a/pypy/module/cpyext/test/test_unicodeobject.py
+++ b/pypy/module/cpyext/test/test_unicodeobject.py
@@ -204,8 +204,18 @@
             assert api.Py_UNICODE_ISSPACE(unichr(char))
         assert not api.Py_UNICODE_ISSPACE(u'a')
 
+        assert api.Py_UNICODE_ISALPHA(u'a')
+        assert not api.Py_UNICODE_ISALPHA(u'0')
+        assert api.Py_UNICODE_ISALNUM(u'a')
+        assert api.Py_UNICODE_ISALNUM(u'0')
+        assert not api.Py_UNICODE_ISALNUM(u'+')
+
         assert api.Py_UNICODE_ISDECIMAL(u'\u0660')
         assert not api.Py_UNICODE_ISDECIMAL(u'a')
+        assert api.Py_UNICODE_ISDIGIT(u'9')
+        assert not api.Py_UNICODE_ISDIGIT(u'@')
+        assert api.Py_UNICODE_ISNUMERIC(u'9')
+        assert not api.Py_UNICODE_ISNUMERIC(u'@')
 
         for char in [0x0a, 0x0d, 0x1c, 0x1d, 0x1e, 0x85, 0x2028, 0x2029]:
             assert api.Py_UNICODE_ISLINEBREAK(unichr(char))
@@ -216,6 +226,9 @@
         assert not api.Py_UNICODE_ISUPPER(u'a')
         assert not api.Py_UNICODE_ISLOWER(u'&#65533;')
         assert api.Py_UNICODE_ISUPPER(u'&#65533;')
+        assert not api.Py_UNICODE_ISTITLE(u'A')
+        assert api.Py_UNICODE_ISTITLE(
+            u'\N{LATIN CAPITAL LETTER L WITH SMALL LETTER J}')
 
     def test_TOLOWER(self, space, api):
         assert api.Py_UNICODE_TOLOWER(u'&#65533;') == u'&#65533;'
@@ -437,3 +450,10 @@
             api.PyUnicode_Replace(w_str, w_substr, w_replstr, 2))
         assert u"zbzbzbzb" == space.unwrap(
             api.PyUnicode_Replace(w_str, w_substr, w_replstr, -1))
+
+    def test_tailmatch(self, space, api):
+        w_str = space.wrap(u"abcdef")
+        assert api.PyUnicode_Tailmatch(w_str, space.wrap("cde"), 2, 10, 1) == 1
+        assert api.PyUnicode_Tailmatch(w_str, space.wrap("cde"), 1, 5, -1) == 1
+        self.raises(space, api, TypeError,
+                    api.PyUnicode_Tailmatch, w_str, space.wrap(3), 2, 10, 1)
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -12,7 +12,7 @@
     make_typedescr, get_typedescr)
 from pypy.module.cpyext.stringobject import PyString_Check
 from pypy.module.sys.interp_encoding import setdefaultencoding
-from pypy.objspace.std import unicodeobject, unicodetype
+from pypy.objspace.std import unicodeobject, unicodetype, stringtype
 from pypy.rlib import runicode
 from pypy.tool.sourcetools import func_renamer
 import sys
@@ -89,6 +89,11 @@
     return unicodedb.isspace(ord(ch))
 
 @cpython_api([Py_UNICODE], rffi.INT_real, error=CANNOT_FAIL)
+def Py_UNICODE_ISALPHA(space, ch):
+    """Return 1 or 0 depending on whether ch is an alphabetic character."""
+    return unicodedb.isalpha(ord(ch))
+
+@cpython_api([Py_UNICODE], rffi.INT_real, error=CANNOT_FAIL)
 def Py_UNICODE_ISALNUM(space, ch):
     """Return 1 or 0 depending on whether ch is an alphanumeric character."""
     return unicodedb.isalnum(ord(ch))
@@ -104,6 +109,16 @@
     return unicodedb.isdecimal(ord(ch))
 
 @cpython_api([Py_UNICODE], rffi.INT_real, error=CANNOT_FAIL)
+def Py_UNICODE_ISDIGIT(space, ch):
+    """Return 1 or 0 depending on whether ch is a digit character."""
+    return unicodedb.isdigit(ord(ch))
+
+@cpython_api([Py_UNICODE], rffi.INT_real, error=CANNOT_FAIL)
+def Py_UNICODE_ISNUMERIC(space, ch):
+    """Return 1 or 0 depending on whether ch is a numeric character."""
+    return unicodedb.isnumeric(ord(ch))
+
+@cpython_api([Py_UNICODE], rffi.INT_real, error=CANNOT_FAIL)
 def Py_UNICODE_ISLOWER(space, ch):
     """Return 1 or 0 depending on whether ch is a lowercase character."""
     return unicodedb.islower(ord(ch))
@@ -113,6 +128,11 @@
     """Return 1 or 0 depending on whether ch is an uppercase character."""
     return unicodedb.isupper(ord(ch))
 
+@cpython_api([Py_UNICODE], rffi.INT_real, error=CANNOT_FAIL)
+def Py_UNICODE_ISTITLE(space, ch):
+    """Return 1 or 0 depending on whether ch is a titlecase character."""
+    return unicodedb.istitle(ord(ch))
+
 @cpython_api([Py_UNICODE], Py_UNICODE, error=CANNOT_FAIL)
 def Py_UNICODE_TOLOWER(space, ch):
     """Return the character ch converted to lower case."""
@@ -155,6 +175,11 @@
     except KeyError:
         return -1.0
 
+@cpython_api([], Py_UNICODE, error=CANNOT_FAIL)
+def PyUnicode_GetMax(space):
+    """Get the maximum ordinal for a Unicode character."""
+    return runicode.UNICHR(runicode.MAXUNICODE)
+
 @cpython_api([PyObject], rffi.CCHARP, error=CANNOT_FAIL)
 def PyUnicode_AS_DATA(space, ref):
     """Return a pointer to the internal buffer of the object. o has to be a
@@ -560,3 +585,16 @@
     return space.call_method(w_str, "replace", w_substr, w_replstr,
                              space.wrap(maxcount))
 
+@cpython_api([PyObject, PyObject, Py_ssize_t, Py_ssize_t, rffi.INT_real],
+             rffi.INT_real, error=-1)
+def PyUnicode_Tailmatch(space, w_str, w_substr, start, end, direction):
+    """Return 1 if substr matches str[start:end] at the given tail end
+    (direction == -1 means to do a prefix match, direction == 1 a
+    suffix match), 0 otherwise. Return -1 if an error occurred."""
+    str = space.unicode_w(w_str)
+    substr = space.unicode_w(w_substr)
+    if rffi.cast(lltype.Signed, direction) >= 0:
+        return stringtype.stringstartswith(str, substr, start, end)
+    else:
+        return stringtype.stringendswith(str, substr, start, end)
+
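Aside (illustration only, not part of the patch): the dispatch in PyUnicode_Tailmatch above amounts to a startswith/endswith check over the [start:end] bounds, roughly as in plain Python:

    def tailmatch(s, sub, start, end, direction):
        # non-negative direction -> startswith-style check,
        # negative direction -> endswith-style check (as in the code above)
        if direction >= 0:
            return s.startswith(sub, start, end)
        return s.endswith(sub, start, end)

    print tailmatch(u"abcdef", u"cde", 2, 10, 1)   # True, as in the new test
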
diff --git a/pypy/module/imp/interp_imp.py b/pypy/module/imp/interp_imp.py
--- a/pypy/module/imp/interp_imp.py
+++ b/pypy/module/imp/interp_imp.py
@@ -1,10 +1,11 @@
 from pypy.module.imp import importing
 from pypy.module._file.interp_file import W_File
 from pypy.rlib import streamio
+from pypy.rlib.streamio import StreamErrors
 from pypy.interpreter.error import OperationError, operationerrfmt
 from pypy.interpreter.module import Module
 from pypy.interpreter.gateway import unwrap_spec
-from pypy.module._file.interp_stream import StreamErrors, wrap_streamerror
+from pypy.interpreter.streamutil import wrap_streamerror
 
 
 def get_suffixes(space):
diff --git a/pypy/module/imp/test/test_import.py b/pypy/module/imp/test/test_import.py
--- a/pypy/module/imp/test/test_import.py
+++ b/pypy/module/imp/test/test_import.py
@@ -357,7 +357,7 @@
 
     def test_cannot_write_pyc(self):
         import sys, os
-        p = os.path.join(sys.path[-1], 'readonly')
+        p = os.path.join(sys.path[0], 'readonly')
         try:
             os.chmod(p, 0555)
         except:
diff --git a/pypy/module/marshal/interp_marshal.py b/pypy/module/marshal/interp_marshal.py
--- a/pypy/module/marshal/interp_marshal.py
+++ b/pypy/module/marshal/interp_marshal.py
@@ -327,8 +327,10 @@
     # %r not supported in rpython
     #u.raise_exc('invalid typecode in unmarshal: %r' % tc)
     c = ord(tc)
-    if c < 32 or c > 126:
-        s = '\\x' + hex(c)
+    if c < 16:
+        s = '\\x0%x' % c
+    elif c < 32 or c > 126:
+        s = '\\x%x' % c
     elif tc == '\\':
         s = r'\\'
     else:
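Aside (illustration only): hex() already prepends '0x' and does not zero-pad, so the old concatenation produced malformed escapes; the new branches emit proper two-digit escapes:

    c = 1
    print '\\x' + hex(c)     # old code: \x0x1  (malformed)
    print '\\x0%x' % c       # new code: \x01
    print '\\x%x' % 200      # new code: \xc8
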
diff --git a/pypy/module/marshal/test/test_marshal.py b/pypy/module/marshal/test/test_marshal.py
--- a/pypy/module/marshal/test/test_marshal.py
+++ b/pypy/module/marshal/test/test_marshal.py
@@ -174,6 +174,11 @@
                 pass
             raises(ValueError, marshal.dumps, subtype)
 
+    def test_bad_typecode(self):
+        import marshal
+        exc = raises(ValueError, marshal.loads, chr(1))
+        assert r"'\x01'" in exc.value.message
+
 
 class AppTestRope(AppTestMarshal):
     def setup_class(cls):
diff --git a/pypy/module/math/test/test_direct.py b/pypy/module/math/test/test_direct.py
--- a/pypy/module/math/test/test_direct.py
+++ b/pypy/module/math/test/test_direct.py
@@ -55,6 +55,12 @@
         ('frexp', (-1.25,), lambda x: x == (-0.625, 1)),
         ('modf',  (4.25,), lambda x: x == (0.25, 4.0)),
         ('modf',  (-4.25,), lambda x: x == (-0.25, -4.0)),
+        ('copysign', (1.5, 0.0), 1.5),
+        ('copysign', (1.5, -0.0), -1.5),
+        ('copysign', (1.5, INFINITY), 1.5),
+        ('copysign', (1.5, -INFINITY), -1.5),
+        ('copysign', (1.5, NAN), 1.5),
+        ('copysign', (1.75, -NAN), -1.75),      # special case for -NAN here
         ]
 
     OVFCASES = [
diff --git a/pypy/module/math/test/test_math.py b/pypy/module/math/test/test_math.py
--- a/pypy/module/math/test/test_math.py
+++ b/pypy/module/math/test/test_math.py
@@ -1,3 +1,4 @@
+from __future__ import with_statement
 import sys
 from pypy.conftest import gettestobjspace
 from pypy.module.math.test import test_direct
@@ -268,3 +269,7 @@
             def __trunc__(self):
                 return "truncated"
         assert math.trunc(foo()) == "truncated"
+
+    def test_copysign_nan(self):
+        import math
+        assert math.copysign(1.0, float('-nan')) == -1.0
diff --git a/pypy/module/micronumpy/__init__.py b/pypy/module/micronumpy/__init__.py
--- a/pypy/module/micronumpy/__init__.py
+++ b/pypy/module/micronumpy/__init__.py
@@ -89,6 +89,9 @@
         ("multiply", "multiply"),
         ("negative", "negative"),
         ("not_equal", "not_equal"),
+        ("radians", "radians"),
+        ("degrees", "degrees"),
+        ("deg2rad", "radians"),
         ("reciprocal", "reciprocal"),
         ("sign", "sign"),
         ("sin", "sin"),
@@ -107,6 +110,12 @@
         ('logical_xor', 'logical_xor'),
         ('logical_not', 'logical_not'),
         ('logical_or', 'logical_or'),
+        ('log', 'log'),
+        ('log2', 'log2'),
+        ('log10', 'log10'),
+        ('log1p', 'log1p'),
+        ('power', 'power'),
+        ('floor_divide', 'floor_divide'),
     ]:
         interpleveldefs[exposed] = "interp_ufuncs.get(space).%s" % impl
 
diff --git a/pypy/module/micronumpy/interp_boxes.py b/pypy/module/micronumpy/interp_boxes.py
--- a/pypy/module/micronumpy/interp_boxes.py
+++ b/pypy/module/micronumpy/interp_boxes.py
@@ -80,6 +80,7 @@
     descr_mul = _binop_impl("multiply")
     descr_div = _binop_impl("divide")
     descr_truediv = _binop_impl("true_divide")
+    descr_floordiv = _binop_impl("floor_divide")
     descr_mod = _binop_impl("mod")
     descr_pow = _binop_impl("power")
     descr_lshift = _binop_impl("left_shift")
@@ -100,6 +101,7 @@
     descr_rmul = _binop_right_impl("multiply")
     descr_rdiv = _binop_right_impl("divide")
     descr_rtruediv = _binop_right_impl("true_divide")
+    descr_rfloordiv = _binop_right_impl("floor_divide")
     descr_rmod = _binop_right_impl("mod")
     descr_rpow = _binop_right_impl("power")
     descr_rlshift = _binop_right_impl("left_shift")
@@ -208,6 +210,7 @@
     __mul__ = interp2app(W_GenericBox.descr_mul),
     __div__ = interp2app(W_GenericBox.descr_div),
     __truediv__ = interp2app(W_GenericBox.descr_truediv),
+    __floordiv__ = interp2app(W_GenericBox.descr_floordiv),
     __mod__ = interp2app(W_GenericBox.descr_mod),
     __divmod__ = interp2app(W_GenericBox.descr_divmod),
     __pow__ = interp2app(W_GenericBox.descr_pow),
@@ -222,6 +225,7 @@
     __rmul__ = interp2app(W_GenericBox.descr_rmul),
     __rdiv__ = interp2app(W_GenericBox.descr_rdiv),
     __rtruediv__ = interp2app(W_GenericBox.descr_rtruediv),
+    __rfloordiv__ = interp2app(W_GenericBox.descr_rfloordiv),
     __rmod__ = interp2app(W_GenericBox.descr_rmod),
     __rdivmod__ = interp2app(W_GenericBox.descr_rdivmod),
     __rpow__ = interp2app(W_GenericBox.descr_rpow),
diff --git a/pypy/module/micronumpy/interp_numarray.py b/pypy/module/micronumpy/interp_numarray.py
--- a/pypy/module/micronumpy/interp_numarray.py
+++ b/pypy/module/micronumpy/interp_numarray.py
@@ -102,6 +102,7 @@
     descr_mul = _binop_impl("multiply")
     descr_div = _binop_impl("divide")
     descr_truediv = _binop_impl("true_divide")
+    descr_floordiv = _binop_impl("floor_divide")
     descr_mod = _binop_impl("mod")
     descr_pow = _binop_impl("power")
     descr_lshift = _binop_impl("left_shift")
@@ -136,6 +137,7 @@
     descr_rmul = _binop_right_impl("multiply")
     descr_rdiv = _binop_right_impl("divide")
     descr_rtruediv = _binop_right_impl("true_divide")
+    descr_rfloordiv = _binop_right_impl("floor_divide")
     descr_rmod = _binop_right_impl("mod")
     descr_rpow = _binop_right_impl("power")
     descr_rlshift = _binop_right_impl("left_shift")
@@ -779,8 +781,6 @@
     """
     Intermediate class for performing binary operations.
     """
-    _immutable_fields_ = ['left', 'right']
-
     def __init__(self, ufunc, name, shape, calc_dtype, res_dtype, left, right):
         VirtualArray.__init__(self, name, shape, res_dtype)
         self.ufunc = ufunc
@@ -856,8 +856,6 @@
                                          self.right.create_sig(), done_func)
 
 class AxisReduce(Call2):
-    _immutable_fields_ = ['left', 'right']
-
     def __init__(self, ufunc, name, identity, shape, dtype, left, right, dim):
         Call2.__init__(self, ufunc, name, shape, dtype, dtype,
                        left, right)
@@ -1254,6 +1252,7 @@
     __mul__ = interp2app(BaseArray.descr_mul),
     __div__ = interp2app(BaseArray.descr_div),
     __truediv__ = interp2app(BaseArray.descr_truediv),
+    __floordiv__ = interp2app(BaseArray.descr_floordiv),
     __mod__ = interp2app(BaseArray.descr_mod),
     __divmod__ = interp2app(BaseArray.descr_divmod),
     __pow__ = interp2app(BaseArray.descr_pow),
@@ -1268,6 +1267,7 @@
     __rmul__ = interp2app(BaseArray.descr_rmul),
     __rdiv__ = interp2app(BaseArray.descr_rdiv),
     __rtruediv__ = interp2app(BaseArray.descr_rtruediv),
+    __rfloordiv__ = interp2app(BaseArray.descr_rfloordiv),
     __rmod__ = interp2app(BaseArray.descr_rmod),
     __rdivmod__ = interp2app(BaseArray.descr_rdivmod),
     __rpow__ = interp2app(BaseArray.descr_rpow),
diff --git a/pypy/module/micronumpy/interp_ufuncs.py b/pypy/module/micronumpy/interp_ufuncs.py
--- a/pypy/module/micronumpy/interp_ufuncs.py
+++ b/pypy/module/micronumpy/interp_ufuncs.py
@@ -388,6 +388,7 @@
                                              "int_only": True}),
             ("bitwise_xor", "bitwise_xor", 2, {"int_only": True}),
             ("invert", "invert", 1, {"int_only": True}),
+            ("floor_divide", "floordiv", 2, {"promote_bools": True}),
             ("divide", "div", 2, {"promote_bools": True}),
             ("true_divide", "div", 2, {"promote_to_float": True}),
             ("mod", "mod", 2, {"promote_bools": True}),
@@ -441,6 +442,14 @@
             ("arcsinh", "arcsinh", 1, {"promote_to_float": True}),
             ("arccosh", "arccosh", 1, {"promote_to_float": True}),
             ("arctanh", "arctanh", 1, {"promote_to_float": True}),
+
+            ("radians", "radians", 1, {"promote_to_float": True}),
+            ("degrees", "degrees", 1, {"promote_to_float": True}),
+
+            ("log", "log", 1, {"promote_to_float": True}),
+            ("log2", "log2", 1, {"promote_to_float": True}),
+            ("log10", "log10", 1, {"promote_to_float": True}),
+            ("log1p", "log1p", 1, {"promote_to_float": True}),
         ]:
             self.add_ufunc(space, *ufunc_def)
 
diff --git a/pypy/module/micronumpy/test/test_numarray.py b/pypy/module/micronumpy/test/test_numarray.py
--- a/pypy/module/micronumpy/test/test_numarray.py
+++ b/pypy/module/micronumpy/test/test_numarray.py
@@ -625,6 +625,56 @@
         for i in range(5):
             assert b[i] == i / 5.0
 
+    def test_floordiv(self):
+        from math import isnan
+        from _numpypy import array, dtype
+
+        a = array(range(1, 6))
+        b = a // a
+        assert (b == [1, 1, 1, 1, 1]).all()
+
+        a = array(range(1, 6), dtype=bool)
+        b = a // a
+        assert b.dtype is dtype("int8")
+        assert (b == [1, 1, 1, 1, 1]).all()
+
+        a = array([-1, 0, 1])
+        b = array([0, 0, 0])
+        c = a // b
+        assert (c == [0, 0, 0]).all()
+
+        a = array([-1.0, 0.0, 1.0])
+        b = array([0.0, 0.0, 0.0])
+        c = a // b
+        assert c[0] == float('-inf')
+        assert isnan(c[1])
+        assert c[2] == float('inf')
+
+        b = array([-0.0, -0.0, -0.0])
+        c = a // b
+        assert c[0] == float('inf')
+        assert isnan(c[1])
+        assert c[2] == float('-inf')
+
+    def test_floordiv_other(self):
+        from _numpypy import array
+        a = array(range(5))
+        b = array([2, 2, 2, 2, 2], float)
+        c = a // b
+        assert (c == [0, 0, 1, 1, 2]).all()
+
+    def test_rfloordiv(self):
+        from _numpypy import array
+        a = array(range(1, 6))
+        b = 3 // a
+        assert (b == [3, 1, 1, 0, 0]).all()
+
+    def test_floordiv_constant(self):
+        from _numpypy import array
+        a = array(range(5))
+        b = a // 2
+        assert (b == [0, 0, 1, 1, 2]).all()
+
     def test_truediv(self):
         from operator import truediv
         from _numpypy import arange
diff --git a/pypy/module/micronumpy/test/test_ufuncs.py b/pypy/module/micronumpy/test/test_ufuncs.py
--- a/pypy/module/micronumpy/test/test_ufuncs.py
+++ b/pypy/module/micronumpy/test/test_ufuncs.py
@@ -376,6 +376,45 @@
         assert math.isnan(sqrt(-1))
         assert math.isnan(sqrt(nan))
 
+    def test_radians(self):
+        import math
+        from _numpypy import radians, array
+        a = array([
+            -181, -180, -179,
+            181, 180, 179,
+            359, 360, 361,
+            400, -1, 0, 1,
+            float('inf'), float('-inf')])
+        b = radians(a)
+        for i in range(len(a)):
+            assert b[i] == math.radians(a[i])
+
+    def test_deg2rad(self):
+        import math
+        from _numpypy import deg2rad, array
+        a = array([
+            -181, -180, -179,
+            181, 180, 179,
+            359, 360, 361,
+            400, -1, 0, 1,
+            float('inf'), float('-inf')])
+        b = deg2rad(a)
+        for i in range(len(a)):
+            assert b[i] == math.radians(a[i])
+
+    def test_degrees(self):
+        import math
+        from _numpypy import degrees, array
+        a = array([
+            -181, -180, -179,
+            181, 180, 179,
+            359, 360, 361,
+            400, -1, 0, 1,
+            float('inf'), float('-inf')])
+        b = degrees(a)
+        for i in range(len(a)):
+            assert b[i] == math.degrees(a[i])
+
     def test_reduce_errors(self):
         from _numpypy import sin, add
 
@@ -481,3 +520,47 @@
         assert (logical_xor([True, False, True, False], [1, 2, 0, 0])
                 == [False, True, True, False]).all()
         assert (logical_not([True, False]) == [False, True]).all()
+
+    def test_logn(self):
+        import math
+        from _numpypy import log, log2, log10
+
+        for log_func, base in [(log, math.e), (log2, 2), (log10, 10)]:
+            for v in [float('-nan'), float('-inf'), -1, float('nan')]:
+                assert math.isnan(log_func(v))
+            for v in [-0.0, 0.0]:
+                assert log_func(v) == float("-inf")
+            assert log_func(float('inf')) == float('inf')
+            assert (log_func([1, base]) == [0, 1]).all()
+
+    def test_log1p(self):
+        import math
+        from _numpypy import log1p
+
+        for v in [float('-nan'), float('-inf'), -2, float('nan')]:
+            assert math.isnan(log1p(v))
+        for v in [-1]:
+            assert log1p(v) == float("-inf")
+        assert log1p(float('inf')) == float('inf')
+        assert (log1p([0, 1e-50, math.e - 1]) == [0, 1e-50, 1]).all()
+
+    def test_power(self):
+        import math
+        from _numpypy import power, array
+        a = array([1., 2., 3.])
+        b = power(a, 3)
+        for i in range(len(a)):
+            assert b[i] == a[i] ** 3
+
+        a = array([1., 2., 3.])
+        b = array([1., 2., 3.])
+        c = power(a, b)
+        for i in range(len(a)):
+            assert c[i] == a[i] ** b[i]
+
+    def test_floordiv(self):
+        from _numpypy import floor_divide, array
+        a = array([1., 2., 3., 4., 5., 6., 6.01])
+        b = floor_divide(a, 2.5)
+        for i in range(len(a)):
+            assert b[i] == a[i] // 2.5
diff --git a/pypy/module/micronumpy/types.py b/pypy/module/micronumpy/types.py
--- a/pypy/module/micronumpy/types.py
+++ b/pypy/module/micronumpy/types.py
@@ -10,6 +10,8 @@
 from pypy.rpython.lltypesystem import lltype, rffi
 from pypy.rlib.rstruct.runpack import runpack
 
+degToRad = math.pi / 180.0
+log2 = math.log(2)
 
 def simple_unary_op(func):
     specialize.argtype(1)(func)
@@ -280,6 +282,12 @@
         return v1 / v2
 
     @simple_binary_op
+    def floordiv(self, v1, v2):
+        if v2 == 0:
+            return 0
+        return v1 // v2
+
+    @simple_binary_op
     def mod(self, v1, v2):
         return v1 % v2
 
@@ -418,6 +426,15 @@
             return rfloat.copysign(rfloat.INFINITY, v1 * v2)
 
     @simple_binary_op
+    def floordiv(self, v1, v2):
+        try:
+            return math.floor(v1 / v2)
+        except ZeroDivisionError:
+            if v1 == v2 == 0.0:
+                return rfloat.NAN
+            return rfloat.copysign(rfloat.INFINITY, v1 * v2)
+
+    @simple_binary_op
     def mod(self, v1, v2):
         return math.fmod(v1, v2)
 
@@ -533,6 +550,57 @@
     def isinf(self, v):
         return rfloat.isinf(v)
 
+    @simple_unary_op
+    def radians(self, v):
+        return v * degToRad
+    deg2rad = radians
+
+    @simple_unary_op
+    def degrees(self, v):
+        return v / degToRad
+
+    @simple_unary_op
+    def log(self, v):
+        try:
+            return math.log(v)
+        except ValueError:
+            if v == 0.0:
+                # CPython raises ValueError here, so we have to check
+                # the value to find the correct numpy return value
+                return -rfloat.INFINITY
+            return rfloat.NAN
+
+    @simple_unary_op
+    def log2(self, v):
+        try:
+            return math.log(v) / log2
+        except ValueError:
+            if v == 0.0:
+                # CPython raises ValueError here, so we have to check
+                # the value to find the correct numpy return value
+                return -rfloat.INFINITY
+            return rfloat.NAN
+
+    @simple_unary_op
+    def log10(self, v):
+        try:
+            return math.log10(v)
+        except ValueError:
+            if v == 0.0:
+                # CPython raises ValueError here, so we have to check
+                # the value to find the correct numpy return value
+                return -rfloat.INFINITY
+            return rfloat.NAN
+
+    @simple_unary_op
+    def log1p(self, v):
+        try:
+            return rfloat.log1p(v)
+        except OverflowError:
+            return -rfloat.INFINITY
+        except ValueError:
+            return rfloat.NAN
+
 
 class Float32(BaseType, Float):
     T = rffi.FLOAT
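Aside (plain-Python restatement, not part of the patch): the float floor division added above returns NaN for 0.0 // 0.0 and a signed infinity for any other division by zero, i.e. roughly:

    import math

    def float_floordiv(v1, v2):
        try:
            return math.floor(v1 / v2)
        except ZeroDivisionError:
            if v1 == v2 == 0.0:
                return float("nan")                      # 0.0 // 0.0
            return math.copysign(float("inf"), v1 * v2)  # sign of the quotient

    print float_floordiv(1.0, -0.0)    # -inf, matching the new test_floordiv
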
diff --git a/pypy/module/mmap/__init__.py b/pypy/module/mmap/__init__.py
--- a/pypy/module/mmap/__init__.py
+++ b/pypy/module/mmap/__init__.py
@@ -18,7 +18,7 @@
     def buildloaders(cls):
         from pypy.module.mmap import interp_mmap
         for constant, value in rmmap.constants.iteritems():
-            if isinstance(value, int):
+            if isinstance(value, (int, long)):
                 Module.interpleveldefs[constant] = "space.wrap(%r)" % value
         
         super(Module, cls).buildloaders()
diff --git a/pypy/module/pypyjit/interp_resop.py b/pypy/module/pypyjit/interp_resop.py
--- a/pypy/module/pypyjit/interp_resop.py
+++ b/pypy/module/pypyjit/interp_resop.py
@@ -72,7 +72,7 @@
     Set a compiling hook that will be called each time a loop is optimized,
     but before assembler compilation. This allows to add additional
     optimizations on Python level.
-    
+
     The hook will be called with the following signature:
     hook(jitdriver_name, loop_type, greenkey or guard_number, operations)
 
@@ -121,13 +121,14 @@
             ofs = ops_offset.get(op, 0)
         if op.opnum == rop.DEBUG_MERGE_POINT:
             jd_sd = jitdrivers_sd[op.getarg(0).getint()]
-            greenkey = op.getarglist()[2:]
+            greenkey = op.getarglist()[3:]
             repr = jd_sd.warmstate.get_location_str(greenkey)
             w_greenkey = wrap_greenkey(space, jd_sd.jitdriver, greenkey, repr)
             l_w.append(DebugMergePoint(space, jit_hooks._cast_to_gcref(op),
                                        logops.repr_of_resop(op),
                                        jd_sd.jitdriver.name,
                                        op.getarg(1).getint(),
+                                       op.getarg(2).getint(),
                                        w_greenkey))
         else:
             l_w.append(WrappedOp(jit_hooks._cast_to_gcref(op), ofs,
@@ -164,14 +165,16 @@
         llres = res.llbox
     return WrappedOp(jit_hooks.resop_new(num, args, llres), offset, repr)
 
-@unwrap_spec(repr=str, jd_name=str, call_depth=int)
-def descr_new_dmp(space, w_tp, w_args, repr, jd_name, call_depth, w_greenkey):
+@unwrap_spec(repr=str, jd_name=str, call_depth=int, call_id=int)
+def descr_new_dmp(space, w_tp, w_args, repr, jd_name, call_depth, call_id,
+    w_greenkey):
+
     args = [space.interp_w(WrappedBox, w_arg).llbox for w_arg in
             space.listview(w_args)]
     num = rop.DEBUG_MERGE_POINT
     return DebugMergePoint(space,
                            jit_hooks.resop_new(num, args, jit_hooks.emptyval()),
-                           repr, jd_name, call_depth, w_greenkey)
+                           repr, jd_name, call_depth, call_id, w_greenkey)
 
 class WrappedOp(Wrappable):
     """ A class representing a single ResOperation, wrapped nicely
@@ -206,10 +209,13 @@
         jit_hooks.resop_setresult(self.op, box.llbox)
 
 class DebugMergePoint(WrappedOp):
-    def __init__(self, space, op, repr_of_resop, jd_name, call_depth, w_greenkey):
+    def __init__(self, space, op, repr_of_resop, jd_name, call_depth, call_id,
+        w_greenkey):
+
         WrappedOp.__init__(self, op, -1, repr_of_resop)
         self.jd_name = jd_name
         self.call_depth = call_depth
+        self.call_id = call_id
         self.w_greenkey = w_greenkey
 
     def get_pycode(self, space):
@@ -246,6 +252,7 @@
     pycode = GetSetProperty(DebugMergePoint.get_pycode),
     bytecode_no = GetSetProperty(DebugMergePoint.get_bytecode_no),
     call_depth = interp_attrproperty("call_depth", cls=DebugMergePoint),
+    call_id = interp_attrproperty("call_id", cls=DebugMergePoint),
     jitdriver_name = GetSetProperty(DebugMergePoint.get_jitdriver_name),
 )
 DebugMergePoint.acceptable_as_base_class = False
diff --git a/pypy/module/pypyjit/policy.py b/pypy/module/pypyjit/policy.py
--- a/pypy/module/pypyjit/policy.py
+++ b/pypy/module/pypyjit/policy.py
@@ -127,7 +127,7 @@
                        'imp', 'sys', 'array', '_ffi', 'itertools', 'operator',
                        'posix', '_socket', '_sre', '_lsprof', '_weakref',
                        '__pypy__', 'cStringIO', '_collections', 'struct',
-                       'mmap', 'marshal', '_codecs']:
+                       'mmap', 'marshal', '_codecs', 'rctime']:
             if modname == 'pypyjit' and 'interp_resop' in rest:
                 return False
             return True
diff --git a/pypy/module/pypyjit/test/test_jit_hook.py b/pypy/module/pypyjit/test/test_jit_hook.py
--- a/pypy/module/pypyjit/test/test_jit_hook.py
+++ b/pypy/module/pypyjit/test/test_jit_hook.py
@@ -54,7 +54,7 @@
         oplist = parse("""
         [i1, i2, p2]
         i3 = int_add(i1, i2)
-        debug_merge_point(0, 0, 0, 0, ConstPtr(ptr0))
+        debug_merge_point(0, 0, 0, 0, 0, ConstPtr(ptr0))
         guard_nonnull(p2) []
         guard_true(i3) []
         """, namespace={'ptr0': code_gcref}).operations
@@ -87,7 +87,7 @@
         def interp_on_abort():
             pypy_hooks.on_abort(ABORT_TOO_LONG, pypyjitdriver, greenkey,
                                 'blah')
-        
+
         cls.w_on_compile = space.wrap(interp2app(interp_on_compile))
         cls.w_on_compile_bridge = space.wrap(interp2app(interp_on_compile_bridge))
         cls.w_on_abort = space.wrap(interp2app(interp_on_abort))
@@ -105,7 +105,7 @@
 
         def hook(name, looptype, tuple_or_guard_no, ops, asmstart, asmlen):
             all.append((name, looptype, tuple_or_guard_no, ops))
-        
+
         self.on_compile()
         pypyjit.set_compile_hook(hook)
         assert not all
@@ -123,6 +123,7 @@
         assert dmp.pycode is self.f.func_code
         assert dmp.greenkey == (self.f.func_code, 0, False)
         assert dmp.call_depth == 0
+        assert dmp.call_id == 0
         assert int_add.name == 'int_add'
         assert int_add.num == self.int_add_num
         self.on_compile_bridge()
@@ -151,18 +152,18 @@
     def test_non_reentrant(self):
         import pypyjit
         l = []
-        
+
         def hook(*args):
             l.append(None)
             self.on_compile()
             self.on_compile_bridge()
-        
+
         pypyjit.set_compile_hook(hook)
         self.on_compile()
         assert len(l) == 1 # and did not crash
         self.on_compile_bridge()
         assert len(l) == 2 # and did not crash
-        
+
     def test_on_compile_types(self):
         import pypyjit
         l = []
@@ -182,7 +183,7 @@
 
         def hook(jitdriver_name, greenkey, reason):
             l.append((jitdriver_name, reason))
-        
+
         pypyjit.set_abort_hook(hook)
         self.on_abort()
         assert l == [('pypyjit', 'ABORT_TOO_LONG')]
@@ -224,13 +225,14 @@
         def f():
             pass
 
-        op = DebugMergePoint([Box(0)], 'repr', 'pypyjit', 2, (f.func_code, 0, 0))
+        op = DebugMergePoint([Box(0)], 'repr', 'pypyjit', 2, 3, (f.func_code, 0, 0))
         assert op.bytecode_no == 0
         assert op.pycode is f.func_code
         assert repr(op) == 'repr'
         assert op.jitdriver_name == 'pypyjit'
         assert op.num == self.dmp_num
         assert op.call_depth == 2
-        op = DebugMergePoint([Box(0)], 'repr', 'notmain', 5, ('str',))
+        assert op.call_id == 3
+        op = DebugMergePoint([Box(0)], 'repr', 'notmain', 5, 4, ('str',))
         raises(AttributeError, 'op.pycode')
         assert op.call_depth == 5
diff --git a/pypy/module/pypyjit/test/test_policy.py b/pypy/module/pypyjit/test/test_policy.py
--- a/pypy/module/pypyjit/test/test_policy.py
+++ b/pypy/module/pypyjit/test/test_policy.py
@@ -38,6 +38,10 @@
     assert pypypolicy.look_inside_function(Local.getdict.im_func)
     assert pypypolicy.look_inside_function(get_ident)
 
+def test_time():
+    from pypy.module.rctime.interp_time import time
+    assert pypypolicy.look_inside_function(time)
+
 def test_pypy_module():
     from pypy.module._collections.interp_deque import W_Deque
     from pypy.module._random.interp_random import W_Random
diff --git a/pypy/module/pypyjit/test_pypy_c/test_00_model.py b/pypy/module/pypyjit/test_pypy_c/test_00_model.py
--- a/pypy/module/pypyjit/test_pypy_c/test_00_model.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_00_model.py
@@ -60,6 +60,9 @@
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
         stdout, stderr = pipe.communicate()
+        if getattr(pipe, 'returncode', 0) < 0:
+            raise IOError("subprocess was killed by signal %d" % (
+                pipe.returncode,))
         if stderr.startswith('SKIP:'):
             py.test.skip(stderr)
         if stderr.startswith('debug_alloc.h:'):   # lldebug builds
diff --git a/pypy/module/pypyjit/test_pypy_c/test_alloc.py b/pypy/module/pypyjit/test_pypy_c/test_alloc.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_alloc.py
@@ -0,0 +1,26 @@
+import py, sys
+from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
+
+class TestAlloc(BaseTestPyPyC):
+
+    SIZES = dict.fromkeys([2 ** n for n in range(26)] +     # up to 32MB
+                          [2 ** n - 1 for n in range(26)])
+
+    def test_newstr_constant_size(self):
+        for size in TestAlloc.SIZES:
+            yield self.newstr_constant_size, size
+
+    def newstr_constant_size(self, size):
+        src = """if 1:
+                    N = %(size)d
+                    part_a = 'a' * N
+                    part_b = 'b' * N
+                    for i in xrange(20):
+                        ao = '%%s%%s' %% (part_a, part_b)
+                    def main():
+                        return 42
+""" % {'size': size}
+        log = self.run(src, [], threshold=10)
+        assert log.result == 42
+        loop, = log.loops_by_filename(self.filepath)
+        # assert did not crash
diff --git a/pypy/module/pypyjit/test_pypy_c/test_instance.py b/pypy/module/pypyjit/test_pypy_c/test_instance.py
--- a/pypy/module/pypyjit/test_pypy_c/test_instance.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_instance.py
@@ -201,3 +201,28 @@
         loop, = log.loops_by_filename(self.filepath)
         assert loop.match_by_id("compare", "") # optimized away
 
+    def test_super(self):
+        def main():
+            class A(object):
+                def m(self, x):
+                    return x + 1
+            class B(A):
+                def m(self, x):
+                    return super(B, self).m(x)
+            i = 0
+            while i < 300:
+                i = B().m(i)
+            return i
+
+        log = self.run(main, [])
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i78 = int_lt(i72, 300)
+            guard_true(i78, descr=...)
+            guard_not_invalidated(descr=...)
+            i79 = force_token()
+            i80 = force_token()
+            i81 = int_add(i72, 1)
+            --TICK--
+            jump(..., descr=...)
+        """)
diff --git a/pypy/module/rctime/interp_time.py b/pypy/module/rctime/interp_time.py
--- a/pypy/module/rctime/interp_time.py
+++ b/pypy/module/rctime/interp_time.py
@@ -24,6 +24,7 @@
     from pypy.module.thread import ll_thread as thread
 
     eci = ExternalCompilationInfo(
+        post_include_bits = ["BOOL pypy_timemodule_setCtrlHandler(HANDLE event);"],
         separate_module_sources=['''
             #include <windows.h>
 
diff --git a/pypy/module/select/__init__.py b/pypy/module/select/__init__.py
--- a/pypy/module/select/__init__.py
+++ b/pypy/module/select/__init__.py
@@ -22,6 +22,13 @@
             if value is not None:
                 interpleveldefs[symbol] = "space.wrap(%r)" % value
 
+    if 'bsd' in sys.platform or sys.platform.startswith('darwin'):
+        interpleveldefs["kqueue"] = "interp_kqueue.W_Kqueue"
+        interpleveldefs["kevent"] = "interp_kqueue.W_Kevent"
+        from pypy.module.select.interp_kqueue import symbol_map
+        for symbol in symbol_map:
+            interpleveldefs[symbol] = "space.wrap(interp_kqueue.%s)" % symbol
+
     def buildloaders(cls):
         from pypy.rlib import rpoll
         for name in rpoll.eventnames:
diff --git a/pypy/module/select/interp_epoll.py b/pypy/module/select/interp_epoll.py
--- a/pypy/module/select/interp_epoll.py
+++ b/pypy/module/select/interp_epoll.py
@@ -58,7 +58,7 @@
 )
 epoll_wait = rffi.llexternal(
     "epoll_wait",
-    [rffi.INT, lltype.Ptr(rffi.CArray(epoll_event)), rffi.INT, rffi.INT],
+    [rffi.INT, rffi.CArrayPtr(epoll_event), rffi.INT, rffi.INT],
     rffi.INT,
     compilation_info=eci,
 )
diff --git a/pypy/module/select/interp_kqueue.py b/pypy/module/select/interp_kqueue.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/select/interp_kqueue.py
@@ -0,0 +1,343 @@
+from pypy.interpreter.baseobjspace import Wrappable
+from pypy.interpreter.error import OperationError, operationerrfmt, exception_from_errno
+from pypy.interpreter.gateway import interp2app, unwrap_spec
+from pypy.interpreter.typedef import TypeDef, generic_new_descr, GetSetProperty
+from pypy.rlib._rsocket_rffi import socketclose
+from pypy.rpython.lltypesystem import rffi, lltype
+from pypy.rpython.tool import rffi_platform
+from pypy.translator.tool.cbuild import ExternalCompilationInfo
+
+
+eci = ExternalCompilationInfo(
+    includes = ["sys/types.h",
+                "sys/event.h",
+                "sys/time.h"],
+)
+
+
+class CConfig:
+    _compilation_info_ = eci
+
+
+CConfig.kevent = rffi_platform.Struct("struct kevent", [
+    ("ident", rffi.UINTPTR_T),
+    ("filter", rffi.SHORT),
+    ("flags", rffi.USHORT),
+    ("fflags", rffi.UINT),
+    ("data", rffi.INTPTR_T),
+    ("udata", rffi.VOIDP),
+])
+
+
+CConfig.timespec = rffi_platform.Struct("struct timespec", [
+    ("tv_sec", rffi.TIME_T),
+    ("tv_nsec", rffi.LONG),
+])
+
+
+symbol_map = {
+    "KQ_FILTER_READ": "EVFILT_READ",
+    "KQ_FILTER_WRITE": "EVFILT_WRITE",
+    "KQ_FILTER_AIO": "EVFILT_AIO",
+    "KQ_FILTER_VNODE": "EVFILT_VNODE",
+    "KQ_FILTER_PROC": "EVFILT_PROC",
+#    "KQ_FILTER_NETDEV": None, # deprecated on FreeBSD .. no longer defined
+    "KQ_FILTER_SIGNAL": "EVFILT_SIGNAL",
+    "KQ_FILTER_TIMER": "EVFILT_TIMER",
+    "KQ_EV_ADD": "EV_ADD",
+    "KQ_EV_DELETE": "EV_DELETE",
+    "KQ_EV_ENABLE": "EV_ENABLE",
+    "KQ_EV_DISABLE": "EV_DISABLE",
+    "KQ_EV_ONESHOT": "EV_ONESHOT",
+    "KQ_EV_CLEAR": "EV_CLEAR",
+#    "KQ_EV_SYSFLAGS": None, # Python docs says "internal event" .. not defined on FreeBSD
+#    "KQ_EV_FLAG1": None, # Python docs says "internal event" .. not defined on FreeBSD
+    "KQ_EV_EOF": "EV_EOF",
+    "KQ_EV_ERROR": "EV_ERROR"
+}
+
+for symbol in symbol_map.values():
+    setattr(CConfig, symbol, rffi_platform.DefinedConstantInteger(symbol))
+
+cconfig = rffi_platform.configure(CConfig)
+
+kevent = cconfig["kevent"]
+timespec = cconfig["timespec"]
+
+for symbol in symbol_map:
+    globals()[symbol] = cconfig[symbol_map[symbol]]
+
+
+syscall_kqueue = rffi.llexternal(
+    "kqueue",
+    [],
+    rffi.INT,
+    compilation_info=eci
+)
+
+syscall_kevent = rffi.llexternal(
+    "kevent",
+    [rffi.INT,
+     lltype.Ptr(rffi.CArray(kevent)),
+     rffi.INT,
+     lltype.Ptr(rffi.CArray(kevent)),
+     rffi.INT,
+     lltype.Ptr(timespec)
+    ],
+    rffi.INT,
+    compilation_info=eci
+)
+
+
+class W_Kqueue(Wrappable):
+    def __init__(self, space, kqfd):
+        self.kqfd = kqfd
+
+    def descr__new__(space, w_subtype):
+        kqfd = syscall_kqueue()
+        if kqfd < 0:
+            raise exception_from_errno(space, space.w_IOError)
+        return space.wrap(W_Kqueue(space, kqfd))
+
+    @unwrap_spec(fd=int)
+    def descr_fromfd(space, w_cls, fd):
+        return space.wrap(W_Kqueue(space, fd))
+
+    def __del__(self):
+        self.close()
+
+    def get_closed(self):
+        return self.kqfd < 0
+
+    def close(self):
+        if not self.get_closed():
+            kqfd = self.kqfd
+            self.kqfd = -1
+            socketclose(kqfd)
+
+    def check_closed(self, space):
+        if self.get_closed():
+            raise OperationError(space.w_ValueError, space.wrap("I/O operation on closed kqueue fd"))
+
+    def descr_get_closed(self, space):
+        return space.wrap(self.get_closed())
+
+    def descr_fileno(self, space):
+        self.check_closed(space)
+        return space.wrap(self.kqfd)
+
+    def descr_close(self, space):
+        self.close()
+
+    @unwrap_spec(max_events=int)
+    def descr_control(self, space, w_changelist, max_events, w_timeout=None):
+
+        self.check_closed(space)
+
+        if max_events < 0:
+            raise operationerrfmt(space.w_ValueError,
+                "Length of eventlist must be 0 or positive, got %d", max_events
+            )
+
+        if space.is_w(w_changelist, space.w_None):
+            changelist_len = 0
+        else:
+            changelist_len = space.len_w(w_changelist)
+
+        with lltype.scoped_alloc(rffi.CArray(kevent), changelist_len) as changelist:
+            with lltype.scoped_alloc(rffi.CArray(kevent), max_events) as eventlist:
+                with lltype.scoped_alloc(timespec) as timeout:
+
+                    if not space.is_w(w_timeout, space.w_None):
+                        _timeout = space.float_w(w_timeout)
+                        if _timeout < 0:
+                            raise operationerrfmt(space.w_ValueError,
+                                "Timeout must be None or >= 0, got %s", str(_timeout)
+                            )
+                        sec = int(_timeout)
+                        nsec = int(1e9 * (_timeout - sec))
+                        rffi.setintfield(timeout, 'c_tv_sec', sec)
+                        rffi.setintfield(timeout, 'c_tv_nsec', nsec)
+                        ptimeout = timeout
+                    else:
+                        ptimeout = lltype.nullptr(timespec)
+
+                    if not space.is_w(w_changelist, space.w_None):
+                        i = 0
+                        for w_ev in space.listview(w_changelist):
+                            ev = space.interp_w(W_Kevent, w_ev)
+                            changelist[i].c_ident = ev.event.c_ident
+                            changelist[i].c_filter = ev.event.c_filter
+                            changelist[i].c_flags = ev.event.c_flags
+                            changelist[i].c_fflags = ev.event.c_fflags
+                            changelist[i].c_data = ev.event.c_data
+                            changelist[i].c_udata = ev.event.c_udata
+                            i += 1
+                        pchangelist = changelist
+                    else:
+                        pchangelist = lltype.nullptr(rffi.CArray(kevent))
+
+                    nfds = syscall_kevent(self.kqfd,
+                                          pchangelist,
+                                          changelist_len,
+                                          eventlist,
+                                          max_events,
+                                          ptimeout)
+                    if nfds < 0:
+                        raise exception_from_errno(space, space.w_IOError)
+                    else:
+                        elist_w = [None] * nfds
+                        for i in xrange(nfds):
+
+                            evt = eventlist[i]
+
+                            w_event = W_Kevent(space)
+                            w_event.event = lltype.malloc(kevent, flavor="raw")
+                            w_event.event.c_ident = evt.c_ident
+                            w_event.event.c_filter = evt.c_filter
+                            w_event.event.c_flags = evt.c_flags
+                            w_event.event.c_fflags = evt.c_fflags
+                            w_event.event.c_data = evt.c_data
+                            w_event.event.c_udata = evt.c_udata
+
+                            elist_w[i] = w_event
+
+                    return space.newlist(elist_w)
+
+
+W_Kqueue.typedef = TypeDef("select.kqueue",
+    __new__ = interp2app(W_Kqueue.descr__new__.im_func),
+    fromfd = interp2app(W_Kqueue.descr_fromfd.im_func, as_classmethod=True),
+
+    closed = GetSetProperty(W_Kqueue.descr_get_closed),
+    fileno = interp2app(W_Kqueue.descr_fileno),
+
+    close = interp2app(W_Kqueue.descr_close),
+    control = interp2app(W_Kqueue.descr_control),
+)
+W_Kqueue.typedef.acceptable_as_base_class = False
+
+
+class W_Kevent(Wrappable):
+    def __init__(self, space):
+        self.event = lltype.nullptr(kevent)
+
+    def __del__(self):
+        if self.event:
+            lltype.free(self.event, flavor="raw")
+
+    @unwrap_spec(filter=int, flags='c_uint', fflags='c_uint', data=int, udata='c_uint')
+    def descr__init__(self, space, w_ident, filter=KQ_FILTER_READ, flags=KQ_EV_ADD, fflags=0, data=0, udata=0):
+        ident = space.c_filedescriptor_w(w_ident)
+
+        self.event = lltype.malloc(kevent, flavor="raw")
+        rffi.setintfield(self.event, "c_ident", ident)
+        rffi.setintfield(self.event, "c_filter", filter)
+        rffi.setintfield(self.event, "c_flags", flags)
+        rffi.setintfield(self.event, "c_fflags", fflags)
+        rffi.setintfield(self.event, "c_data", data)
+        self.event.c_udata = rffi.cast(rffi.VOIDP, udata)
+
+    def _compare_all_fields(self, other, op):
+        l_ident = self.event.c_ident
+        r_ident = other.event.c_ident
+        l_filter = rffi.cast(lltype.Signed, self.event.c_filter)
+        r_filter = rffi.cast(lltype.Signed, other.event.c_filter)
+        l_flags = rffi.cast(lltype.Unsigned, self.event.c_flags)
+        r_flags = rffi.cast(lltype.Unsigned, other.event.c_flags)
+        l_fflags = rffi.cast(lltype.Unsigned, self.event.c_fflags)
+        r_fflags = rffi.cast(lltype.Unsigned, other.event.c_fflags)
+        l_data = self.event.c_data
+        r_data = other.event.c_data
+        l_udata = rffi.cast(lltype.Unsigned, self.event.c_udata)
+        r_udata = rffi.cast(lltype.Unsigned, other.event.c_udata)
+
+        if op == "eq":
+            return l_ident == r_ident and \
+                   l_filter == r_filter and \
+                   l_flags == r_flags and \
+                   l_fflags == r_fflags and \
+                   l_data == r_data and \
+                   l_udata == r_udata
+        elif op == "lt":
+            return (l_ident < r_ident) or \
+                   (l_ident == r_ident and l_filter < r_filter) or \
+                   (l_ident == r_ident and l_filter == r_filter and l_flags < r_flags) or \
+                   (l_ident == r_ident and l_filter == r_filter and l_flags == r_flags and l_fflags < r_fflags) or \
+                   (l_ident == r_ident and l_filter == r_filter and l_flags == r_flags and l_fflags == r_fflags and l_data < r_data) or \
+                   (l_ident == r_ident and l_filter == r_filter and l_flags == r_flags and l_fflags == r_fflags and l_data == r_data and l_udata < r_udata)
+        elif op == "gt":
+            return (l_ident > r_ident) or \
+                   (l_ident == r_ident and l_filter > r_filter) or \
+                   (l_ident == r_ident and l_filter == r_filter and l_flags > r_flags) or \
+                   (l_ident == r_ident and l_filter == r_filter and l_flags == r_flags and l_fflags > r_fflags) or \
+                   (l_ident == r_ident and l_filter == r_filter and l_flags == r_flags and l_fflags == r_fflags and l_data > r_data) or \
+                   (l_ident == r_ident and l_filter == r_filter and l_flags == r_flags and l_fflags == r_fflags and l_data == r_data and l_udata > r_udata)
+        else:
+            assert False
+
+    def compare_all_fields(self, space, other, op):
+        if not space.interp_w(W_Kevent, other):
+            if op == "eq":
+                return False
+            elif op == "ne":
+                return True
+            else:
+                raise OperationError(space.w_TypeError, space.wrap('cannot compare kevent to incompatible type'))
+        return self._compare_all_fields(space.interp_w(W_Kevent, other), op)
+
+    def descr__eq__(self, space, w_other):
+        return space.wrap(self.compare_all_fields(space, w_other, "eq"))
+
+    def descr__ne__(self, space, w_other):
+        return space.wrap(not self.compare_all_fields(space, w_other, "eq"))
+
+    def descr__le__(self, space, w_other):
+        return space.wrap(not self.compare_all_fields(space, w_other, "gt"))
+
+    def descr__lt__(self, space, w_other):
+        return space.wrap(self.compare_all_fields(space, w_other, "lt"))
+
+    def descr__ge__(self, space, w_other):
+        return space.wrap(not self.compare_all_fields(space, w_other, "lt"))
+
+    def descr__gt__(self, space, w_other):
+        return space.wrap(self.compare_all_fields(space, w_other, "gt"))
+
+    def descr_get_ident(self, space):
+        return space.wrap(self.event.c_ident)
+
+    def descr_get_filter(self, space):
+        return space.wrap(self.event.c_filter)
+
+    def descr_get_flags(self, space):
+        return space.wrap(self.event.c_flags)
+
+    def descr_get_fflags(self, space):
+        return space.wrap(self.event.c_fflags)
+
+    def descr_get_data(self, space):
+        return space.wrap(self.event.c_data)
+
+    def descr_get_udata(self, space):
+        return space.wrap(rffi.cast(rffi.SIZE_T, self.event.c_udata))
+
+
+W_Kevent.typedef = TypeDef("select.kevent",
+    __new__ = generic_new_descr(W_Kevent),
+    __init__ = interp2app(W_Kevent.descr__init__),
+    __eq__ = interp2app(W_Kevent.descr__eq__),
+    __ne__ = interp2app(W_Kevent.descr__ne__),
+    __le__ = interp2app(W_Kevent.descr__le__),
+    __lt__ = interp2app(W_Kevent.descr__lt__),
+    __ge__ = interp2app(W_Kevent.descr__ge__),
+    __gt__ = interp2app(W_Kevent.descr__gt__),
+
+    ident = GetSetProperty(W_Kevent.descr_get_ident),
+    filter = GetSetProperty(W_Kevent.descr_get_filter),
+    flags = GetSetProperty(W_Kevent.descr_get_flags),
+    fflags = GetSetProperty(W_Kevent.descr_get_fflags),
+    data = GetSetProperty(W_Kevent.descr_get_data),
+    udata = GetSetProperty(W_Kevent.descr_get_udata),
+)
+W_Kevent.typedef.acceptable_as_base_class = False
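Aside: the test file that follows exercises the new app-level API in detail; as a minimal orientation (BSD/macOS only, not part of the patch), it is used like this:

    import select, socket

    a, b = socket.socketpair()
    kq = select.kqueue()
    ev = select.kevent(a.fileno(), select.KQ_FILTER_READ,
                       select.KQ_EV_ADD | select.KQ_EV_ENABLE)
    kq.control([ev], 0)                 # register interest, fetch no events
    b.send('ping')
    ready = kq.control(None, 1, 1.0)    # wait up to 1s for one event
    print ready[0].ident == a.fileno()  # True once 'a' is readable
    kq.close(); a.close(); b.close()
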
diff --git a/pypy/module/select/test/test_kqueue.py b/pypy/module/select/test/test_kqueue.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/select/test/test_kqueue.py
@@ -0,0 +1,190 @@
+# adapted from CPython: Lib/test/test_kqueue.py
+
+import py
+import sys
+
+from pypy.conftest import gettestobjspace
+
+
+class AppTestKqueue(object):
+    def setup_class(cls):
+        if not 'bsd' in sys.platform and \
+           not sys.platform.startswith('darwin'):
+            py.test.skip("test requires BSD")
+        cls.space = gettestobjspace(usemodules=["select", "_socket", "posix"])
+
+    def test_create(self):
+        import select
+
+        kq = select.kqueue()
+        assert kq.fileno() > 0
+        assert not kq.closed
+        kq.close()
+        assert kq.closed
+        raises(ValueError, kq.fileno)
+
+    def test_create_event(self):
+        import select
+        import sys
+
+        fd = sys.stderr.fileno()
+        ev = select.kevent(fd)
+        other = select.kevent(1000)
+        assert ev.ident == fd
+        assert ev.filter == select.KQ_FILTER_READ
+        assert ev.flags == select.KQ_EV_ADD
+        assert ev.fflags == 0
+        assert ev.data == 0
+        assert ev.udata == 0
+        assert ev == ev
+        assert ev != other
+        assert cmp(ev, other) == -1
+        assert ev < other
+        assert other >= ev
+        raises(TypeError, cmp, ev, None)
+        raises(TypeError, cmp, ev, 1)
+        raises(TypeError, cmp, ev, "ev")
+
+        ev = select.kevent(fd, select.KQ_FILTER_WRITE)
+        assert ev.ident == fd
+        assert ev.filter == select.KQ_FILTER_WRITE
+        assert ev.flags == select.KQ_EV_ADD
+        assert ev.fflags == 0
+        assert ev.data == 0
+        assert ev.udata == 0
+        assert ev == ev
+        assert ev != other
+
+        ev = select.kevent(fd, select.KQ_FILTER_WRITE, select.KQ_EV_ONESHOT)
+        assert ev.ident == fd
+        assert ev.filter == select.KQ_FILTER_WRITE
+        assert ev.flags == select.KQ_EV_ONESHOT
+        assert ev.fflags == 0
+        assert ev.data == 0
+        assert ev.udata == 0
+        assert ev == ev
+        assert ev != other
+
+        ev = select.kevent(1, 2, 3, 4, 5, 6)
+        assert ev.ident == 1
+        assert ev.filter == 2
+        assert ev.flags == 3
+        assert ev.fflags == 4
+        assert ev.data == 5
+        assert ev.udata == 6
+        assert ev == ev
+        assert ev != other
+
+        bignum = (sys.maxsize * 2 + 1) & 0xffffffff
+        fd = sys.maxsize
+        ev = select.kevent(fd, 1, 2, bignum, sys.maxsize, bignum)
+        assert ev.ident == fd
+        assert ev.filter == 1
+        assert ev.flags == 2
+        assert ev.fflags == bignum
+        assert ev.data == sys.maxsize
+        assert ev.udata == bignum
+        assert ev == ev
+        assert ev != other
+
+    def test_queue_event(self):
+        import errno
+        import select
+        import socket
+        import sys
+        import time
+
+        server_socket = socket.socket()
+        server_socket.bind(("127.0.0.1", 0))
+        server_socket.listen(1)
+        client = socket.socket()
+        client.setblocking(False)
+        try:
+            client.connect(("127.0.0.1", server_socket.getsockname()[1]))
+        except socket.error as e:
+            if 'bsd' in sys.platform:
+                assert e.args[0] == errno.ENOENT
+            else:
+                assert e.args[0] == errno.EINPROGRESS
+        server, addr = server_socket.accept()
+
+        if sys.platform.startswith("darwin"):
+            flags = select.KQ_EV_ADD | select.KQ_EV_ENABLE
+        else:
+            flags = 0
+
+        kq1 = select.kqueue()
+        kq2 = select.kqueue.fromfd(kq1.fileno())
+
+        ev = select.kevent(server.fileno(), select.KQ_FILTER_WRITE, select.KQ_EV_ADD | select.KQ_EV_ENABLE)
+        kq1.control([ev], 0)
+        ev = select.kevent(server.fileno(), select.KQ_FILTER_READ, select.KQ_EV_ADD | select.KQ_EV_ENABLE)
+        kq1.control([ev], 0)
+        ev = select.kevent(client.fileno(), select.KQ_FILTER_WRITE, select.KQ_EV_ADD | select.KQ_EV_ENABLE)
+        kq2.control([ev], 0)
+        ev = select.kevent(client.fileno(), select.KQ_FILTER_READ, select.KQ_EV_ADD | select.KQ_EV_ENABLE)
+        kq2.control([ev], 0)
+
+        events = kq1.control(None, 4, 1)
+        events = [(e.ident, e.filter, e.flags) for e in events]
+        events.sort()
+        assert events == [
+            (client.fileno(), select.KQ_FILTER_WRITE, flags),
+            (server.fileno(), select.KQ_FILTER_WRITE, flags),
+        ]
+        client.send("Hello!")
+        server.send("world!!!")
+
+        for i in xrange(10):
+            events = kq1.control(None, 4, 1)
+            if len(events) == 4:
+                break
+            time.sleep(1.0)
+        else:
+            assert False, "timeout waiting for event notification"
+
+        events = [(e.ident, e.filter, e.flags) for e in events]
+        events.sort()
+        assert events == [
+            (client.fileno(), select.KQ_FILTER_WRITE, flags),
+            (client.fileno(), select.KQ_FILTER_READ, flags),
+            (server.fileno(), select.KQ_FILTER_WRITE, flags),
+            (server.fileno(), select.KQ_FILTER_READ, flags),
+        ]
+
+        ev = select.kevent(client.fileno(), select.KQ_FILTER_WRITE, select.KQ_EV_DELETE)
+        kq1.control([ev], 0)
+        ev = select.kevent(client.fileno(), select.KQ_FILTER_READ, select.KQ_EV_DELETE)
+        kq1.control([ev], 0)
+        ev = select.kevent(server.fileno(), select.KQ_FILTER_READ, select.KQ_EV_DELETE)
+        kq1.control([ev], 0, 0)
+
+        events = kq1.control([], 4, 0.99)
+        events = [(e.ident, e.filter, e.flags) for e in events]
+        events.sort()
+        assert events == [
+            (server.fileno(), select.KQ_FILTER_WRITE, flags),
+        ]
+
+        client.close()
+        server.close()
+        server_socket.close()
+
+    def test_pair(self):
+        import select
+        import socket
+
+        kq = select.kqueue()
+        a, b = socket.socketpair()
+
+        a.send('foo')
+        event1 = select.kevent(a, select.KQ_FILTER_READ, select.KQ_EV_ADD | select.KQ_EV_ENABLE)
+        event2 = select.kevent(b, select.KQ_FILTER_READ, select.KQ_EV_ADD | select.KQ_EV_ENABLE)
+        r = kq.control([event1, event2], 1, 1)
+        assert r
+        assert r[0].flags & select.KQ_EV_ERROR == 0
+        data = b.recv(r[0].data)
+        assert data == 'foo'
+
+        a.close()
+        b.close()
+        kq.close()
diff --git a/pypy/module/select/test/test_ztranslation.py b/pypy/module/select/test/test_ztranslation.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/select/test/test_ztranslation.py
@@ -0,0 +1,5 @@
+
+from pypy.objspace.fake.checkmodule import checkmodule
+
+def test_select_translates():
+    checkmodule('select')
diff --git a/pypy/module/signal/interp_signal.py b/pypy/module/signal/interp_signal.py
--- a/pypy/module/signal/interp_signal.py
+++ b/pypy/module/signal/interp_signal.py
@@ -11,11 +11,11 @@
 import sys
 from pypy.tool import autopath
 from pypy.rlib import jit, rposix
-from pypy.rlib.rarithmetic import intmask
+from pypy.rlib.rarithmetic import intmask, is_valid_int
 
 def setup():
     for key, value in cpy_signal.__dict__.items():
-        if key.startswith('SIG') and isinstance(value, int):
+        if key.startswith('SIG') and is_valid_int(value):
             globals()[key] = value
             yield key
 
diff --git a/pypy/module/struct/formatiterator.py b/pypy/module/struct/formatiterator.py
--- a/pypy/module/struct/formatiterator.py
+++ b/pypy/module/struct/formatiterator.py
@@ -1,5 +1,6 @@
 from pypy.rlib import jit
 from pypy.rlib.objectmodel import specialize
+from pypy.rlib.rstring import StringBuilder
 from pypy.rlib.rstruct.error import StructError
 from pypy.rlib.rstruct.formatiterator import FormatIterator
 from pypy.rlib.rstruct.standardfmttable import PACK_ACCEPTS_BROKEN_INPUT
@@ -8,11 +9,11 @@
 
 class PackFormatIterator(FormatIterator):
 
-    def __init__(self, space, args_w):
+    def __init__(self, space, args_w, size):
         self.space = space
         self.args_w = args_w
         self.args_index = 0
-        self.result = []      # list of characters
+        self.result = StringBuilder(size)
 
     # This *should* always be unroll safe; the only way to get here is by
     # unrolling the interpret function, which means the fmt is const, and thus
@@ -29,9 +30,8 @@
 
     @jit.unroll_safe
     def align(self, mask):
-        pad = (-len(self.result)) & mask
-        for i in range(pad):
-            self.result.append('\x00')
+        pad = (-self.result.getlength()) & mask
+        self.result.append_multiple_char('\x00', pad)
 
     def finished(self):
         if self.args_index != len(self.args_w):
diff --git a/pypy/module/struct/interp_struct.py b/pypy/module/struct/interp_struct.py
--- a/pypy/module/struct/interp_struct.py
+++ b/pypy/module/struct/interp_struct.py
@@ -1,28 +1,34 @@
 from pypy.interpreter.gateway import unwrap_spec
 from pypy.module.struct.formatiterator import PackFormatIterator, UnpackFormatIterator
+from pypy.rlib import jit
 from pypy.rlib.rstruct.error import StructError
 from pypy.rlib.rstruct.formatiterator import CalcSizeFormatIterator
 
 
 @unwrap_spec(format=str)
 def calcsize(space, format):
+    return space.wrap(_calcsize(space, format))
+
+def _calcsize(space, format):
     fmtiter = CalcSizeFormatIterator()
     try:
         fmtiter.interpret(format)
     except StructError, e:
         raise e.at_applevel(space)
-    return space.wrap(fmtiter.totalsize)
-
+    return fmtiter.totalsize
 
 @unwrap_spec(format=str)
 def pack(space, format, args_w):
-    fmtiter = PackFormatIterator(space, args_w)
+    if jit.isconstant(format):
+        size = _calcsize(space, format)
+    else:
+        size = 8
+    fmtiter = PackFormatIterator(space, args_w, size)
     try:
         fmtiter.interpret(format)
     except StructError, e:
         raise e.at_applevel(space)
-    result = ''.join(fmtiter.result)
-    return space.wrap(result)
+    return space.wrap(fmtiter.result.build())
 
 
 @unwrap_spec(format=str, input='bufferstr')
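A side note on the pack() change above: when the JIT sees a constant format string, _calcsize() runs first so the StringBuilder can be created at its exact final size, and align() pads with append_multiple_char(). A minimal sketch of the same sizing and padding arithmetic using plain CPython's struct module (not taken from this changeset):

    import struct

    fmt = '>ihb'                        # big-endian: int, short, signed char
    size = struct.calcsize(fmt)         # 4 + 2 + 1 = 7, known before packing
    packed = struct.pack(fmt, 1, 2, 3)
    assert len(packed) == size          # a buffer of `size` bytes is enough

    # Padding as computed in PackFormatIterator.align(): round the current
    # length up to the next multiple of (mask + 1).
    length, mask = 5, 3                 # align to a 4-byte boundary
    assert (-length) & mask == 3        # 3 pad bytes bring 5 up to 8
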
diff --git a/pypy/module/struct/test/test_ztranslation.py b/pypy/module/struct/test/test_ztranslation.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/struct/test/test_ztranslation.py
@@ -0,0 +1,6 @@
+from pypy.objspace.fake.checkmodule import checkmodule
+
+
+def test_checkmodule():
+    checkmodule('struct')
+
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_errno.py b/pypy/module/test_lib_pypy/ctypes_tests/test_errno.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_errno.py
@@ -0,0 +1,21 @@
+import py
+
+import ctypes
+from _ctypes import function
+
+_rawffi = py.test.importorskip("_rawffi")
+
+class TestErrno:
+
+    def test_errno_saved_and_restored(self):
+        def check():
+            assert _rawffi.get_errno() == 42
+            assert ctypes.get_errno() == old
+        check.free_temp_buffers = lambda *args: None
+        f = function.CFuncPtr()
+        old = _rawffi.get_errno()
+        f._flags_ = _rawffi.FUNCFLAG_USE_ERRNO
+        ctypes.set_errno(42)
+        f._call_funcptr(check)
+        assert _rawffi.get_errno() == old
+        ctypes.set_errno(0)
diff --git a/pypy/module/test_lib_pypy/test_collections.py b/pypy/module/test_lib_pypy/test_collections.py
--- a/pypy/module/test_lib_pypy/test_collections.py
+++ b/pypy/module/test_lib_pypy/test_collections.py
@@ -6,7 +6,7 @@
 
 from pypy.conftest import gettestobjspace
 
-class AppTestcStringIO:
+class AppTestCollections:
     def test_copy(self):
         import _collections
         def f():
diff --git a/pypy/objspace/flow/model.py b/pypy/objspace/flow/model.py
--- a/pypy/objspace/flow/model.py
+++ b/pypy/objspace/flow/model.py
@@ -8,6 +8,8 @@
 from pypy.tool.descriptor import roproperty
 from pypy.tool.sourcetools import PY_IDENTIFIER, nice_repr_for_func
 from pypy.tool.identity_dict import identity_dict
+from pypy.rlib.rarithmetic import is_valid_int
+
 
 """
     memory size before and after introduction of __slots__
@@ -542,7 +544,7 @@
                     cases = [link.exitcase for link in block.exits]
                     has_default = cases[-1] == 'default'
                     for n in cases[:len(cases)-has_default]:
-                        if isinstance(n, (int, long)):
+                        if is_valid_int(n):
                             continue
                         if isinstance(n, (str, unicode)) and len(n) == 1:
                             continue
diff --git a/pypy/objspace/flow/objspace.py b/pypy/objspace/flow/objspace.py
--- a/pypy/objspace/flow/objspace.py
+++ b/pypy/objspace/flow/objspace.py
@@ -14,6 +14,7 @@
 from pypy.objspace.flow import flowcontext, operation, specialcase
 from pypy.rlib.unroll import unrolling_iterable, _unroller
 from pypy.rlib import rstackovf, rarithmetic
+from pypy.rlib.rarithmetic import is_valid_int
 
 
 # method-wrappers have not enough introspection in CPython
@@ -141,7 +142,7 @@
     def int_w(self, w_obj):
         if isinstance(w_obj, Constant):
             val = w_obj.value
-            if type(val) not in (int,long):
+            if not is_valid_int(val):
                 raise TypeError("expected integer: " + repr(w_obj))
             return val
         return self.unwrap(w_obj)
diff --git a/pypy/objspace/flow/operation.py b/pypy/objspace/flow/operation.py
--- a/pypy/objspace/flow/operation.py
+++ b/pypy/objspace/flow/operation.py
@@ -30,10 +30,7 @@
 
 def new_style_type(x):
     """Simulate a situation where every class is new-style"""
-    t = getattr(x, '__class__', type(x))
-    if t is types.ClassType:   # guess who's here?  exception classes...
-        t = type
-    return t
+    return getattr(x, '__class__', type(x))
 
 def do_int(x):
     return x.__int__()
diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py
--- a/pypy/objspace/std/dictmultiobject.py
+++ b/pypy/objspace/std/dictmultiobject.py
@@ -2,6 +2,7 @@
 from pypy.objspace.std.model import registerimplementation, W_Object
 from pypy.objspace.std.register_all import register_all
 from pypy.objspace.std.settype import set_typedef as settypedef
+from pypy.objspace.std.frozensettype import frozenset_typedef as frozensettypedef
 from pypy.interpreter import gateway
 from pypy.interpreter.argument import Signature
 from pypy.interpreter.error import OperationError, operationerrfmt
@@ -488,7 +489,7 @@
 
 class _UnwrappedIteratorMixin:
     _mixin_ = True
-    
+
     def __init__(self, space, strategy, dictimplementation):
         IteratorImplementation.__init__(self, space, dictimplementation)
         self.iterator = strategy.unerase(dictimplementation.dstorage).iteritems()
@@ -837,10 +838,12 @@
         return all_contained_in(space, w_dictview, w_otherview)
     return space.w_False
 eq__DictViewKeys_settypedef = eq__DictViewKeys_DictViewKeys
+eq__DictViewKeys_frozensettypedef = eq__DictViewKeys_DictViewKeys
 
 eq__DictViewKeys_DictViewItems = eq__DictViewKeys_DictViewKeys
 eq__DictViewItems_DictViewItems = eq__DictViewKeys_DictViewKeys
 eq__DictViewItems_settypedef = eq__DictViewItems_DictViewItems
+eq__DictViewItems_frozensettypedef = eq__DictViewItems_DictViewItems
 
 def repr__DictViewKeys(space, w_dictview):
     w_seq = space.call_function(space.w_list, w_dictview)
diff --git a/pypy/objspace/std/listobject.py b/pypy/objspace/std/listobject.py
--- a/pypy/objspace/std/listobject.py
+++ b/pypy/objspace/std/listobject.py
@@ -7,7 +7,7 @@
 from pypy.objspace.std.sliceobject import W_SliceObject, normalize_simple_slice
 from pypy.objspace.std import slicetype
 from pypy.interpreter import gateway, baseobjspace
-from pypy.rlib.objectmodel import instantiate, specialize
+from pypy.rlib.objectmodel import instantiate, specialize, newlist_hint
 from pypy.rlib.listsort import make_timsort_class
 from pypy.rlib import rerased, jit, debug
 from pypy.interpreter.argument import Signature
@@ -32,9 +32,11 @@
     storage = strategy.erase(None)
     return W_ListObject.from_storage_and_strategy(space, storage, strategy)
 
- at jit.look_inside_iff(lambda space, list_w: jit.isconstant(len(list_w)) and len(list_w) < UNROLL_CUTOFF)
-def get_strategy_from_list_objects(space, list_w):
+ at jit.look_inside_iff(lambda space, list_w, sizehint: jit.isconstant(len(list_w)) and len(list_w) < UNROLL_CUTOFF)
+def get_strategy_from_list_objects(space, list_w, sizehint):
     if not list_w:
+        if sizehint != -1:
+            return SizeListStrategy(space, sizehint)
         return space.fromcache(EmptyListStrategy)
 
     # check for ints
@@ -75,11 +77,13 @@
 class W_ListObject(W_AbstractListObject):
     from pypy.objspace.std.listtype import list_typedef as typedef
 
-    def __init__(w_self, space, wrappeditems):
+    def __init__(w_self, space, wrappeditems, sizehint=-1):
         assert isinstance(wrappeditems, list)
         w_self.space = space
         if space.config.objspace.std.withliststrategies:
-            w_self.strategy = get_strategy_from_list_objects(space, wrappeditems)
+            w_self.strategy = get_strategy_from_list_objects(space,
+                                                             wrappeditems,
+                                                             sizehint)
         else:
             w_self.strategy = space.fromcache(ObjectListStrategy)
         w_self.init_from_list_w(wrappeditems)
@@ -255,6 +259,7 @@
 
 
 class ListStrategy(object):
+    sizehint = -1
 
     def __init__(self, space):
         self.space = space
@@ -336,6 +341,7 @@
     def sort(self, w_list, reverse):
         raise NotImplementedError
 
+
 class EmptyListStrategy(ListStrategy):
     """EmptyListStrategy is used when a W_List withouth elements is created.
     The storage is None. When items are added to the W_List a new RPython list
@@ -397,7 +403,7 @@
         else:
             strategy = self.space.fromcache(ObjectListStrategy)
 
-        storage = strategy.get_empty_storage()
+        storage = strategy.get_empty_storage(self.sizehint)
         w_list.strategy = strategy
         w_list.lstorage = storage
 
@@ -438,6 +444,13 @@
     def reverse(self, w_list):
         pass
 
+class SizeListStrategy(EmptyListStrategy):
+    """ Like empty, but when modified it'll preallocate the size to sizehint
+    """
+    def __init__(self, space, sizehint):
+        self.sizehint = sizehint
+        ListStrategy.__init__(self, space)
+
 class RangeListStrategy(ListStrategy):
     """RangeListStrategy is used when a list is created using the range method.
     The storage is a tuple containing only three integers start, step and length
@@ -660,8 +673,10 @@
         l = [self.unwrap(w_item) for w_item in list_w]
         w_list.lstorage = self.erase(l)
 
-    def get_empty_storage(self):
-        return self.erase([])
+    def get_empty_storage(self, sizehint):
+        if sizehint == -1:
+            return self.erase([])
+        return self.erase(newlist_hint(sizehint))
 
     def clone(self, w_list):
         l = self.unerase(w_list.lstorage)
diff --git a/pypy/objspace/std/marshal_impl.py b/pypy/objspace/std/marshal_impl.py
--- a/pypy/objspace/std/marshal_impl.py
+++ b/pypy/objspace/std/marshal_impl.py
@@ -16,6 +16,7 @@
 from pypy.interpreter.pycode import PyCode
 from pypy.interpreter import gateway, unicodehelper
 from pypy.rlib.rstruct import ieee
+from pypy.rlib.rstring import StringBuilder
 
 from pypy.objspace.std.boolobject    import W_BoolObject
 from pypy.objspace.std.complexobject import W_ComplexObject
@@ -153,9 +154,9 @@
 register(TYPE_INT64, unmarshal_Int64)
 
 def pack_float(f):
-    result = []
+    result = StringBuilder(8)
     ieee.pack_float(result, f, 8, False)
-    return ''.join(result)
+    return result.build()
 
 def unpack_float(s):
     return ieee.unpack_float(s, False)
diff --git a/pypy/objspace/std/newformat.py b/pypy/objspace/std/newformat.py
--- a/pypy/objspace/std/newformat.py
+++ b/pypy/objspace/std/newformat.py
@@ -10,6 +10,10 @@
 
 
 @specialize.argtype(1)
+ at jit.look_inside_iff(lambda space, s, start, end:
+       jit.isconstant(s) and
+       jit.isconstant(start) and
+       jit.isconstant(end))
 def _parse_int(space, s, start, end):
     """Parse a number and check for overflows"""
     result = 0
@@ -91,9 +95,18 @@
                         if s[i] == "{":
                             i += 1
                             markup_follows = False
-                    # Attach literal data
+                    # Attach literal data, ending with { or }
                     out.append_slice(s, last_literal, i - 1)
                     if not markup_follows:
+                        if self.parser_list_w is not None:
+                            end_literal = i - 1
+                            assert end_literal > last_literal
+                            literal = self.template[last_literal:end_literal]
+                            w_entry = space.newtuple([
+                                space.wrap(literal),
+                                space.w_None, space.w_None, space.w_None])
+                            self.parser_list_w.append(w_entry)
+                            self.last_end = i
                         last_literal = i
                         continue
                     nested = 1
diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py
--- a/pypy/objspace/std/objspace.py
+++ b/pypy/objspace/std/objspace.py
@@ -9,7 +9,7 @@
 from pypy.objspace.descroperation import DescrOperation, raiseattrerror
 from pypy.rlib.objectmodel import instantiate, r_dict, specialize, is_annotation_constant
 from pypy.rlib.debug import make_sure_not_resized
-from pypy.rlib.rarithmetic import base_int, widen
+from pypy.rlib.rarithmetic import base_int, widen, maxint
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.rlib import jit
 
@@ -165,6 +165,10 @@
                 return self.newbool(x)
             else:
                 return self.newint(x)
+        # this is an inlined 'is_valid_int' which cannot be used
+        # due to the special annotation nature of 'wrap'.
+        if isinstance(x, long) and (-maxint - 1 <= x <= maxint):
+            return self.newint(x)
         if isinstance(x, str):
             return wrapstr(self, x)
         if isinstance(x, unicode):
@@ -300,8 +304,9 @@
         make_sure_not_resized(list_w)
         return wraptuple(self, list_w)
 
-    def newlist(self, list_w):
-        return W_ListObject(self, list_w)
+    def newlist(self, list_w, sizehint=-1):
+        assert not list_w or sizehint == -1
+        return W_ListObject(self, list_w, sizehint)
 
     def newlist_str(self, list_s):
         return W_ListObject.newlist_str(self, list_s)
diff --git a/pypy/objspace/std/ropeunicodeobject.py b/pypy/objspace/std/ropeunicodeobject.py
--- a/pypy/objspace/std/ropeunicodeobject.py
+++ b/pypy/objspace/std/ropeunicodeobject.py
@@ -8,6 +8,7 @@
 from pypy.objspace.std.ropeobject import W_RopeObject
 from pypy.objspace.std.noneobject import W_NoneObject
 from pypy.rlib import rope
+from pypy.rlib.rstring import StringBuilder
 from pypy.objspace.std.sliceobject import W_SliceObject, normalize_simple_slice
 from pypy.objspace.std import unicodeobject, slicetype, iterobject
 from pypy.objspace.std.tupleobject import W_TupleObject
@@ -946,15 +947,16 @@
     return mod_format(space, w_format, w_values, do_unicode=True)
 
 def buffer__RopeUnicode(space, w_unicode):
-    from pypy.rlib.rstruct.unichar import pack_unichar
-    charlist = []
+    from pypy.rlib.rstruct.unichar import pack_unichar, UNICODE_SIZE
     node = w_unicode._node
     iter = rope.ItemIterator(node)
-    for idx in range(node.length()):
+    length = node.length()
+    builder = StringBuilder(length * UNICODE_SIZE)
+    for idx in range(length):
         unich = unichr(iter.nextint())
-        pack_unichar(unich, charlist)
+        pack_unichar(unich, builder)
     from pypy.interpreter.buffer import StringBuffer
-    return space.wrap(StringBuffer(''.join(charlist)))
+    return space.wrap(StringBuffer(builder.build()))
 
 
 # methods of the iterator
diff --git a/pypy/objspace/std/stringobject.py b/pypy/objspace/std/stringobject.py
--- a/pypy/objspace/std/stringobject.py
+++ b/pypy/objspace/std/stringobject.py
@@ -56,9 +56,18 @@
         return w_self._value
 
     def unicode_w(w_self, space):
-        # XXX should this use the default encoding?
-        from pypy.objspace.std.unicodetype import plain_str2unicode
-        return plain_str2unicode(space, w_self._value)
+        # Use the default encoding.
+        from pypy.objspace.std.unicodetype import _get_encoding_and_errors, \
+            unicode_from_string, decode_object
+        w_defaultencoding = space.call_function(space.sys.get(
+                                                'getdefaultencoding'))
+        encoding, errors = _get_encoding_and_errors(space, w_defaultencoding,
+                                                    space.w_None)
+        if encoding is None and errors is None:
+            return space.unicode_w(unicode_from_string(space, w_self))
+        return space.unicode_w(decode_object(space, w_self, encoding, errors))
 
 registerimplementation(W_StringObject)
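The new unicode_w() above decodes the byte string with the interpreter's default encoding instead of the previous plain_str2unicode() conversion. The observable behaviour matches CPython 2 when str and unicode are mixed; a small illustration in plain CPython, assuming the stock 'ascii' default encoding (nothing here comes from this changeset):

    import sys

    print sys.getdefaultencoding()      # normally 'ascii'
    assert u''.join(['abc']) == u'abc'  # ASCII bytes decode fine
    try:
        u''.join(['\xc3\xa1'])          # non-ASCII bytes do not
    except UnicodeDecodeError:
        pass
    else:
        raise AssertionError('expected a UnicodeDecodeError')
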
 
diff --git a/pypy/objspace/std/strutil.py b/pypy/objspace/std/strutil.py
--- a/pypy/objspace/std/strutil.py
+++ b/pypy/objspace/std/strutil.py
@@ -177,8 +177,10 @@
         return INFINITY
     elif low == "infinity" or low == "+infinity":
         return INFINITY
-    elif low == "nan" or low == "-nan" or low == "+nan":
+    elif low == "nan" or low == "+nan":
         return NAN
+    elif low == "-nan":
+        return -NAN
 
     try:
         return rstring_to_float(s)
diff --git a/pypy/objspace/std/test/test_dictmultiobject.py b/pypy/objspace/std/test/test_dictmultiobject.py
--- a/pypy/objspace/std/test/test_dictmultiobject.py
+++ b/pypy/objspace/std/test/test_dictmultiobject.py
@@ -613,6 +613,7 @@
         assert len(keys) == 2
         assert set(keys) == set([1, "a"])
         assert keys == set([1, "a"])
+        assert keys == frozenset([1, "a"])
         assert keys != set([1, "a", "b"])
         assert keys != set([1, "b"])
         assert keys != set([1])
@@ -633,6 +634,7 @@
         assert len(items) == 2
         assert set(items) == set([(1, 10), ("a", "ABC")])
         assert items == set([(1, 10), ("a", "ABC")])
+        assert items == frozenset([(1, 10), ("a", "ABC")])
         assert items != set([(1, 10), ("a", "ABC"), "junk"])
         assert items != set([(1, 10), ("a", "def")])
         assert items != set([(1, 10)])
diff --git a/pypy/objspace/std/test/test_listobject.py b/pypy/objspace/std/test/test_listobject.py
--- a/pypy/objspace/std/test/test_listobject.py
+++ b/pypy/objspace/std/test/test_listobject.py
@@ -1,6 +1,7 @@
 # coding: iso-8859-15
 import random
-from pypy.objspace.std.listobject import W_ListObject
+from pypy.objspace.std.listobject import W_ListObject, SizeListStrategy,\
+     IntegerListStrategy, ObjectListStrategy
 from pypy.interpreter.error import OperationError
 
 from pypy.conftest import gettestobjspace, option
@@ -390,6 +391,16 @@
         assert self.space.eq_w(self.space.le(w_list4, w_list3),
                            self.space.w_True)
 
+    def test_sizehint(self):
+        space = self.space
+        w_l = space.newlist([], sizehint=10)
+        assert isinstance(w_l.strategy, SizeListStrategy)
+        space.call_method(w_l, 'append', space.wrap(3))
+        assert isinstance(w_l.strategy, IntegerListStrategy)
+        w_l = space.newlist([], sizehint=10)
+        space.call_method(w_l, 'append', space.w_None)
+        assert isinstance(w_l.strategy, ObjectListStrategy)
+
 
 class AppTestW_ListObject(object):
     def setup_class(cls):
diff --git a/pypy/objspace/std/test/test_newformat.py b/pypy/objspace/std/test/test_newformat.py
--- a/pypy/objspace/std/test/test_newformat.py
+++ b/pypy/objspace/std/test/test_newformat.py
@@ -11,6 +11,7 @@
         assert self.s("}}").format() == self.s("}")
         assert self.s("{} {{ {}").format(1, 2) == self.s("1 { 2")
         assert self.s("{{}}").format() == self.s("{}")
+        assert self.s("{{{{").format() == self.s("{{")
 
     def test_empty(self):
         assert self.s().format() == self.s()
@@ -385,6 +386,12 @@
         for x in l[0]:
             assert isinstance(x, unicode)
 
+    def test_formatter_parser_escape(self):
+        l = list("{{a}}"._formatter_parser())
+        assert l == [('{', None, None, None), ('a}', None, None, None)]
+        l = list("{{{{"._formatter_parser())
+        assert l == [('{', None, None, None), ('{', None, None, None)]
+
     def test_formatter_field_name_split(self):
         first, rest = ''._formatter_field_name_split()
         assert first == ''
diff --git a/pypy/objspace/std/test/test_stringobject.py b/pypy/objspace/std/test/test_stringobject.py
--- a/pypy/objspace/std/test/test_stringobject.py
+++ b/pypy/objspace/std/test/test_stringobject.py
@@ -501,6 +501,35 @@
         raises(TypeError, ''.join, [1])
         raises(TypeError, ''.join, [[1]])
 
+    def test_unicode_join_str_arg_ascii(self):
+        raises(UnicodeDecodeError, u''.join, ['\xc3\xa1'])
+
+    def test_unicode_join_str_arg_utf8(self):
+        # Need default encoding utf-8, but sys.setdefaultencoding
+        # is removed after startup.
+        import sys
+        old_encoding = sys.getdefaultencoding()
+        # Duplicate unittest.test_support.CleanImport logic because it won't
+        # import.
+        self.original_modules = sys.modules.copy()
+        try:
+            import sys as temp_sys
+            module_name = 'sys'
+            if module_name in sys.modules:
+                module = sys.modules[module_name]
+                # It is possible that module_name is just an alias for
+                # another module (e.g. stub for modules renamed in 3.x).
+                # In that case, we also need to delete the real module to
+                # clear the import cache.
+                if module.__name__ != module_name:
+                    del sys.modules[module.__name__]
+                del sys.modules[module_name]
+            temp_sys.setdefaultencoding('utf-8')
+            assert u''.join(['\xc3\xa1']) == u'\xe1'
+        finally:
+            temp_sys.setdefaultencoding(old_encoding)
+            sys.modules.update(self.original_modules)
+
     def test_unicode_join_endcase(self):
         # This class inserts a Unicode object into its argument's natural
         # iteration, in the 3rd position.
diff --git a/pypy/objspace/std/tupleobject.py b/pypy/objspace/std/tupleobject.py
--- a/pypy/objspace/std/tupleobject.py
+++ b/pypy/objspace/std/tupleobject.py
@@ -6,8 +6,11 @@
 from pypy.rlib.rarithmetic import intmask
 from pypy.objspace.std.sliceobject import W_SliceObject, normalize_simple_slice
 from pypy.objspace.std import slicetype
-from pypy.interpreter import gateway
 from pypy.rlib.debug import make_sure_not_resized
+from pypy.rlib import jit
+
+# Tuples of known length up to UNROLL_TUPLE_LIMIT have certain methods unrolled by the JIT
+UNROLL_TUPLE_LIMIT = 10
 
 class W_AbstractTupleObject(W_Object):
     __slots__ = ()
@@ -114,18 +117,28 @@
 def mul__ANY_Tuple(space, w_times, w_tuple):
     return mul_tuple_times(space, w_tuple, w_times)
 
+def tuple_unroll_condition(space, w_tuple1, w_tuple2):
+    lgt1 = len(w_tuple1.wrappeditems)
+    lgt2 = len(w_tuple2.wrappeditems)
+    return ((jit.isconstant(lgt1) and lgt1 <= UNROLL_TUPLE_LIMIT) or
+            (jit.isconstant(lgt2) and lgt2 <= UNROLL_TUPLE_LIMIT))
+
+ at jit.look_inside_iff(tuple_unroll_condition)
 def eq__Tuple_Tuple(space, w_tuple1, w_tuple2):
     items1 = w_tuple1.wrappeditems
     items2 = w_tuple2.wrappeditems
-    if len(items1) != len(items2):
+    lgt1 = len(items1)
+    lgt2 = len(items2)
+    if lgt1 != lgt2:
         return space.w_False
-    for i in range(len(items1)):
+    for i in range(lgt1):
         item1 = items1[i]
         item2 = items2[i]
         if not space.eq_w(item1, item2):
             return space.w_False
     return space.w_True
 
+ at jit.look_inside_iff(tuple_unroll_condition)
 def lt__Tuple_Tuple(space, w_tuple1, w_tuple2):
     items1 = w_tuple1.wrappeditems
     items2 = w_tuple2.wrappeditems
@@ -137,6 +150,7 @@
     # No more items to compare -- compare sizes
     return space.newbool(len(items1) < len(items2))
 
+ at jit.look_inside_iff(tuple_unroll_condition)
 def gt__Tuple_Tuple(space, w_tuple1, w_tuple2):
     items1 = w_tuple1.wrappeditems
     items2 = w_tuple2.wrappeditems
@@ -161,6 +175,9 @@
 def hash__Tuple(space, w_tuple):
     return space.wrap(hash_tuple(space, w_tuple.wrappeditems))
 
+ at jit.look_inside_iff(lambda space, wrappeditems:
+                     jit.isconstant(len(wrappeditems)) and
+                     len(wrappeditems) < UNROLL_TUPLE_LIMIT)
 def hash_tuple(space, wrappeditems):
     # this is the CPython 2.4 algorithm (changed from 2.3)
     mult = 1000003
diff --git a/pypy/objspace/std/typeobject.py b/pypy/objspace/std/typeobject.py
--- a/pypy/objspace/std/typeobject.py
+++ b/pypy/objspace/std/typeobject.py
@@ -103,6 +103,7 @@
                           'terminator',
                           '_version_tag?',
                           'name?',
+                          'mro_w?[*]',
                           ]
 
     # for config.objspace.std.getattributeshortcut
@@ -345,9 +346,9 @@
 
         return w_self._lookup_where(name)
 
+    @unroll_safe
     def lookup_starting_at(w_self, w_starttype, name):
         space = w_self.space
-        # XXX Optimize this with method cache
         look = False
         for w_class in w_self.mro_w:
             if w_class is w_starttype:
diff --git a/pypy/rlib/_rffi_stacklet.py b/pypy/rlib/_rffi_stacklet.py
--- a/pypy/rlib/_rffi_stacklet.py
+++ b/pypy/rlib/_rffi_stacklet.py
@@ -3,6 +3,7 @@
 from pypy.rpython.lltypesystem import lltype, llmemory, rffi
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
 from pypy.rpython.tool import rffi_platform
+from pypy.rlib.rarithmetic import is_emulated_long
 import sys
 
 
@@ -14,7 +15,11 @@
     separate_module_sources = ['#include "src/stacklet/stacklet.c"\n'],
 )
 if sys.platform == 'win32':
-    eci.separate_module_files += (cdir / "src/stacklet/switch_x86_msvc.asm", )
+    if is_emulated_long:
+        asmsrc = 'switch_x64_msvc.asm'
+    else:
+        asmsrc = 'switch_x86_msvc.asm'
+    eci.separate_module_files += (cdir / 'src' / 'stacklet' / asmsrc, )
     eci.export_symbols += (
         'stacklet_newthread',
         'stacklet_deletethread',
diff --git a/pypy/rlib/clibffi.py b/pypy/rlib/clibffi.py
--- a/pypy/rlib/clibffi.py
+++ b/pypy/rlib/clibffi.py
@@ -5,7 +5,7 @@
 from pypy.rpython.tool import rffi_platform
 from pypy.rpython.lltypesystem import lltype, rffi
 from pypy.rlib.unroll import unrolling_iterable
-from pypy.rlib.rarithmetic import intmask, r_uint
+from pypy.rlib.rarithmetic import intmask, r_uint, is_emulated_long
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.rlib.rmmap import alloc
 from pypy.rlib.rdynload import dlopen, dlclose, dlsym, dlsym_byordinal
@@ -27,6 +27,7 @@
 _MSVC = platform.name == "msvc"
 _MINGW = platform.name == "mingw32"
 _WIN32 = _MSVC or _MINGW
+_WIN64 = _WIN32 and is_emulated_long
 _MAC_OS = platform.name == "darwin"
 _FREEBSD_7 = platform.name == "freebsd7"
 
@@ -119,6 +120,10 @@
          ])
 else:
     libffidir = py.path.local(pypydir).join('translator', 'c', 'src', 'libffi_msvc')
+    if not _WIN64:
+        asm_ifc = 'win32.c'
+    else:
+        asm_ifc = 'win64.asm'
     eci = ExternalCompilationInfo(
         includes = ['ffi.h', 'windows.h'],
         libraries = ['kernel32'],
@@ -126,7 +131,7 @@
         separate_module_sources = separate_module_sources,
         separate_module_files = [libffidir.join('ffi.c'),
                                  libffidir.join('prep_cif.c'),
-                                 libffidir.join('win32.c'),
+                                 libffidir.join(asm_ifc),
                                  libffidir.join('pypy_ffi.c'),
                                  ],
         export_symbols = ['ffi_call', 'ffi_prep_cif', 'ffi_prep_closure',
@@ -142,7 +147,7 @@
     FFI_OK = rffi_platform.ConstantInteger('FFI_OK')
     FFI_BAD_TYPEDEF = rffi_platform.ConstantInteger('FFI_BAD_TYPEDEF')
     FFI_DEFAULT_ABI = rffi_platform.ConstantInteger('FFI_DEFAULT_ABI')
-    if _WIN32:
+    if _WIN32 and not _WIN64:
         FFI_STDCALL = rffi_platform.ConstantInteger('FFI_STDCALL')
 
     FFI_TYPE_STRUCT = rffi_platform.ConstantInteger('FFI_TYPE_STRUCT')
@@ -312,7 +317,7 @@
 FFI_OK = cConfig.FFI_OK
 FFI_BAD_TYPEDEF = cConfig.FFI_BAD_TYPEDEF
 FFI_DEFAULT_ABI = cConfig.FFI_DEFAULT_ABI
-if _WIN32:
+if _WIN32 and not _WIN64:
     FFI_STDCALL = cConfig.FFI_STDCALL
 FFI_TYPE_STRUCT = cConfig.FFI_TYPE_STRUCT
 FFI_CIFP = rffi.COpaquePtr('ffi_cif', compilation_info=eci)
@@ -458,7 +463,7 @@
 FUNCFLAG_USE_LASTERROR = 16
 
 def get_call_conv(flags, from_jit):
-    if _WIN32 and (flags & FUNCFLAG_CDECL == 0):
+    if _WIN32 and not _WIN64 and (flags & FUNCFLAG_CDECL == 0):
         return FFI_STDCALL
     else:
         return FFI_DEFAULT_ABI
diff --git a/pypy/rlib/debug.py b/pypy/rlib/debug.py
--- a/pypy/rlib/debug.py
+++ b/pypy/rlib/debug.py
@@ -1,5 +1,7 @@
 import sys, time
 from pypy.rpython.extregistry import ExtRegistryEntry
+from pypy.rlib.rarithmetic import is_valid_int
+
 
 def ll_assert(x, msg):
     """After translation to C, this becomes an RPyAssert."""
@@ -26,6 +28,7 @@
     llop.debug_print_traceback(lltype.Void)
     llop.debug_fatalerror(lltype.Void, msg)
 fatalerror._dont_inline_ = True
+fatalerror._jit_look_inside_ = False
 fatalerror._annenforceargs_ = [str]
 
 def fatalerror_notb(msg):
@@ -34,6 +37,7 @@
     from pypy.rpython.lltypesystem.lloperation import llop
     llop.debug_fatalerror(lltype.Void, msg)
 fatalerror_notb._dont_inline_ = True
+fatalerror_notb._jit_look_inside_ = False
 fatalerror_notb._annenforceargs_ = [str]
 
 
@@ -333,7 +337,7 @@
     """Give a translation-time error if 'x' is not a plain int
     (e.g. if it's a r_longlong or an r_uint).
     """
-    assert type(x) is int
+    assert is_valid_int(x)
     return x
 
 class Entry(ExtRegistryEntry):
diff --git a/pypy/rlib/jit.py b/pypy/rlib/jit.py
--- a/pypy/rlib/jit.py
+++ b/pypy/rlib/jit.py
@@ -392,6 +392,9 @@
 class JitHintError(Exception):
     """Inconsistency in the JIT hints."""
 
+ENABLE_ALL_OPTS = (
+    'intbounds:rewrite:virtualize:string:earlyforce:pure:heap:ffi:unroll')
+
 PARAMETER_DOCS = {
     'threshold': 'number of times a loop has to run for it to become hot',
     'function_threshold': 'number of times a function must run for it to become traced from start',
@@ -402,7 +405,8 @@
     'retrace_limit': 'how many times we can try retracing before giving up',
     'max_retrace_guards': 'number of extra guards a retrace can cause',
     'max_unroll_loops': 'number of extra unrollings a loop can cause',
-    'enable_opts': 'optimizations to enable or all, INTERNAL USE ONLY'
+    'enable_opts': 'INTERNAL USE ONLY: optimizations to enable, or all = %s' %
+                       ENABLE_ALL_OPTS,
     }
 
 PARAMETERS = {'threshold': 1039, # just above 1024, prime
@@ -469,14 +473,16 @@
         # FLOATs.
         if len(self._heuristic_order) < len(livevars):
             from pypy.rlib.rarithmetic import (r_singlefloat, r_longlong,
-                                               r_ulonglong)
+                                               r_ulonglong, r_uint)
             added = False
             for var, value in livevars.items():
                 if var not in self._heuristic_order:
-                    if isinstance(value, (r_longlong, r_ulonglong)):
+                    if (r_ulonglong is not r_uint and
+                            isinstance(value, (r_longlong, r_ulonglong))):
                         assert 0, ("should not pass a r_longlong argument for "
-                                   "now, because on 32-bit machines it would "
-                                   "need to be ordered as a FLOAT")
+                                   "now, because on 32-bit machines it needs "
+                                   "to be ordered as a FLOAT but on 64-bit "
+                                   "machines as an INT")
                     elif isinstance(value, (int, long, r_singlefloat)):
                         kind = '1:INT'
                     elif isinstance(value, float):
diff --git a/pypy/rlib/objectmodel.py b/pypy/rlib/objectmodel.py
--- a/pypy/rlib/objectmodel.py
+++ b/pypy/rlib/objectmodel.py
@@ -233,20 +233,22 @@
 
 # ____________________________________________________________
 
-def newlist(sizehint=0):
+def newlist_hint(sizehint=0):
     """ Create a new list, but pass a hint how big the size should be
     preallocated
     """
     return []
 
 class Entry(ExtRegistryEntry):
-    _about_ = newlist
+    _about_ = newlist_hint
 
     def compute_result_annotation(self, s_sizehint):
         from pypy.annotation.model import SomeInteger
 
         assert isinstance(s_sizehint, SomeInteger)
-        return self.bookkeeper.newlist()
+        s_l = self.bookkeeper.newlist()
+        s_l.listdef.listitem.resize()
+        return s_l
 
     def specialize_call(self, orig_hop, i_sizehint=None):
         from pypy.rpython.rlist import rtype_newlist
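With the rename above, newlist() becomes newlist_hint(), and the annotation entry marks the hinted list as resizable. On top of CPython the hint is ignored and an ordinary empty list is returned; only after translation does the backend preallocate room for sizehint items. A small usage sketch (assumes a PyPy source checkout on the path, nothing beyond this changeset):

    from pypy.rlib.objectmodel import newlist_hint

    def squares(n):
        result = newlist_hint(n)    # untranslated: just []; translated:
        for i in range(n):          # storage for n items is preallocated
            result.append(i * i)
        return result

    assert squares(4) == [0, 1, 4, 9]
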
diff --git a/pypy/rlib/rarithmetic.py b/pypy/rlib/rarithmetic.py
--- a/pypy/rlib/rarithmetic.py
+++ b/pypy/rlib/rarithmetic.py
@@ -30,22 +30,54 @@
 
 
 """
-import sys
+import sys, struct
 from pypy.rpython import extregistry
 from pypy.rlib import objectmodel
 
-# set up of machine internals
-_bits = 0
-_itest = 1
-_Ltest = 1L
-while _itest == _Ltest and type(_itest) is int:
-    _itest *= 2
-    _Ltest *= 2
-    _bits += 1
+"""
+Long-term target:
+We want to make pypy very flexible concerning its data type layout.
+This is a larger task for later.
 
-LONG_BIT = _bits+1
-LONG_MASK = _Ltest*2-1
-LONG_TEST = _Ltest
+Short-term target:
+We want to run PyPy on Windows 64 bit.
+
+Problem:
+On Windows 64 bit, a C long is only 32 bit. This is a problem for PyPy
+right now, since it assumes that a C long can hold a pointer.
+We therefore set up the target machine constants to obey this rule.
+Right now this affects 64 bit Python only on Windows.
+
+Note: We use the struct module, because the array module doesn't support
+all typecodes.
+"""
+
+def _get_bitsize(typecode):
+    return len(struct.pack(typecode, 1)) * 8
+
+_long_typecode = 'l'
+if _get_bitsize('P') > _get_bitsize('l'):
+    _long_typecode = 'P'
+
+def _get_long_bit():
+    # whatever size a long has, make it big enough for a pointer.
+    return _get_bitsize(_long_typecode)
+
+# exported for now for testing array values. 
+# might go into its own module.
+def get_long_pattern(x):
+    """get the bit pattern for a long, adjusted to pointer size"""
+    return struct.pack(_long_typecode, x)
+
+# used in tests for ctypes and for genc and friends
+# to handle the win64 special case:
+is_emulated_long = _long_typecode <> 'l'
+    
+LONG_BIT = _get_long_bit()
+LONG_MASK = (2**LONG_BIT)-1
+LONG_TEST = 2**(LONG_BIT-1)
+
+# XXX this is a good guess, but what if a long long is 128 bit?
 LONGLONG_BIT  = 64
 LONGLONG_MASK = (2**LONGLONG_BIT)-1
 LONGLONG_TEST = 2**(LONGLONG_BIT-1)
@@ -55,12 +87,18 @@
     LONG_BIT_SHIFT += 1
     assert LONG_BIT_SHIFT < 99, "LONG_BIT_SHIFT value not found?"
 
+"""
+The host-level int is no longer necessarily the same size as the target
+machine word.  We therefore can no longer rely on the plain int type, but
+need to accept long everywhere.
+"""
+    
 def intmask(n):
-    if isinstance(n, int):
-        return int(n)   # possibly bool->int
     if isinstance(n, objectmodel.Symbolic):
         return n        # assume Symbolics don't overflow
     assert not isinstance(n, float)
+    if is_valid_int(n):
+        return int(n)
     n = long(n)
     n &= LONG_MASK
     if n >= LONG_TEST:
@@ -95,7 +133,12 @@
         r_class.BITS == LONG_BIT and r_class.SIGNED)
 _should_widen_type._annspecialcase_ = 'specialize:memo'
 
-del _bits, _itest, _Ltest
+# the replacement for sys.maxint
+maxint = int(LONG_TEST - 1)
+
+def is_valid_int(r):
+    return isinstance(r, (int, long)) and (
+        -maxint - 1 <= r <= maxint)
 
 def ovfcheck(r):
     "NOT_RPYTHON"
@@ -103,8 +146,10 @@
     # raise OverflowError if the operation did overflow
     assert not isinstance(r, r_uint), "unexpected ovf check on unsigned"
     assert not isinstance(r, r_longlong), "ovfcheck not supported on r_longlong"
-    assert not isinstance(r,r_ulonglong),"ovfcheck not supported on r_ulonglong"
-    if type(r) is long:
+    assert not isinstance(r, r_ulonglong), "ovfcheck not supported on r_ulonglong"
+    if type(r) is long and not is_valid_int(r):
+        # the check only fires for longs outside the machine int range;
+        # such values show up e.g. in the garbage collector.
         raise OverflowError, "signed integer expression did overflow"
     return r
 
@@ -418,6 +463,9 @@
 r_longlong = build_int('r_longlong', True, 64)
 r_ulonglong = build_int('r_ulonglong', False, 64)
 
+r_long = build_int('r_long', True, 32)
+r_ulong = build_int('r_ulong', False, 32)
+
 longlongmax = r_longlong(LONGLONG_TEST - 1)
 
 if r_longlong is not r_int:
@@ -425,6 +473,14 @@
 else:
     r_int64 = int
 
+# needed for ll_os_stat.time_t_to_FILE_TIME in the 64 bit case
+if r_long is not r_int:
+    r_uint32 = r_ulong
+else:
+    r_uint32 = r_uint
+
+# needed for ll_time.time_sleep_llimpl
+maxint32 = int((1 << 31) -1)
 
 # the 'float' C type
 
diff --git a/pypy/rlib/rbigint.py b/pypy/rlib/rbigint.py
--- a/pypy/rlib/rbigint.py
+++ b/pypy/rlib/rbigint.py
@@ -1,5 +1,5 @@
 from pypy.rlib.rarithmetic import LONG_BIT, intmask, r_uint, r_ulonglong
-from pypy.rlib.rarithmetic import ovfcheck, r_longlong, widen
+from pypy.rlib.rarithmetic import ovfcheck, r_longlong, widen, is_valid_int
 from pypy.rlib.rarithmetic import most_neg_value_of_same_type
 from pypy.rlib.rfloat import isfinite
 from pypy.rlib.debug import make_sure_not_resized, check_regular_int
@@ -44,21 +44,19 @@
 
 
 def _mask_digit(x):
-    if not we_are_translated():
-        assert type(x) is not long, "overflow occurred!"
     return intmask(x & MASK)
 _mask_digit._annspecialcase_ = 'specialize:argtype(0)'
 
 def _widen_digit(x):
     if not we_are_translated():
-        assert type(x) is int, "widen_digit() takes an int, got a %r" % type(x)
+        assert is_valid_int(x), "widen_digit() takes an int, got a %r" % type(x)
     if SHIFT <= 15:
         return int(x)
     return r_longlong(x)
 
 def _store_digit(x):
     if not we_are_translated():
-        assert type(x) is int, "store_digit() takes an int, got a %r" % type(x)
+        assert is_valid_int(x), "store_digit() takes an int, got a %r" % type(x)
     if SHIFT <= 15:
         return rffi.cast(rffi.SHORT, x)
     elif SHIFT <= 31:
diff --git a/pypy/rlib/rdtoa.py b/pypy/rlib/rdtoa.py
--- a/pypy/rlib/rdtoa.py
+++ b/pypy/rlib/rdtoa.py
@@ -58,8 +58,8 @@
         try:
             result = dg_strtod(ll_input, end_ptr)
 
-            endpos = (rffi.cast(rffi.LONG, end_ptr[0]) -
-                      rffi.cast(rffi.LONG, ll_input))
+            endpos = (rffi.cast(lltype.Signed, end_ptr[0]) -
+                      rffi.cast(lltype.Signed, ll_input))
 
             if endpos == 0 or endpos < len(input):
                 raise ValueError("invalid input at position %d" % (endpos,))
@@ -244,8 +244,8 @@
                     # The only failure mode is no memory
                     raise MemoryError
                 try:
-                    buflen = (rffi.cast(rffi.LONG, end_ptr[0]) -
-                              rffi.cast(rffi.LONG, digits))
+                    buflen = (rffi.cast(lltype.Signed, end_ptr[0]) -
+                              rffi.cast(lltype.Signed, digits))
                     sign = rffi.cast(lltype.Signed, sign_ptr[0])
 
                     # Handle nan and inf
diff --git a/pypy/rlib/rerased.py b/pypy/rlib/rerased.py
--- a/pypy/rlib/rerased.py
+++ b/pypy/rlib/rerased.py
@@ -24,11 +24,11 @@
 from pypy.rpython.lltypesystem.rclass import OBJECTPTR
 from pypy.rpython.lltypesystem import lltype, llmemory
 from pypy.rpython.error import TyperError
-
+from pypy.rlib.rarithmetic import is_valid_int
 
 
 def erase_int(x):
-    assert isinstance(x, int)
+    assert is_valid_int(x)
     res = 2 * x + 1
     if res > sys.maxint or res < -sys.maxint - 1:
         raise OverflowError
@@ -36,7 +36,7 @@
 
 def unerase_int(y):
     assert y._identity is _identity_for_ints
-    assert isinstance(y._x, int)
+    assert is_valid_int(y._x)
     return y._x
 
 
diff --git a/pypy/rlib/rfloat.py b/pypy/rlib/rfloat.py
--- a/pypy/rlib/rfloat.py
+++ b/pypy/rlib/rfloat.py
@@ -295,7 +295,7 @@
     return z
 
 INFINITY = 1e200 * 1e200
-NAN = INFINITY / INFINITY
+NAN = abs(INFINITY / INFINITY)    # bah, INF/INF gives us -NAN?
 
 try:
     # Try to get math functions added in 2.6.
@@ -375,8 +375,7 @@
 
     def log1p(x):
         "NOT_RPYTHON"
-        from pypy.rlib import rfloat
-        if abs(x) < rfloat.DBL_EPSILON // 2.:
+        if abs(x) < DBL_EPSILON // 2.:
             return x
         elif -.5 <= x <= 1.:
             y = 1. + x
diff --git a/pypy/rlib/rstruct/ieee.py b/pypy/rlib/rstruct/ieee.py
--- a/pypy/rlib/rstruct/ieee.py
+++ b/pypy/rlib/rstruct/ieee.py
@@ -4,7 +4,7 @@
 
 import math
 
-from pypy.rlib import rarithmetic, rfloat, objectmodel
+from pypy.rlib import rarithmetic, rfloat, objectmodel, jit
 from pypy.rlib.rarithmetic import r_ulonglong
 
 
@@ -135,14 +135,15 @@
     return ((sign << BITS - 1) | (exp << MANT_DIG - 1)) | mant
 
 
+ at jit.unroll_safe
 def pack_float(result, x, size, be):
-    l = [] if be else result
+    l = []
     unsigned = float_pack(x, size)
     for i in range(size):
         l.append(chr((unsigned >> (i * 8)) & 0xFF))
     if be:
         l.reverse()
-        result.extend(l)
+    result.append("".join(l))
 
 
 def unpack_float(s, be):
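pack_float() above now collects the bytes in a local list and appends them to the result builder as one string, reversing the list for big-endian output. The same byte-order handling, reproduced with plain CPython's struct module only (not the RPython helpers above):

    import struct

    def pack_double_be(value):
        # 64-bit pattern of the double, produced least-significant byte
        # first, then reversed to give big-endian order, as pack_float()
        # does above when be is true.
        unsigned = struct.unpack('<Q', struct.pack('<d', value))[0]
        chars = [chr((unsigned >> (i * 8)) & 0xFF) for i in range(8)]
        chars.reverse()
        return ''.join(chars)

    assert pack_double_be(1.0) == struct.pack('>d', 1.0)
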
diff --git a/pypy/rlib/rstruct/nativefmttable.py b/pypy/rlib/rstruct/nativefmttable.py
--- a/pypy/rlib/rstruct/nativefmttable.py
+++ b/pypy/rlib/rstruct/nativefmttable.py
@@ -3,6 +3,7 @@
 The table 'native_fmttable' is also used by pypy.module.array.interp_array.
 """
 import struct
+from pypy.rlib import jit
 from pypy.rlib.rstruct import standardfmttable as std
 from pypy.rlib.rstruct.error import StructError
 from pypy.rpython.tool import rffi_platform
@@ -25,12 +26,15 @@
 double_buf = lltype.malloc(rffi.DOUBLEP.TO, 1, flavor='raw', immortal=True)
 float_buf = lltype.malloc(rffi.FLOATP.TO, 1, flavor='raw', immortal=True)
 
+ at jit.dont_look_inside
+def double_to_ccharp(doubleval):
+    double_buf[0] = doubleval
+    return rffi.cast(rffi.CCHARP, double_buf)
+
 def pack_double(fmtiter):
     doubleval = fmtiter.accept_float_arg()
-    double_buf[0] = doubleval
-    p = rffi.cast(rffi.CCHARP, double_buf)
-    for i in range(sizeof_double):
-        fmtiter.result.append(p[i])
+    p = double_to_ccharp(doubleval)
+    fmtiter.result.append_charpsize(p, rffi.sizeof(rffi.DOUBLE))
 
 @specialize.argtype(0)
 def unpack_double(fmtiter):
@@ -41,13 +45,16 @@
     doubleval = double_buf[0]
     fmtiter.appendobj(doubleval)
 
+ at jit.dont_look_inside
+def float_to_ccharp(floatval):
+    float_buf[0] = floatval
+    return rffi.cast(rffi.CCHARP, float_buf)
+
 def pack_float(fmtiter):
     doubleval = fmtiter.accept_float_arg()
     floatval = r_singlefloat(doubleval)
-    float_buf[0] = floatval
-    p = rffi.cast(rffi.CCHARP, float_buf)
-    for i in range(sizeof_float):
-        fmtiter.result.append(p[i])
+    p = float_to_ccharp(floatval)
+    fmtiter.result.append_charpsize(p, rffi.sizeof(rffi.FLOAT))
 
 @specialize.argtype(0)
 def unpack_float(fmtiter):
diff --git a/pypy/rlib/rstruct/standardfmttable.py b/pypy/rlib/rstruct/standardfmttable.py
--- a/pypy/rlib/rstruct/standardfmttable.py
+++ b/pypy/rlib/rstruct/standardfmttable.py
@@ -21,8 +21,7 @@
 # ____________________________________________________________
 
 def pack_pad(fmtiter, count):
-    for i in range(count):
-        fmtiter.result.append('\x00')
+    fmtiter.result.append_multiple_char('\x00', count)
 
 def pack_char(fmtiter):
     string = fmtiter.accept_str_arg()
@@ -38,11 +37,10 @@
 def pack_string(fmtiter, count):
     string = fmtiter.accept_str_arg()
     if len(string) < count:
-        fmtiter.result += string
-        for i in range(len(string), count):
-            fmtiter.result.append('\x00')
+        fmtiter.result.append(string)
+        fmtiter.result.append_multiple_char('\x00', count - len(string))
     else:
-        fmtiter.result += string[:count]
+        fmtiter.result.append_slice(string, 0, count)
 
 def pack_pascal(fmtiter, count):
     string = fmtiter.accept_str_arg()
@@ -56,9 +54,8 @@
     else:
         prefixchar = chr(prefix)
     fmtiter.result.append(prefixchar)
-    fmtiter.result += string[:prefix]
-    for i in range(1 + prefix, count):
-        fmtiter.result.append('\x00')
+    fmtiter.result.append_slice(string, 0, prefix)
+    fmtiter.result.append_multiple_char('\x00', count - (1 + prefix))
 
 def make_float_packer(size):
     def packer(fmtiter):
diff --git a/pypy/rlib/rwin32.py b/pypy/rlib/rwin32.py
--- a/pypy/rlib/rwin32.py
+++ b/pypy/rlib/rwin32.py
@@ -133,8 +133,8 @@
         # Prior to Visual Studio 8, the MSVCRT dll doesn't export the
         # _dosmaperr() function, which is available only when compiled
         # against the static CRT library.
-        from pypy.translator.platform import platform, Windows
-        static_platform = Windows()
+        from pypy.translator.platform import host_factory
+        static_platform = host_factory()
         if static_platform.name == 'msvc':
             static_platform.cflags = ['/MT']  # static CRT
             static_platform.version = 0       # no manifest
diff --git a/pypy/rlib/test/test_jit.py b/pypy/rlib/test/test_jit.py
--- a/pypy/rlib/test/test_jit.py
+++ b/pypy/rlib/test/test_jit.py
@@ -2,6 +2,7 @@
 from pypy.conftest import option
 from pypy.rlib.jit import hint, we_are_jitted, JitDriver, elidable_promote
 from pypy.rlib.jit import JitHintError, oopspec, isconstant
+from pypy.rlib.rarithmetic import r_uint
 from pypy.translator.translator import TranslationContext, graphof
 from pypy.rpython.test.tool import BaseRtypingTest, LLRtypeMixin, OORtypeMixin
 from pypy.rpython.lltypesystem import lltype
@@ -178,6 +179,11 @@
                    myjitdriver.jit_merge_point, i1=42, r1=A(), r2=None, f1=3.5)
         assert "got ['2:REF', '1:INT', '2:REF', '3:FLOAT']" in repr(e.value)
 
+    def test_argument_order_accept_r_uint(self):
+        # this used to fail on 64-bit, because r_uint == r_ulonglong
+        myjitdriver = JitDriver(greens=['i1'], reds=[])
+        myjitdriver.jit_merge_point(i1=r_uint(42))
+
 
 class TestJITLLtype(BaseTestJIT, LLRtypeMixin):
     pass
diff --git a/pypy/rlib/test/test_objectmodel.py b/pypy/rlib/test/test_objectmodel.py
--- a/pypy/rlib/test/test_objectmodel.py
+++ b/pypy/rlib/test/test_objectmodel.py
@@ -442,7 +442,7 @@
 def test_newlist():
     from pypy.annotation.model import SomeInteger
     def f(z):
-        x = newlist(sizehint=38)
+        x = newlist_hint(sizehint=38)
         if z < 0:
             x.append(1)
         return len(x)
@@ -456,7 +456,7 @@
 def test_newlist_nonconst():
     from pypy.annotation.model import SomeInteger
     def f(z):
-        x = newlist(sizehint=z)
+        x = newlist_hint(sizehint=z)
         return len(x)
 
     graph = getgraph(f, [SomeInteger()])
diff --git a/pypy/rpython/lltypesystem/llarena.py b/pypy/rpython/lltypesystem/llarena.py
--- a/pypy/rpython/lltypesystem/llarena.py
+++ b/pypy/rpython/lltypesystem/llarena.py
@@ -1,5 +1,7 @@
 import array, weakref
 from pypy.rpython.lltypesystem import llmemory
+from pypy.rlib.rarithmetic import is_valid_int
+
 
 # An "arena" is a large area of memory which can hold a number of
 # objects, not necessarily all of the same type or size.  It's used by
@@ -164,7 +166,7 @@
         return '<arenaaddr %s + %d>' % (self.arena, self.offset)
 
     def __add__(self, other):
-        if isinstance(other, (int, long)):
+        if is_valid_int(other):
             position = self.offset + other
         elif isinstance(other, llmemory.AddressOffset):
             # this is really some Do What I Mean logic.  There are two
@@ -184,7 +186,7 @@
     def __sub__(self, other):
         if isinstance(other, llmemory.AddressOffset):
             other = llmemory.raw_malloc_usage(other)
-        if isinstance(other, (int, long)):
+        if is_valid_int(other):
             return self.arena.getaddr(self.offset - other)
         if isinstance(other, fakearenaaddress):
             if self.arena is not other.arena:
diff --git a/pypy/rpython/lltypesystem/lltype.py b/pypy/rpython/lltypesystem/lltype.py
--- a/pypy/rpython/lltypesystem/lltype.py
+++ b/pypy/rpython/lltypesystem/lltype.py
@@ -7,7 +7,7 @@
 from pypy.tool.identity_dict import identity_dict
 from pypy.tool import leakfinder
 from types import NoneType
-from sys import maxint
+from pypy.rlib.rarithmetic import maxint, is_valid_int, is_emulated_long
 import weakref
 
 class State(object):
@@ -681,6 +681,11 @@
     number = _numbertypes[type] = Number(name, type)
     return number
 
+if is_emulated_long:
+    SignedFmt = 'q'
+else:
+    SignedFmt = 'l'
+
 Signed   = build_number("Signed", int)
 Unsigned = build_number("Unsigned", r_uint)
 SignedLongLong = build_number("SignedLongLong", r_longlong)
@@ -1654,7 +1659,7 @@
     __slots__ = ('items',)
 
     def __init__(self, TYPE, n, initialization=None, parent=None, parentindex=None):
-        if not isinstance(n, int):
+        if not is_valid_int(n):
             raise TypeError, "array length must be an int"
         if n < 0:
             raise ValueError, "negative array length"
diff --git a/pypy/rpython/lltypesystem/opimpl.py b/pypy/rpython/lltypesystem/opimpl.py
--- a/pypy/rpython/lltypesystem/opimpl.py
+++ b/pypy/rpython/lltypesystem/opimpl.py
@@ -4,6 +4,8 @@
 from pypy.rpython.lltypesystem import lltype, llmemory
 from pypy.rpython.lltypesystem.lloperation import opimpls
 from pypy.rlib import debug
+from pypy.rlib.rarithmetic import is_valid_int
+
 
 # ____________________________________________________________
 # Implementation of the 'canfold' operations
@@ -22,14 +24,14 @@
 from pypy.rpython.lltypesystem.llmemory import AddressAsInt
 
 if r_longlong is r_int:
-    r_longlong_arg = (r_longlong, int)
-    r_longlong_result = int
+    r_longlong_arg = (r_longlong, int, long)
+    r_longlong_result = long # XXX was int
 else:
     r_longlong_arg = r_longlong
     r_longlong_result = r_longlong
 
 argtype_by_name = {
-    'int': int,
+    'int': (int, long),
     'float': float,
     'uint': r_uint,
     'llong': r_longlong_arg,
@@ -173,7 +175,7 @@
 
 def op_direct_ptradd(obj, index):
     checkptr(obj)
-    assert isinstance(index, int)
+    assert is_valid_int(index)
     return lltype.direct_ptradd(obj, index)
 
 
@@ -182,29 +184,30 @@
     return not b
 
 def op_int_add(x, y):
-    if not isinstance(x, (int, llmemory.AddressOffset)):
+    if not isinstance(x, (int, long, llmemory.AddressOffset)):
         from pypy.rpython.lltypesystem import llgroup
         assert isinstance(x, llgroup.CombinedSymbolic)
-    assert isinstance(y, (int, llmemory.AddressOffset))
+    assert isinstance(y, (int, long, llmemory.AddressOffset))
     return intmask(x + y)
 
 def op_int_sub(x, y):
-    if not isinstance(x, int):
+    if not is_valid_int(x):
         from pypy.rpython.lltypesystem import llgroup
         assert isinstance(x, llgroup.CombinedSymbolic)
-    assert isinstance(y, int)
+    assert is_valid_int(y)
     return intmask(x - y)
 
 def op_int_ge(x, y):
     # special case for 'AddressOffset >= 0'
-    assert isinstance(x, (int, llmemory.AddressOffset))
-    assert isinstance(y, int)
+    assert isinstance(x, (int, long, llmemory.AddressOffset))
+    assert is_valid_int(y)
     return x >= y
 
 def op_int_lt(x, y):
     # special case for 'AddressOffset < 0'
-    assert isinstance(x, (int, llmemory.AddressOffset))
-    assert isinstance(y, int)
+    # hack for win64
+    assert isinstance(x, (int, long, llmemory.AddressOffset))
+    assert is_valid_int(y)
     return x < y
 
 def op_int_between(a, b, c):
@@ -214,50 +217,51 @@
     return a <= b < c
 
 def op_int_and(x, y):
-    if not isinstance(x, int):
+    if not is_valid_int(x):
         from pypy.rpython.lltypesystem import llgroup
         assert isinstance(x, llgroup.CombinedSymbolic)
-    assert isinstance(y, int)
+    assert is_valid_int(y)
     return x & y
 
 def op_int_or(x, y):
-    if not isinstance(x, int):
+    if not is_valid_int(x):
         from pypy.rpython.lltypesystem import llgroup
         assert isinstance(x, llgroup.CombinedSymbolic)
-    assert isinstance(y, int)
+    assert is_valid_int(y)
     return x | y
 
 def op_int_xor(x, y):
     # used in computing hashes
     if isinstance(x, AddressAsInt): x = llmemory.cast_adr_to_int(x.adr)
     if isinstance(y, AddressAsInt): y = llmemory.cast_adr_to_int(y.adr)
-    assert isinstance(x, int)
-    assert isinstance(y, int)
+    assert is_valid_int(x)
+    assert is_valid_int(y)
     return x ^ y
 
 def op_int_mul(x, y):
-    assert isinstance(x, (int, llmemory.AddressOffset))
-    assert isinstance(y, (int, llmemory.AddressOffset))
+    assert isinstance(x, (int, long, llmemory.AddressOffset))
+    assert isinstance(y, (int, long, llmemory.AddressOffset))
     return intmask(x * y)
 
 def op_int_rshift(x, y):
-    if not isinstance(x, int):
+    if not is_valid_int(x):
         from pypy.rpython.lltypesystem import llgroup
         assert isinstance(x, llgroup.CombinedSymbolic)
-    assert isinstance(y, int)
+    assert is_valid_int(y)
     return x >> y
 
 def op_int_floordiv(x, y):
-    assert isinstance(x, (int, llmemory.AddressOffset))
-    assert isinstance(y, (int, llmemory.AddressOffset))
+    # hack for win64
+    assert isinstance(x, (int, long, llmemory.AddressOffset))
+    assert isinstance(y, (int, long, llmemory.AddressOffset))
     r = x//y
     if x^y < 0 and x%y != 0:
         r += 1
     return r
 
 def op_int_mod(x, y):
-    assert isinstance(x, (int, llmemory.AddressOffset))
-    assert isinstance(y, (int, llmemory.AddressOffset))
+    assert isinstance(x, (int, long, llmemory.AddressOffset))
+    assert isinstance(y, (int, long, llmemory.AddressOffset))
     r = x%y
     if x^y < 0 and x%y != 0:
         r -= y
@@ -281,22 +285,22 @@
 
 def op_uint_lshift(x, y):
     assert isinstance(x, r_uint)
-    assert isinstance(y, int)
+    assert is_valid_int(y)
     return r_uint(x << y)
 
 def op_uint_rshift(x, y):
     assert isinstance(x, r_uint)
-    assert isinstance(y, int)
+    assert is_valid_int(y)
     return r_uint(x >> y)
 
 def op_llong_lshift(x, y):
     assert isinstance(x, r_longlong_arg)
-    assert isinstance(y, int)
+    assert is_valid_int(y)
     return r_longlong_result(x << y)
 
 def op_llong_rshift(x, y):
     assert isinstance(x, r_longlong_arg)
-    assert isinstance(y, int)
+    assert is_valid_int(y)
     return r_longlong_result(x >> y)
 
 def op_ullong_lshift(x, y):
@@ -306,7 +310,7 @@
 
 def op_ullong_rshift(x, y):
     assert isinstance(x, r_ulonglong)
-    assert isinstance(y, int)
+    assert is_valid_int(y)
     return r_ulonglong(x >> y)
 
 def op_same_as(x):
@@ -318,7 +322,8 @@
 op_cast_primitive.need_result_type = True
 
 def op_cast_int_to_float(i):
-    assert type(i) is int
+    # assert type(i) is int
+    assert is_valid_int(i)
     return float(i)
 
 def op_cast_uint_to_float(u):
@@ -340,7 +345,8 @@
     return ui + li
 
 def op_cast_int_to_char(b):
-    assert type(b) is int
+    #assert type(b) is int
+    assert is_valid_int(b)
     return chr(b)
 
 def op_cast_bool_to_int(b):
@@ -384,11 +390,12 @@
     return ord(b)
 
 def op_cast_int_to_unichar(b):
-    assert type(b) is int
+    assert is_valid_int(b)
     return unichr(b)
 
 def op_cast_int_to_uint(b):
-    assert type(b) is int
+    # assert type(b) is int
+    assert is_valid_int(b)
     return r_uint(b)
 
 def op_cast_uint_to_int(b):
@@ -396,7 +403,7 @@
     return intmask(b)
 
 def op_cast_int_to_longlong(b):
-    assert type(b) is int
+    assert is_valid_int(b)
     return r_longlong_result(b)
 
 def op_truncate_longlong_to_int(b):
@@ -570,7 +577,7 @@
     if isinstance(memberoffset, llgroup.GroupMemberOffset):
         return memberoffset.index != 0
     else:
-        assert isinstance(memberoffset, int)
+        assert is_valid_int(memberoffset)
         return memberoffset != 0
 
 def op_extract_ushort(combinedoffset):
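The opimpl.py hunks above replace plain `isinstance(x, int)` asserts with is_valid_int(x) and widen several checks to `(int, long, ...)`. The motivation is the situation flagged by the "hack for win64" comments: there the C long is only 32 bits while the machine word (Signed) is 64 bits, so untranslated word-sized values can legitimately show up as Python longs. A minimal plain-Python-2 sketch of the idea behind such a helper, with illustrative names and bounds rather than PyPy's exact rarithmetic code:

    import struct

    MACHINE_BITS = 8 * struct.calcsize("P")          # pointer size in bits
    MIN_INT = -(2 ** (MACHINE_BITS - 1))
    MAX_INT = 2 ** (MACHINE_BITS - 1) - 1

    def is_valid_int_sketch(value):
        # accept int and long alike, as long as the value fits one machine
        # word; on win64 the C long (and hence Python's int) is narrower than
        # a word, so perfectly valid word-sized values arrive as Python longs
        return isinstance(value, (int, long)) and MIN_INT <= value <= MAX_INT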
diff --git a/pypy/rpython/lltypesystem/rffi.py b/pypy/rpython/lltypesystem/rffi.py
--- a/pypy/rpython/lltypesystem/rffi.py
+++ b/pypy/rpython/lltypesystem/rffi.py
@@ -433,7 +433,8 @@
         TYPES.append(name)
 TYPES += ['signed char', 'unsigned char',
           'long long', 'unsigned long long',
-          'size_t', 'time_t', 'wchar_t']
+          'size_t', 'time_t', 'wchar_t',
+          'uintptr_t', 'intptr_t']
 if os.name != 'nt':
     TYPES.append('mode_t')
     TYPES.append('pid_t')
@@ -617,8 +618,6 @@
 # (use SIGNEDCHAR or UCHAR for the small integer types)
 CHAR = lltype.Char
 
-INTPTR_T = SSIZE_T
-
 # double
 DOUBLE = lltype.Float
 LONGDOUBLE = lltype.LongFloat
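The rffi.py hunk drops the hard-coded `INTPTR_T = SSIZE_T` alias and instead adds 'intptr_t' and 'uintptr_t' to the TYPES list that gets probed from the C headers (the rfficache.py change further down adds inttypes.h to the probe includes on non-Windows for the same reason). A hedged ctypes illustration of why probing beats aliasing: only the probed type is guaranteed to be exactly pointer-sized everywhere, whereas ssize_t merely happens to match on common platforms.

    import ctypes

    # intptr_t is defined to hold a pointer; ssize_t usually has the same
    # size, but that is a platform property to verify, not to hard-code
    print(ctypes.sizeof(ctypes.c_void_p), ctypes.sizeof(ctypes.c_ssize_t))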
diff --git a/pypy/rpython/lltypesystem/rlist.py b/pypy/rpython/lltypesystem/rlist.py
--- a/pypy/rpython/lltypesystem/rlist.py
+++ b/pypy/rpython/lltypesystem/rlist.py
@@ -60,7 +60,6 @@
         ITEMARRAY = GcArray(ITEM,
                             adtmeths = ADTIFixedList({
                                  "ll_newlist": ll_fixed_newlist,
-                                 "ll_newlist_hint": ll_fixed_newlist,
                                  "ll_newemptylist": ll_fixed_newemptylist,
                                  "ll_length": ll_fixed_length,
                                  "ll_items": ll_fixed_items,
@@ -271,7 +270,7 @@
     l.items = malloc(LIST.items.TO, lengthhint)
     return l
 ll_newlist_hint = typeMethod(ll_newlist_hint)
-ll_newlist_hint.oopspec = 'newlist(lengthhint)'
+ll_newlist_hint.oopspec = 'newlist_hint(lengthhint)'
 
 # should empty lists start with no allocated memory, or with a preallocated
 # minimal number of entries?  XXX compare memory usage versus speed, and
@@ -315,16 +314,16 @@
 
 # fixed size versions
 
+ at typeMethod
 def ll_fixed_newlist(LIST, length):
     ll_assert(length >= 0, "negative fixed list length")
     l = malloc(LIST, length)
     return l
-ll_fixed_newlist = typeMethod(ll_fixed_newlist)
 ll_fixed_newlist.oopspec = 'newlist(length)'
 
+ at typeMethod
 def ll_fixed_newemptylist(LIST):
     return ll_fixed_newlist(LIST, 0)
-ll_fixed_newemptylist = typeMethod(ll_fixed_newemptylist)
 
 def ll_fixed_length(l):
     return len(l)
@@ -392,7 +391,11 @@
                                          ('list', r_list.lowleveltype),
                                          ('index', Signed)))
         self.ll_listiter = ll_listiter
-        self.ll_listnext = ll_listnext
+        if (isinstance(r_list, FixedSizeListRepr)
+                and not r_list.listitem.mutated):
+            self.ll_listnext = ll_listnext_foldable
+        else:
+            self.ll_listnext = ll_listnext
         self.ll_getnextindex = ll_getnextindex
 
 def ll_listiter(ITERPTR, lst):
@@ -409,5 +412,14 @@
     iter.index = index + 1      # cannot overflow because index < l.length
     return l.ll_getitem_fast(index)
 
+def ll_listnext_foldable(iter):
+    from pypy.rpython.rlist import ll_getitem_foldable_nonneg
+    l = iter.list
+    index = iter.index
+    if index >= l.ll_length():
+        raise StopIteration
+    iter.index = index + 1      # cannot overflow because index < l.length
+    return ll_getitem_foldable_nonneg(l, index)
+
 def ll_getnextindex(iter):
     return iter.index
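The rlist.py hunk does two related things: it renames the oopspec of ll_newlist_hint to 'newlist_hint(lengthhint)' so the JIT can distinguish a size hint from a real length, and it selects ll_listnext_foldable for fixed-size lists whose items are never mutated, routing iteration through the foldable getitem so reads can be constant-folded. The new test_iterate_over_immutable_list tests further down check exactly this by monkey-patching ll_getitem_foldable_nonneg. A plain-Python sketch of the iterator pattern (illustrative only, not RPython):

    class ListIterSketch(object):
        """Index-based iterator handed either the plain getitem or a
        'foldable' one, mirroring the ll_listnext/ll_listnext_foldable split."""

        def __init__(self, lst, getitem):
            self.lst = lst
            self.index = 0
            self.getitem = getitem          # plain or foldable item reader

        def __iter__(self):
            return self

        def next(self):
            if self.index >= len(self.lst):
                raise StopIteration
            item = self.getitem(self.lst, self.index)
            self.index += 1                 # cannot overflow: index < length
            return item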
diff --git a/pypy/rpython/lltypesystem/rstr.py b/pypy/rpython/lltypesystem/rstr.py
--- a/pypy/rpython/lltypesystem/rstr.py
+++ b/pypy/rpython/lltypesystem/rstr.py
@@ -62,6 +62,14 @@
     @jit.oopspec('stroruni.copy_contents(src, dst, srcstart, dststart, length)')
     @enforceargs(None, None, int, int, int)
     def copy_string_contents(src, dst, srcstart, dststart, length):
+        """Copies 'length' characters from the 'src' string to the 'dst'
+        string, starting at position 'srcstart' and 'dststart'."""
+        # xxx Warning: don't try to do this at home.  It relies on a lot
+        # of details to be sure that it works correctly in all cases.
+        # Notably: no GC operation at all from the first cast_ptr_to_adr()
+        # because it might move the strings.  The keepalive_until_here()
+        # are obscurely essential to make sure that the strings stay alive
+        # longer than the raw_memcopy().
         assert srcstart >= 0
         assert dststart >= 0
         assert length >= 0
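The new docstring and warning comment spell out the contract of copy_string_contents: no GC-visible operation may happen between the first cast_ptr_to_adr() and the raw_memcopy(), and keepalive_until_here() must keep both strings alive past the copy. A hedged, plain-Python analogue of only the bounds part of that contract (the GC/keepalive part has no direct equivalent outside RPython; the names below are illustrative):

    def copy_contents_sketch(src, dst, srcstart, dststart, length):
        # src is a str, dst a bytearray standing in for a mutable string buffer
        assert srcstart >= 0 and dststart >= 0 and length >= 0
        assert srcstart + length <= len(src)
        assert dststart + length <= len(dst)
        dst[dststart:dststart + length] = src[srcstart:srcstart + length]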
diff --git a/pypy/rpython/lltypesystem/test/test_llmemory.py b/pypy/rpython/lltypesystem/test/test_llmemory.py
--- a/pypy/rpython/lltypesystem/test/test_llmemory.py
+++ b/pypy/rpython/lltypesystem/test/test_llmemory.py
@@ -1,6 +1,7 @@
 from pypy.rpython.lltypesystem.llmemory import *
 from pypy.rpython.lltypesystem import lltype
 from pypy.rpython.test.test_llinterp import interpret
+from pypy.rlib.rarithmetic import is_valid_int
 import py
 
 def test_simple():
@@ -639,12 +640,12 @@
     assert cast_int_to_adr(0) == NULL
     #
     i = cast_adr_to_int(adr, mode="emulated")
-    assert type(i) is int
+    assert is_valid_int(i)
     i = cast_adr_to_int(NULL, mode="emulated")
-    assert type(i) is int and i == 0
+    assert is_valid_int(i) and i == 0
     #
     i = cast_adr_to_int(adr, mode="forced")
-    assert type(i) is int
+    assert is_valid_int(i)
     #assert cast_int_to_adr(i) == adr -- depends on ll2ctypes details
     i = cast_adr_to_int(NULL, mode="forced")
-    assert type(i) is int and i == 0
+    assert is_valid_int(i) and i == 0
diff --git a/pypy/rpython/memory/gc/inspector.py b/pypy/rpython/memory/gc/inspector.py
--- a/pypy/rpython/memory/gc/inspector.py
+++ b/pypy/rpython/memory/gc/inspector.py
@@ -109,7 +109,7 @@
         self.gc = gc
         self.gcflag = gc.gcflag_extra
         self.fd = rffi.cast(rffi.INT, fd)
-        self.writebuffer = lltype.malloc(rffi.LONGP.TO, self.BUFSIZE,
+        self.writebuffer = lltype.malloc(rffi.SIGNEDP.TO, self.BUFSIZE,
                                          flavor='raw')
         self.buf_count = 0
         if self.gcflag == 0:
diff --git a/pypy/rpython/memory/gc/markcompact.py b/pypy/rpython/memory/gc/markcompact.py
--- a/pypy/rpython/memory/gc/markcompact.py
+++ b/pypy/rpython/memory/gc/markcompact.py
@@ -11,6 +11,8 @@
 from pypy.rlib.objectmodel import we_are_translated, running_on_llinterp
 from pypy.rpython.lltypesystem import rffi
 from pypy.rpython.memory.gcheader import GCHeaderBuilder
+from pypy.rlib.rarithmetic import is_valid_int
+
 
 # Mark'n'compact garbage collector
 #
@@ -353,7 +355,7 @@
         # like header(), but asserts that we have a forwarding header
         hdr = MovingGCBase.header(self, addr)
         if not we_are_translated():
-            assert isinstance(hdr.tid, int)
+            assert is_valid_int(hdr.tid)
         return hdr
 
     def combine(self, typeid16, flags):
diff --git a/pypy/rpython/memory/gc/minimark.py b/pypy/rpython/memory/gc/minimark.py
--- a/pypy/rpython/memory/gc/minimark.py
+++ b/pypy/rpython/memory/gc/minimark.py
@@ -608,6 +608,11 @@
         specified as 0 if the object is not varsized.  The returned
         object is fully initialized and zero-filled."""
         #
+        # Here we really need a valid 'typeid', not 0 (as the JIT might
+        # try to send us if there is still a bug).
+        ll_assert(bool(self.combine(typeid, 0)),
+                  "external_malloc: typeid == 0")
+        #
         # Compute the total size, carefully checking for overflows.
         size_gc_header = self.gcheaderbuilder.size_gc_header
         nonvarsize = size_gc_header + self.fixed_size(typeid)
diff --git a/pypy/rpython/memory/gctransform/asmgcroot.py b/pypy/rpython/memory/gctransform/asmgcroot.py
--- a/pypy/rpython/memory/gctransform/asmgcroot.py
+++ b/pypy/rpython/memory/gctransform/asmgcroot.py
@@ -442,6 +442,8 @@
         ll_assert(location >= 0, "negative location")
         kind = location & LOC_MASK
         offset = location & ~ LOC_MASK
+        if IS_64_BITS:
+            offset <<= 1
         if kind == LOC_REG:   # register
             if location == LOC_NOWHERE:
                 return llmemory.NULL
diff --git a/pypy/rpython/memory/lltypelayout.py b/pypy/rpython/memory/lltypelayout.py
--- a/pypy/rpython/memory/lltypelayout.py
+++ b/pypy/rpython/memory/lltypelayout.py
@@ -1,4 +1,5 @@
 from pypy.rpython.lltypesystem import lltype, llmemory, llarena
+from pypy.rlib.rarithmetic import is_emulated_long
 
 import struct
 
@@ -12,7 +13,11 @@
                     lltype.Float:           "d",
                     llmemory.Address:       "P",
                     }
-
+if is_emulated_long:
+    primitive_to_fmt.update( {
+        lltype.Signed:     "q",
+        lltype.Unsigned:   "Q",
+        } )
 
 #___________________________________________________________________________
 # Utility functions that know about the memory layout of the lltypes
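The lltypelayout.py hunk remaps Signed/Unsigned to the struct codes 'q'/'Q' when is_emulated_long is set, i.e. on builds where the C long is 32 bits but the word size is 64 bits; with the default native-long codes the layout computations would come out four bytes short per word. A small illustration of the size difference (native sizes, so the numbers are the typical win64 ones, not universal):

    import struct

    # 'l' packs a C long (4 bytes under MSVC), 'q' a 64-bit integer
    print(struct.calcsize("l"), struct.calcsize("q"))   # e.g. 4 8 on win64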
diff --git a/pypy/rpython/memory/test/test_transformed_gc.py b/pypy/rpython/memory/test/test_transformed_gc.py
--- a/pypy/rpython/memory/test/test_transformed_gc.py
+++ b/pypy/rpython/memory/test/test_transformed_gc.py
@@ -737,7 +737,7 @@
         def f():
             from pypy.rpython.lltypesystem import rffi
             alist = [A() for i in range(50)]
-            idarray = lltype.malloc(rffi.LONGP.TO, len(alist), flavor='raw')
+            idarray = lltype.malloc(rffi.SIGNEDP.TO, len(alist), flavor='raw')
             # Compute the id of all the elements of the list.  The goal is
             # to not allocate memory, so that if the GC needs memory to
             # remember the ids, it will trigger some collections itself
diff --git a/pypy/rpython/module/ll_os_stat.py b/pypy/rpython/module/ll_os_stat.py
--- a/pypy/rpython/module/ll_os_stat.py
+++ b/pypy/rpython/module/ll_os_stat.py
@@ -319,6 +319,7 @@
 
     def attributes_to_mode(attributes):
         m = 0
+        attributes = intmask(attributes)
         if attributes & win32traits.FILE_ATTRIBUTE_DIRECTORY:
             m |= win32traits._S_IFDIR | 0111 # IFEXEC for user,group,other
         else:
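The ll_os_stat.py hunk wraps the Win32 attributes value in intmask() before testing the FILE_ATTRIBUTE_* bits, bringing an unsigned (or, on win64 builds, long) value back into the signed machine-word range first. A hedged sketch of what an intmask-style wrap does; this is the general two's-complement idea, not PyPy's exact rarithmetic code:

    import struct

    BITS = 8 * struct.calcsize("P")         # machine word size in bits

    def intmask_sketch(n):
        n &= (1 << BITS) - 1                # keep only the low machine word
        if n >= 1 << (BITS - 1):            # reinterpret as two's complement
            n -= 1 << BITS
        return n

    print(intmask_sketch(0xFFFFFFFFFFFFFFFF))   # -1 on a 64-bit word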
diff --git a/pypy/rpython/module/test/test_ll_os.py b/pypy/rpython/module/test/test_ll_os.py
--- a/pypy/rpython/module/test/test_ll_os.py
+++ b/pypy/rpython/module/test/test_ll_os.py
@@ -80,8 +80,12 @@
         pwd = os.getcwd()
         import ctypes
         buf = ctypes.create_string_buffer(1000)
-        ctypes.windll.kernel32.GetEnvironmentVariableA('=%c:' % pwd[0], buf, 1000)
-        assert str(buf.value) == pwd
+        len = ctypes.windll.kernel32.GetEnvironmentVariableA('=%c:' % pwd[0], buf, 1000)
+        if (len == 0) and "WINGDB_PYTHON" in os.environ:
+            # the ctypes call seems not to work in the Wing debugger
+            return
+        assert str(buf.value).lower() == pwd
+        # ctypes returns the drive letter in uppercase, os.getcwd does not
 
     pwd = os.getcwd()
     try:
diff --git a/pypy/rpython/module/test/test_ll_os_stat.py b/pypy/rpython/module/test/test_ll_os_stat.py
--- a/pypy/rpython/module/test/test_ll_os_stat.py
+++ b/pypy/rpython/module/test/test_ll_os_stat.py
@@ -26,7 +26,7 @@
             assert wstat(unicode(f)).st_mtime == expected
 
         check('c:/')
-        check('c:/temp')
+        check(os.environ['TEMP'])
         check('c:/pagefile.sys')
 
     def test_fstat(self):
diff --git a/pypy/rpython/module/test/test_posix.py b/pypy/rpython/module/test/test_posix.py
--- a/pypy/rpython/module/test/test_posix.py
+++ b/pypy/rpython/module/test/test_posix.py
@@ -1,6 +1,8 @@
 import py
 from pypy.rpython.test.tool import BaseRtypingTest, LLRtypeMixin, OORtypeMixin
 from pypy.tool.udir import udir
+from pypy.rlib.rarithmetic import is_valid_int
+
 import os
 exec 'import %s as posix' % os.name
 
@@ -18,10 +20,10 @@
 
     def test_open(self):
         def f():
-            ff = posix.open(path,posix.O_RDONLY,0777)
+            ff = posix.open(path, posix.O_RDONLY, 0777)
             return ff
-        func = self.interpret(f,[])
-        assert type(func) == int
+        func = self.interpret(f, [])
+        assert is_valid_int(func)
 
     def test_fstat(self):
         def fo(fi):
@@ -61,25 +63,25 @@
         assert isinstance(times, tuple)
         assert len(times) == 5
         for value in times:
-            assert isinstance(value, int)
+            assert is_valid_int(value)
 
 
     def test_lseek(self):
-        def f(fi,pos):
-            posix.lseek(fi,pos,0)
-        fi = os.open(path,os.O_RDONLY,0777)
-        func = self.interpret(f,[fi,5]) 
-        res = os.read(fi,2)
+        def f(fi, pos):
+            posix.lseek(fi, pos, 0)
+        fi = os.open(path, os.O_RDONLY, 0777)
+        func = self.interpret(f, [fi, 5]) 
+        res = os.read(fi, 2)
         assert res =='is'
 
     def test_isatty(self):
         def f(fi):
             posix.isatty(fi)
-        fi = os.open(path,os.O_RDONLY,0777)
-        func = self.interpret(f,[fi])
+        fi = os.open(path, os.O_RDONLY, 0777)
+        func = self.interpret(f, [fi])
         assert not func
         os.close(fi)
-        func = self.interpret(f,[fi])
+        func = self.interpret(f, [fi])
         assert not func
 
     def test_getcwd(self):
diff --git a/pypy/rpython/rclass.py b/pypy/rpython/rclass.py
--- a/pypy/rpython/rclass.py
+++ b/pypy/rpython/rclass.py
@@ -364,6 +364,8 @@
     def get_ll_hash_function(self):
         return ll_inst_hash
 
+    get_ll_fasthash_function = get_ll_hash_function
+
     def rtype_type(self, hop):
         raise NotImplementedError
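The one-line rclass.py addition above aliases get_ll_fasthash_function to the identity-hash function, telling the rdict machinery that recomputing a key's hash is as cheap as reading a cached one; the new test_identity_hash_is_fast below then checks that the dict entries no longer carry an f_hash field. A rough sketch of the trade-off (names are illustrative, not the rdict API):

    def entry_matches(entry, key, key_hash, fasthash=None, eq=lambda a, b: a is b):
        # with a fast hash available, recompute it on the fly instead of
        # paying for a cached entry.f_hash field in every entry
        stored = fasthash(entry.key) if fasthash is not None else entry.f_hash
        return stored == key_hash and eq(entry.key, key)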
 
diff --git a/pypy/rpython/test/test_rdict.py b/pypy/rpython/test/test_rdict.py
--- a/pypy/rpython/test/test_rdict.py
+++ b/pypy/rpython/test/test_rdict.py
@@ -449,6 +449,21 @@
 
         assert r_AB_dic.lowleveltype == r_BA_dic.lowleveltype
 
+    def test_identity_hash_is_fast(self):
+        class A(object):
+            pass
+
+        def f():
+            return {A(): 1}
+
+        t = TranslationContext()
+        s = t.buildannotator().build_types(f, [])
+        rtyper = t.buildrtyper()
+        rtyper.specialize()
+
+        r_dict = rtyper.getrepr(s)
+        assert not hasattr(r_dict.lowleveltype.TO.entries.TO.OF, "f_hash")
+
     def test_tuple_dict(self):
         def f(i):
             d = {}
diff --git a/pypy/rpython/test/test_rlist.py b/pypy/rpython/test/test_rlist.py
--- a/pypy/rpython/test/test_rlist.py
+++ b/pypy/rpython/test/test_rlist.py
@@ -8,6 +8,7 @@
 from pypy.rpython.rlist import *
 from pypy.rpython.lltypesystem.rlist import ListRepr, FixedSizeListRepr, ll_newlist, ll_fixed_newlist
 from pypy.rpython.lltypesystem import rlist as ll_rlist
+from pypy.rpython.llinterp import LLException
 from pypy.rpython.ootypesystem import rlist as oo_rlist
 from pypy.rpython.rint import signed_repr
 from pypy.objspace.flow.model import Constant, Variable
@@ -1361,13 +1362,12 @@
                    ("y[*]" in immutable_fields)
 
     def test_hints(self):
-        from pypy.rlib.objectmodel import newlist
-        from pypy.rpython.annlowlevel import hlstr
+        from pypy.rlib.objectmodel import newlist_hint
 
         strings = ['abc', 'def']
         def f(i):
             z = strings[i]
-            x = newlist(sizehint=13)
+            x = newlist_hint(sizehint=13)
             x += z
             return ''.join(x)
 
@@ -1477,6 +1477,80 @@
         assert func1.oopspec == 'list.getitem_foldable(l, index)'
         assert not hasattr(func2, 'oopspec')
 
+    def test_iterate_over_immutable_list(self):
+        from pypy.rpython import rlist
+        class MyException(Exception):
+            pass
+        lst = list('abcdef')
+        def dummyfn():
+            total = 0
+            for c in lst:
+                total += ord(c)
+            return total
+        #
+        prev = rlist.ll_getitem_foldable_nonneg
+        try:
+            def seen_ok(l, index):
+                if index == 5:
+                    raise KeyError     # expected case
+                return prev(l, index)
+            rlist.ll_getitem_foldable_nonneg = seen_ok
+            e = raises(LLException, self.interpret, dummyfn, [])
+            assert 'KeyError' in str(e.value)
+        finally:
+            rlist.ll_getitem_foldable_nonneg = prev
+
+    def test_iterate_over_immutable_list_quasiimmut_attr(self):
+        from pypy.rpython import rlist
+        class MyException(Exception):
+            pass
+        class Foo:
+            _immutable_fields_ = ['lst?[*]']
+            lst = list('abcdef')
+        foo = Foo()
+        def dummyfn():
+            total = 0
+            for c in foo.lst:
+                total += ord(c)
+            return total
+        #
+        prev = rlist.ll_getitem_foldable_nonneg
+        try:
+            def seen_ok(l, index):
+                if index == 5:
+                    raise KeyError     # expected case
+                return prev(l, index)
+            rlist.ll_getitem_foldable_nonneg = seen_ok
+            e = raises(LLException, self.interpret, dummyfn, [])
+            assert 'KeyError' in str(e.value)
+        finally:
+            rlist.ll_getitem_foldable_nonneg = prev
+
+    def test_iterate_over_mutable_list(self):
+        from pypy.rpython import rlist
+        class MyException(Exception):
+            pass
+        lst = list('abcdef')
+        def dummyfn():
+            total = 0
+            for c in lst:
+                total += ord(c)
+            lst[0] = 'x'
+            return total
+        #
+        prev = rlist.ll_getitem_foldable_nonneg
+        try:
+            def seen_ok(l, index):
+                if index == 5:
+                    raise KeyError     # expected case
+                return prev(l, index)
+            rlist.ll_getitem_foldable_nonneg = seen_ok
+            res = self.interpret(dummyfn, [])
+            assert res == sum(map(ord, 'abcdef'))
+        finally:
+            rlist.ll_getitem_foldable_nonneg = prev
+
+
 class TestOOtype(BaseTestRlist, OORtypeMixin):
     rlist = oo_rlist
     type_system = 'ootype'
diff --git a/pypy/rpython/tool/rfficache.py b/pypy/rpython/tool/rfficache.py
--- a/pypy/rpython/tool/rfficache.py
+++ b/pypy/rpython/tool/rfficache.py
@@ -14,6 +14,8 @@
 
 def ask_gcc(question, add_source=""):
     includes = ['stdlib.h', 'stdio.h', 'sys/types.h']
+    if os.name != 'nt':
+        includes += ['inttypes.h']
     include_string = "\n".join(["#include <%s>" % i for i in includes])
     c_source = py.code.Source('''
     // includes
diff --git a/pypy/tool/jitlogparser/parser.py b/pypy/tool/jitlogparser/parser.py
--- a/pypy/tool/jitlogparser/parser.py
+++ b/pypy/tool/jitlogparser/parser.py
@@ -93,7 +93,7 @@
                         end_index += 1
                     op.asm = '\n'.join([asm[i][1] for i in range(asm_index, end_index)])
         return loop
-                    
+
     def _asm_disassemble(self, d, origin_addr, tp):
         from pypy.jit.backend.x86.tool.viewcode import machine_code_dump
         return list(machine_code_dump(d, tp, origin_addr))
@@ -109,7 +109,7 @@
         if not argspec.strip():
             return [], None
         if opname == 'debug_merge_point':
-            return argspec.split(", ", 1), None
+            return argspec.split(", ", 2), None
         else:
             args = argspec.split(', ')
             descr = None
@@ -159,7 +159,7 @@
         for op in operations:
             if op.name == 'debug_merge_point':
                 self.inline_level = int(op.args[0])
-                self.parse_code_data(op.args[1][1:-1])
+                self.parse_code_data(op.args[2][1:-1])
                 break
         else:
             self.inline_level = 0
@@ -417,7 +417,7 @@
         part.descr = descrs[i]
         part.comment = trace.comment
         parts.append(part)
-    
+
     return parts
 
 def parse_log_counts(input, loops):
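The parser.py changes account for debug_merge_point growing a second leading integer argument: the argspec is now split at most twice so the quoted code-position string stays in one piece, and parse_code_data() reads op.args[2] instead of op.args[1]. A quick illustration of the split behaviour, using an abbreviated form of the StrLiteralSearch line from the tests below:

    argspec = "0, 0, 'StrLiteralSearch at 11/51 [17, 8, 3, 1]'"
    print(argspec.split(", ", 2))
    # ['0', '0', "'StrLiteralSearch at 11/51 [17, 8, 3, 1]'"]
    # an unlimited split would also cut inside the bracketed list,
    # and the old split(", ", 1) would leave the second integer glued
    # to the code-position string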
diff --git a/pypy/tool/jitlogparser/test/test_parser.py b/pypy/tool/jitlogparser/test/test_parser.py
--- a/pypy/tool/jitlogparser/test/test_parser.py
+++ b/pypy/tool/jitlogparser/test/test_parser.py
@@ -29,7 +29,7 @@
 def test_parse_non_code():
     ops = parse('''
     []
-    debug_merge_point(0, "SomeRandomStuff")
+    debug_merge_point(0, 0, "SomeRandomStuff")
     ''')
     res = Function.from_operations(ops.operations, LoopStorage())
     assert len(res.chunks) == 1
@@ -39,10 +39,10 @@
     ops = parse('''
     [i0]
     label()
-    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 200> #10 ADD")
-    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 200> #11 SUB")
+    debug_merge_point(0, 0, "<code object stuff. file '/I/dont/exist.py'. line 200> #10 ADD")
+    debug_merge_point(0, 0, "<code object stuff. file '/I/dont/exist.py'. line 200> #11 SUB")
     i1 = int_add(i0, 1)
-    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 200> #11 SUB")
+    debug_merge_point(0, 0, "<code object stuff. file '/I/dont/exist.py'. line 200> #11 SUB")
     i2 = int_add(i1, 1)
     ''')
     res = Function.from_operations(ops.operations, LoopStorage(), loopname='<loopname>')
@@ -57,12 +57,12 @@
 def test_inlined_call():
     ops = parse("""
     []
-    debug_merge_point(0, '<code object inlined_call. file 'source.py'. line 12> #28 CALL_FUNCTION')
+    debug_merge_point(0, 0, '<code object inlined_call. file 'source.py'. line 12> #28 CALL_FUNCTION')
     i18 = getfield_gc(p0, descr=<BoolFieldDescr pypy.interpreter.pyframe.PyFrame.inst_is_being_profiled 89>)
-    debug_merge_point(1, '<code object inner. file 'source.py'. line 9> #0 LOAD_FAST')
-    debug_merge_point(1, '<code object inner. file 'source.py'. line 9> #3 LOAD_CONST')
-    debug_merge_point(1, '<code object inner. file 'source.py'. line 9> #7 RETURN_VALUE')
-    debug_merge_point(0, '<code object inlined_call. file 'source.py'. line 12> #31 STORE_FAST')
+    debug_merge_point(1, 1, '<code object inner. file 'source.py'. line 9> #0 LOAD_FAST')
+    debug_merge_point(1, 1, '<code object inner. file 'source.py'. line 9> #3 LOAD_CONST')
+    debug_merge_point(1, 1, '<code object inner. file 'source.py'. line 9> #7 RETURN_VALUE')
+    debug_merge_point(0, 0, '<code object inlined_call. file 'source.py'. line 12> #31 STORE_FAST')
     """)
     res = Function.from_operations(ops.operations, LoopStorage())
     assert len(res.chunks) == 3 # two chunks + inlined call
@@ -75,10 +75,10 @@
 def test_name():
     ops = parse('''
     [i0]
-    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 200> #10 ADD")
-    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 201> #11 SUB")
+    debug_merge_point(0, 0, "<code object stuff. file '/I/dont/exist.py'. line 200> #10 ADD")
+    debug_merge_point(0, 0, "<code object stuff. file '/I/dont/exist.py'. line 201> #11 SUB")
     i1 = int_add(i0, 1)
-    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 202> #11 SUB")
+    debug_merge_point(0, 0, "<code object stuff. file '/I/dont/exist.py'. line 202> #11 SUB")
     i2 = int_add(i1, 1)
     ''')
     res = Function.from_operations(ops.operations, LoopStorage())
@@ -92,10 +92,10 @@
     ops = parse('''
     [i0]
     i3 = int_add(i0, 1)
-    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 200> #10 ADD")
-    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 201> #11 SUB")
+    debug_merge_point(0, 0, "<code object stuff. file '/I/dont/exist.py'. line 200> #10 ADD")
+    debug_merge_point(0, 0, "<code object stuff. file '/I/dont/exist.py'. line 201> #11 SUB")
     i1 = int_add(i0, 1)
-    debug_merge_point(0, "<code object stuff. file '/I/dont/exist.py'. line 202> #11 SUB")
+    debug_merge_point(0, 0, "<code object stuff. file '/I/dont/exist.py'. line 202> #11 SUB")
     i2 = int_add(i1, 1)
     ''')
     res = Function.from_operations(ops.operations, LoopStorage())
@@ -105,10 +105,10 @@
     fname = str(py.path.local(__file__).join('..', 'x.py'))
     ops = parse('''
     [i0, i1]
-    debug_merge_point(0, "<code object f. file '%(fname)s'. line 2> #0 LOAD_FAST")
-    debug_merge_point(0, "<code object f. file '%(fname)s'. line 2> #3 LOAD_FAST")
-    debug_merge_point(0, "<code object f. file '%(fname)s'. line 2> #6 BINARY_ADD")
-    debug_merge_point(0, "<code object f. file '%(fname)s'. line 2> #7 RETURN_VALUE")
+    debug_merge_point(0, 0, "<code object f. file '%(fname)s'. line 2> #0 LOAD_FAST")
+    debug_merge_point(0, 0, "<code object f. file '%(fname)s'. line 2> #3 LOAD_FAST")
+    debug_merge_point(0, 0, "<code object f. file '%(fname)s'. line 2> #6 BINARY_ADD")
+    debug_merge_point(0, 0, "<code object f. file '%(fname)s'. line 2> #7 RETURN_VALUE")
     ''' % locals())
     res = Function.from_operations(ops.operations, LoopStorage())
     assert res.chunks[1].lineno == 3
@@ -119,11 +119,11 @@
     fname = str(py.path.local(__file__).join('..', 'x.py'))
     ops = parse('''
     [i0, i1]
-    debug_merge_point(0, "<code object g. file '%(fname)s'. line 5> #9 LOAD_FAST")
-    debug_merge_point(0, "<code object g. file '%(fname)s'. line 5> #12 LOAD_CONST")
-    debug_merge_point(0, "<code object g. file '%(fname)s'. line 5> #22 LOAD_CONST")
-    debug_merge_point(0, "<code object g. file '%(fname)s'. line 5> #28 LOAD_CONST")
-    debug_merge_point(0, "<code object g. file '%(fname)s'. line 5> #6 SETUP_LOOP")
+    debug_merge_point(0, 0, "<code object g. file '%(fname)s'. line 5> #9 LOAD_FAST")
+    debug_merge_point(0, 0, "<code object g. file '%(fname)s'. line 5> #12 LOAD_CONST")
+    debug_merge_point(0, 0, "<code object g. file '%(fname)s'. line 5> #22 LOAD_CONST")
+    debug_merge_point(0, 0, "<code object g. file '%(fname)s'. line 5> #28 LOAD_CONST")
+    debug_merge_point(0, 0, "<code object g. file '%(fname)s'. line 5> #6 SETUP_LOOP")
     ''' % locals())
     res = Function.from_operations(ops.operations, LoopStorage())
     assert res.linerange == (7, 9)
@@ -135,7 +135,7 @@
     fname = str(py.path.local(__file__).join('..', 'x.py'))
     ops = parse("""
     [p6, p1]
-    debug_merge_point(0, '<code object h. file '%(fname)s'. line 11> #17 FOR_ITER')
+    debug_merge_point(0, 0, '<code object h. file '%(fname)s'. line 11> #17 FOR_ITER')
     guard_class(p6, 144264192, descr=<Guard2>)
     p12 = getfield_gc(p6, descr=<GcPtrFieldDescr pypy.objspace.std.iterobject.W_AbstractSeqIterObject.inst_w_seq 12>)
     """ % locals())
@@ -181,7 +181,7 @@
 
 def test_parsing_strliteral():
     loop = parse("""
-    debug_merge_point(0, 'StrLiteralSearch at 11/51 [17, 8, 3, 1, 1, 1, 1, 51, 0, 19, 51, 1]')
+    debug_merge_point(0, 0, 'StrLiteralSearch at 11/51 [17, 8, 3, 1, 1, 1, 1, 51, 0, 19, 51, 1]')
     """)
     ops = Function.from_operations(loop.operations, LoopStorage())
     chunk = ops.chunks[0]
@@ -193,12 +193,12 @@
     loop = parse("""
     # Loop 0 : loop with 19 ops
     [p0, p1, p2, p3, i4]
-    debug_merge_point(0, '<code object f. file 'x.py'. line 2> #15 COMPARE_OP')
+    debug_merge_point(0, 0, '<code object f. file 'x.py'. line 2> #15 COMPARE_OP')
     +166: i6 = int_lt(i4, 10000)
     guard_true(i6, descr=<Guard3>) [p1, p0, p2, p3, i4]
-    debug_merge_point(0, '<code object f. file 'x.py'. line 2> #27 INPLACE_ADD')
+    debug_merge_point(0, 0, '<code object f. file 'x.py'. line 2> #27 INPLACE_ADD')
     +179: i8 = int_add(i4, 1)
-    debug_merge_point(0, '<code object f. file 'x.py'. line 2> #31 JUMP_ABSOLUTE')
+    debug_merge_point(0, 0, '<code object f. file 'x.py'. line 2> #31 JUMP_ABSOLUTE')
     +183: i10 = getfield_raw(40564608, descr=<SignedFieldDescr pypysig_long_struct.c_value 0>)
     +191: i12 = int_sub(i10, 1)
     +195: setfield_raw(40564608, i12, descr=<SignedFieldDescr pypysig_long_struct.c_value 0>)
@@ -287,8 +287,8 @@
 def test_parse_nonpython():
     loop = parse("""
     []
-    debug_merge_point(0, 'random')
-    debug_merge_point(0, '<code object f. file 'x.py'. line 2> #15 COMPARE_OP')
+    debug_merge_point(0, 0, 'random')
+    debug_merge_point(0, 0, '<code object f. file 'x.py'. line 2> #15 COMPARE_OP')
     """)
     f = Function.from_operations(loop.operations, LoopStorage())
     assert f.chunks[-1].filename == 'x.py'
diff --git a/pypy/tool/release/package.py b/pypy/tool/release/package.py
--- a/pypy/tool/release/package.py
+++ b/pypy/tool/release/package.py
@@ -60,7 +60,7 @@
     if sys.platform == 'win32':
         # Can't rename a DLL: it is always called 'libpypy-c.dll'
         for extra in ['libpypy-c.dll',
-                      'libexpat.dll', 'sqlite3.dll', 'msvcr90.dll',
+                      'libexpat.dll', 'sqlite3.dll', 'msvcr100.dll',
                       'libeay32.dll', 'ssleay32.dll']:
             p = pypy_c.dirpath().join(extra)
             if not p.check():
diff --git a/pypy/translator/c/gcc/instruction.py b/pypy/translator/c/gcc/instruction.py
--- a/pypy/translator/c/gcc/instruction.py
+++ b/pypy/translator/c/gcc/instruction.py
@@ -13,13 +13,17 @@
 ARGUMENT_REGISTERS_64 = ('%rdi', '%rsi', '%rdx', '%rcx', '%r8', '%r9')
 
 
-def frameloc_esp(offset):
+def frameloc_esp(offset, wordsize):
     assert offset >= 0
-    assert offset % 4 == 0
+    assert offset % wordsize == 0
+    if wordsize == 8:    # in this case, there are 3 null bits, but we
+        offset >>= 1     # only need 2 of them
     return LOC_ESP_PLUS | offset
 
-def frameloc_ebp(offset):
-    assert offset % 4 == 0
+def frameloc_ebp(offset, wordsize):
+    assert offset % wordsize == 0
+    if wordsize == 8:    # in this case, there are 3 null bits, but we
+        offset >>= 1     # only need 2 of them
     if offset >= 0:
         return LOC_EBP_PLUS | offset
     else:
@@ -57,12 +61,12 @@
             # try to use esp-relative addressing
             ofs_from_esp = framesize + self.ofs_from_frame_end
             if ofs_from_esp % 2 == 0:
-                return frameloc_esp(ofs_from_esp)
+                return frameloc_esp(ofs_from_esp, wordsize)
             # we can get an odd value if the framesize is marked as bogus
             # by visit_andl()
         assert uses_frame_pointer
         ofs_from_ebp = self.ofs_from_frame_end + wordsize
-        return frameloc_ebp(ofs_from_ebp)
+        return frameloc_ebp(ofs_from_ebp, wordsize)
 
 
 class Insn(object):
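frameloc_esp/frameloc_ebp above and the decoding sides (asmgcroot.py earlier, trackgcroot.py just below) share one convention: the low two bits of a location word tag its kind, the rest is a frame offset. On 64-bit, offsets are 8-aligned and so have three zero low bits; as the comments note, only two are needed for the tag, so the encoder stores offset >> 1 and the decoder shifts it back. A hedged round-trip sketch with illustrative tag constants (not the real LOC_* table):

    LOC_MASK = 0x03
    LOC_ESP_PLUS = 0x01          # illustrative tag value

    def encode_esp_loc(offset, wordsize):
        assert offset >= 0 and offset % wordsize == 0
        if wordsize == 8:        # three zero bits available, two needed
            offset >>= 1
        return LOC_ESP_PLUS | offset

    def decode_loc(location, wordsize):
        kind = location & LOC_MASK
        offset = location & ~LOC_MASK
        if wordsize == 8:
            offset <<= 1
        return kind, offset

    assert decode_loc(encode_esp_loc(48, 8), 8) == (LOC_ESP_PLUS, 48)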
diff --git a/pypy/translator/c/gcc/trackgcroot.py b/pypy/translator/c/gcc/trackgcroot.py
--- a/pypy/translator/c/gcc/trackgcroot.py
+++ b/pypy/translator/c/gcc/trackgcroot.py
@@ -78,9 +78,9 @@
             if self.is_stack_bottom:
                 retaddr = LOC_NOWHERE     # end marker for asmgcroot.py
             elif self.uses_frame_pointer:
-                retaddr = frameloc_ebp(self.WORD)
+                retaddr = frameloc_ebp(self.WORD, self.WORD)
             else:
-                retaddr = frameloc_esp(insn.framesize)
+                retaddr = frameloc_esp(insn.framesize, self.WORD)
             shape = [retaddr]
             # the first gcroots are always the ones corresponding to
             # the callee-saved registers
@@ -894,6 +894,8 @@
             return '%' + cls.CALLEE_SAVE_REGISTERS[reg].replace("%", "")
         else:
             offset = loc & ~ LOC_MASK
+            if cls.WORD == 8:
+                offset <<= 1
             if kind == LOC_EBP_PLUS:
                 result = '(%' + cls.EBP.replace("%", "") + ')'
             elif kind == LOC_EBP_MINUS:
diff --git a/pypy/translator/c/primitive.py b/pypy/translator/c/primitive.py
--- a/pypy/translator/c/primitive.py
+++ b/pypy/translator/c/primitive.py
@@ -89,6 +89,12 @@
     else:
         return '%dLL' % value
 
+def is_positive_nan(value):
+    # bah.  we don't have math.copysign() if we're running Python 2.5
+    import struct
+    c = struct.pack("!d", value)[0]
+    return {'\x7f': True, '\xff': False}[c]
+
 def name_float(value, db):
     if isinf(value):
         if value > 0:
@@ -96,7 +102,10 @@
         else:
             return '(-Py_HUGE_VAL)'
     elif isnan(value):
-        return '(Py_HUGE_VAL/Py_HUGE_VAL)'
+        if is_positive_nan(value):
+            return '(Py_HUGE_VAL/Py_HUGE_VAL)'
+        else:
+            return '(-(Py_HUGE_VAL/Py_HUGE_VAL))'
     else:
         x = repr(value)
         assert not x.startswith('n')
@@ -112,7 +121,10 @@
             return '((float)-Py_HUGE_VAL)'
     elif isnan(value):
         # XXX are these expressions ok?
-        return '((float)(Py_HUGE_VAL/Py_HUGE_VAL))'
+        if is_positive_nan(value):
+            return '((float)(Py_HUGE_VAL/Py_HUGE_VAL))'
+        else:
+            return '(-(float)(Py_HUGE_VAL/Py_HUGE_VAL))'
     else:
         return repr(value) + 'f'
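is_positive_nan() above reads the first byte of the big-endian double representation because, as its comment notes, math.copysign() may be missing on Python 2.5: a NaN with the sign bit clear packs to a leading 0x7f byte, a negative NaN to 0xff. Where copysign() is available, an equivalent check would be the following (a hedged alternative, not what the patch uses):

    import math

    def is_positive_nan_alt(value):
        # the sign bit is meaningful even for NaNs; copysign exposes it portably
        return math.copysign(1.0, value) > 0.0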
 
diff --git a/pypy/translator/c/src/asm_gcc_x86.h b/pypy/translator/c/src/asm_gcc_x86.h
--- a/pypy/translator/c/src/asm_gcc_x86.h
+++ b/pypy/translator/c/src/asm_gcc_x86.h
@@ -102,6 +102,12 @@
 #endif  /* !PYPY_CPU_HAS_STANDARD_PRECISION */
 
 
+#ifdef PYPY_X86_CHECK_SSE2
+#define PYPY_X86_CHECK_SSE2_DEFINED
+extern void pypy_x86_check_sse2(void);
+#endif
+
+
 /* implementations */
 
 #ifndef PYPY_NOT_MAIN_FILE
@@ -113,4 +119,27 @@
 }
 #  endif
 
+#  ifdef PYPY_X86_CHECK_SSE2
+void pypy_x86_check_sse2(void)
+{
+    //Read the CPU features.
+    int features;
+    asm("movl $1, %%eax\n"
+        "pushl %%ebx\n"
+        "cpuid\n"
+        "popl %%ebx\n"
+        "movl %%edx, %0"
+        : "=g"(features) : : "eax", "edx", "ecx");
+    
+    //Check bits 25 and 26, this indicates SSE2 support
+    if (((features & (1 << 25)) == 0) || ((features & (1 << 26)) == 0))
+    {
+        fprintf(stderr, "Old CPU with no SSE2 support, cannot continue.\n"
+                        "You need to re-translate with "
+                        "'--jit-backend=x86-without-sse2'\n");
+        abort();
+    }
+}
+#  endif
+
 #endif
diff --git a/pypy/translator/c/src/debug_print.c b/pypy/translator/c/src/debug_print.c
--- a/pypy/translator/c/src/debug_print.c
+++ b/pypy/translator/c/src/debug_print.c
@@ -1,3 +1,4 @@
+#define PYPY_NOT_MAIN_FILE
 
 #include <string.h>
 #include <stddef.h>
diff --git a/pypy/translator/c/src/dtoa.c b/pypy/translator/c/src/dtoa.c
--- a/pypy/translator/c/src/dtoa.c
+++ b/pypy/translator/c/src/dtoa.c
@@ -46,13 +46,13 @@
  *     of return type *Bigint all return NULL to indicate a malloc failure.
  *     Similarly, rv_alloc and nrv_alloc (return type char *) return NULL on
  *     failure.  bigcomp now has return type int (it used to be void) and
- *     returns -1 on failure and 0 otherwise.  _Py_dg_dtoa returns NULL
- *     on failure.  _Py_dg_strtod indicates failure due to malloc failure
+ *     returns -1 on failure and 0 otherwise.  __Py_dg_dtoa returns NULL
+ *     on failure.  __Py_dg_strtod indicates failure due to malloc failure
  *     by returning -1.0, setting errno=ENOMEM and *se to s00.
  *
  *  4. The static variable dtoa_result has been removed.  Callers of
- *     _Py_dg_dtoa are expected to call _Py_dg_freedtoa to free
- *     the memory allocated by _Py_dg_dtoa.
+ *     __Py_dg_dtoa are expected to call __Py_dg_freedtoa to free
+ *     the memory allocated by __Py_dg_dtoa.
  *
  *  5. The code has been reformatted to better fit with Python's
  *     C style guide (PEP 7).
@@ -61,7 +61,7 @@
  *     that hasn't been MALLOC'ed, private_mem should only be used when k <=
  *     Kmax.
  *
- *  7. _Py_dg_strtod has been modified so that it doesn't accept strings with
+ *  7. __Py_dg_strtod has been modified so that it doesn't accept strings with
  *     leading whitespace.
  *
  ***************************************************************/
@@ -283,7 +283,7 @@
 #define Big0 (Frac_mask1 | Exp_msk1*(DBL_MAX_EXP+Bias-1))
 #define Big1 0xffffffff
 
-/* struct BCinfo is used to pass information from _Py_dg_strtod to bigcomp */
+/* struct BCinfo is used to pass information from __Py_dg_strtod to bigcomp */
 
 typedef struct BCinfo BCinfo;
 struct
@@ -494,7 +494,7 @@
 
 /* convert a string s containing nd decimal digits (possibly containing a
    decimal separator at position nd0, which is ignored) to a Bigint.  This
-   function carries on where the parsing code in _Py_dg_strtod leaves off: on
+   function carries on where the parsing code in __Py_dg_strtod leaves off: on
    entry, y9 contains the result of converting the first 9 digits.  Returns
    NULL on failure. */
 
@@ -1050,7 +1050,7 @@
 }
 
 /* Convert a scaled double to a Bigint plus an exponent.  Similar to d2b,
-   except that it accepts the scale parameter used in _Py_dg_strtod (which
+   except that it accepts the scale parameter used in __Py_dg_strtod (which
    should be either 0 or 2*P), and the normalization for the return value is
    different (see below).  On input, d should be finite and nonnegative, and d
    / 2**scale should be exactly representable as an IEEE 754 double.
@@ -1351,9 +1351,9 @@
 /* The bigcomp function handles some hard cases for strtod, for inputs
    with more than STRTOD_DIGLIM digits.  It's called once an initial
    estimate for the double corresponding to the input string has
-   already been obtained by the code in _Py_dg_strtod.
+   already been obtained by the code in __Py_dg_strtod.
 
-   The bigcomp function is only called after _Py_dg_strtod has found a
+   The bigcomp function is only called after __Py_dg_strtod has found a
    double value rv such that either rv or rv + 1ulp represents the
    correctly rounded value corresponding to the original string.  It
    determines which of these two values is the correct one by
@@ -1368,12 +1368,12 @@
      s0 points to the first significant digit of the input string.
 
      rv is a (possibly scaled) estimate for the closest double value to the
-        value represented by the original input to _Py_dg_strtod.  If
+        value represented by the original input to __Py_dg_strtod.  If
         bc->scale is nonzero, then rv/2^(bc->scale) is the approximation to
         the input value.
 
      bc is a struct containing information gathered during the parsing and
-        estimation steps of _Py_dg_strtod.  Description of fields follows:
+        estimation steps of __Py_dg_strtod.  Description of fields follows:
 
         bc->e0 gives the exponent of the input value, such that dv = (integer
            given by the bd->nd digits of s0) * 10**e0
@@ -1505,7 +1505,7 @@
 }
 
 static double
-_Py_dg_strtod(const char *s00, char **se)
+__Py_dg_strtod(const char *s00, char **se)
 {
     int bb2, bb5, bbe, bd2, bd5, bs2, c, dsign, e, e1, error;
     int esign, i, j, k, lz, nd, nd0, odd, sign;
@@ -1849,7 +1849,7 @@
 
     for(;;) {
 
-        /* This is the main correction loop for _Py_dg_strtod.
+        /* This is the main correction loop for __Py_dg_strtod.
 
            We've got a decimal value tdv, and a floating-point approximation
            srv=rv/2^bc.scale to tdv.  The aim is to determine whether srv is
@@ -2283,7 +2283,7 @@
  */
 
 static void
-_Py_dg_freedtoa(char *s)
+__Py_dg_freedtoa(char *s)
 {
     Bigint *b = (Bigint *)((int *)s - 1);
     b->maxwds = 1 << (b->k = *(int*)b);
@@ -2325,11 +2325,11 @@
  */
 
 /* Additional notes (METD): (1) returns NULL on failure.  (2) to avoid memory
-   leakage, a successful call to _Py_dg_dtoa should always be matched by a
-   call to _Py_dg_freedtoa. */
+   leakage, a successful call to __Py_dg_dtoa should always be matched by a
+   call to __Py_dg_freedtoa. */
 
 static char *
-_Py_dg_dtoa(double dd, int mode, int ndigits,
+__Py_dg_dtoa(double dd, int mode, int ndigits,
             int *decpt, int *sign, char **rve)
 {
     /*  Arguments ndigits, decpt, sign are similar to those
@@ -2926,7 +2926,7 @@
     if (b)
         Bfree(b);
     if (s0)
-        _Py_dg_freedtoa(s0);
+        __Py_dg_freedtoa(s0);
     return NULL;
 }
 
@@ -2947,7 +2947,7 @@
     _PyPy_SET_53BIT_PRECISION_HEADER;
 
     _PyPy_SET_53BIT_PRECISION_START;
-    result = _Py_dg_strtod(s00, se);
+    result = __Py_dg_strtod(s00, se);
     _PyPy_SET_53BIT_PRECISION_END;
     return result;
 }
@@ -2959,14 +2959,14 @@
     _PyPy_SET_53BIT_PRECISION_HEADER;
 
     _PyPy_SET_53BIT_PRECISION_START;
-    result = _Py_dg_dtoa(dd, mode, ndigits, decpt, sign, rve);
+    result = __Py_dg_dtoa(dd, mode, ndigits, decpt, sign, rve);
     _PyPy_SET_53BIT_PRECISION_END;
     return result;
 }
 
 void _PyPy_dg_freedtoa(char *s)
 {
-    _Py_dg_freedtoa(s);
+    __Py_dg_freedtoa(s);
 }
 /* End PYPY hacks */
 
diff --git a/pypy/translator/c/src/libffi_msvc/ffi.c b/pypy/translator/c/src/libffi_msvc/ffi.c
--- a/pypy/translator/c/src/libffi_msvc/ffi.c
+++ b/pypy/translator/c/src/libffi_msvc/ffi.c
@@ -71,31 +71,31 @@
 	  switch ((*p_arg)->type)
 	    {
 	    case FFI_TYPE_SINT8:
-	      *(signed int *) argp = (signed int)*(SINT8 *)(* p_argv);
+	      *(signed int *) argp = (signed int)*(ffi_SINT8 *)(* p_argv);
 	      break;
 
 	    case FFI_TYPE_UINT8:
-	      *(unsigned int *) argp = (unsigned int)*(UINT8 *)(* p_argv);
+	      *(unsigned int *) argp = (unsigned int)*(ffi_UINT8 *)(* p_argv);
 	      break;
 
 	    case FFI_TYPE_SINT16:
-	      *(signed int *) argp = (signed int)*(SINT16 *)(* p_argv);
+	      *(signed int *) argp = (signed int)*(ffi_SINT16 *)(* p_argv);

