[pypy-commit] pypy disable_pythonapi: merge default into branch

Mon Jun 30 03:11:38 CEST 2014

Author: mattip <matti.picus at gmail.com>
Branch: disable_pythonapi
Changeset: r72276:095a123d35c1
Date: 2014-06-30 04:08 +0300
http://bitbucket.org/pypy/pypy/changeset/095a123d35c1/

Log:	merge default into branch

diff too long, truncating to 2000 out of 3709 lines

diff --git a/lib_pypy/cffi/api.py b/lib_pypy/cffi/api.py
--- a/lib_pypy/cffi/api.py
+++ b/lib_pypy/cffi/api.py
@@ -443,6 +443,10 @@
                 for enumname, enumval in zip(tp.enumerators, tp.enumvalues):
                     if enumname not in library.__dict__:
                         library.__dict__[enumname] = enumval
+            for key, val in ffi._parser._int_constants.items():
+                if key not in library.__dict__:
+                    library.__dict__[key] = val
+
             copied_enums.append(True)
             if name in library.__dict__:
                 return
diff --git a/lib_pypy/cffi/cparser.py b/lib_pypy/cffi/cparser.py
--- a/lib_pypy/cffi/cparser.py
+++ b/lib_pypy/cffi/cparser.py
@@ -24,6 +24,7 @@
 _r_partial_array = re.compile(r"\[\s*\.\.\.\s*\]")
 _r_words = re.compile(r"\w+|\S")
 _parser_cache = None
+_r_int_literal = re.compile(r"^0?x?[0-9a-f]+u?l?$", re.IGNORECASE)
 
 def _get_parser():
     global _parser_cache
@@ -99,6 +100,7 @@
         self._structnode2type = weakref.WeakKeyDictionary()
         self._override = False
         self._packed = False
+        self._int_constants = {}
 
     def _parse(self, csource):
         csource, macros = _preprocess(csource)
@@ -128,9 +130,10 @@
         finally:
             if lock is not None:
                 lock.release()
-        return ast, macros
+        # csource will be used to find buggy source text
+        return ast, macros, csource
 
-    def convert_pycparser_error(self, e, csource):
+    def _convert_pycparser_error(self, e, csource):
         # xxx look for ":NUM:" at the start of str(e) and try to interpret
         # it as a line number
         line = None
@@ -142,6 +145,12 @@
                 csourcelines = csource.splitlines()
                 if 1 <= linenum <= len(csourcelines):
                     line = csourcelines[linenum-1]
+        return line
+
+    def convert_pycparser_error(self, e, csource):
+        line = self._convert_pycparser_error(e, csource)
+
+        msg = str(e)
         if line:
             msg = 'cannot parse "%s"\n%s' % (line.strip(), msg)
         else:
@@ -160,14 +169,9 @@
             self._packed = prev_packed
 
     def _internal_parse(self, csource):
-        ast, macros = self._parse(csource)
+        ast, macros, csource = self._parse(csource)
         # add the macros
-        for key, value in macros.items():
-            value = value.strip()
-            if value != '...':
-                raise api.CDefError('only supports the syntax "#define '
-                                    '%s ..." for now (literally)' % key)
-            self._declare('macro ' + key, value)
+        self._process_macros(macros)
         # find the first "__dotdotdot__" and use that as a separator
         # between the repeated typedefs and the real csource
         iterator = iter(ast.ext)
@@ -175,27 +179,61 @@
             if decl.name == '__dotdotdot__':
                 break
         #
-        for decl in iterator:
-            if isinstance(decl, pycparser.c_ast.Decl):
-                self._parse_decl(decl)
-            elif isinstance(decl, pycparser.c_ast.Typedef):
-                if not decl.name:
-                    raise api.CDefError("typedef does not declare any name",
-                                        decl)
-                if (isinstance(decl.type.type, pycparser.c_ast.IdentifierType)
-                        and decl.type.type.names == ['__dotdotdot__']):
-                    realtype = model.unknown_type(decl.name)
-                elif (isinstance(decl.type, pycparser.c_ast.PtrDecl) and
-                      isinstance(decl.type.type, pycparser.c_ast.TypeDecl) and
-                      isinstance(decl.type.type.type,
-                                 pycparser.c_ast.IdentifierType) and
-                      decl.type.type.type.names == ['__dotdotdot__']):
-                    realtype = model.unknown_ptr_type(decl.name)
+        try:
+            for decl in iterator:
+                if isinstance(decl, pycparser.c_ast.Decl):
+                    self._parse_decl(decl)
+                elif isinstance(decl, pycparser.c_ast.Typedef):
+                    if not decl.name:
+                        raise api.CDefError("typedef does not declare any name",
+                                            decl)
+                    if (isinstance(decl.type.type, pycparser.c_ast.IdentifierType)
+                            and decl.type.type.names == ['__dotdotdot__']):
+                        realtype = model.unknown_type(decl.name)
+                    elif (isinstance(decl.type, pycparser.c_ast.PtrDecl) and
+                          isinstance(decl.type.type, pycparser.c_ast.TypeDecl) and
+                          isinstance(decl.type.type.type,
+                                     pycparser.c_ast.IdentifierType) and
+                          decl.type.type.type.names == ['__dotdotdot__']):
+                        realtype = model.unknown_ptr_type(decl.name)
+                    else:
+                        realtype = self._get_type(decl.type, name=decl.name)
+                    self._declare('typedef ' + decl.name, realtype)
                 else:
-                    realtype = self._get_type(decl.type, name=decl.name)
-                self._declare('typedef ' + decl.name, realtype)
+                    raise api.CDefError("unrecognized construct", decl)
+        except api.FFIError as e:
+            msg = self._convert_pycparser_error(e, csource)
+            if msg:
+                e.args = (e.args[0] + "\n    *** Err: %s" % msg,)
+            raise
+
+    def _add_constants(self, key, val):
+        if key in self._int_constants:
+            raise api.FFIError(
+                "multiple declarations of constant: %s" % (key,))
+        self._int_constants[key] = val
+
+    def _process_macros(self, macros):
+        for key, value in macros.items():
+            value = value.strip()
+            match = _r_int_literal.search(value)
+            if match is not None:
+                int_str = match.group(0).lower().rstrip("ul")
+
+                # "010" is not valid oct in py3
+                if (int_str.startswith("0") and
+                        int_str != "0" and
+                        not int_str.startswith("0x")):
+                    int_str = "0o" + int_str[1:]
+
+                pyvalue = int(int_str, 0)
+                self._add_constants(key, pyvalue)
+            elif value == '...':
+                self._declare('macro ' + key, value)
             else:
-                raise api.CDefError("unrecognized construct", decl)
+                raise api.CDefError('only supports the syntax "#define '
+                                    '%s ..." (literally) or "#define '
+                                    '%s 0x1FF" for now' % (key, key))
 
     def _parse_decl(self, decl):
         node = decl.type
@@ -227,7 +265,7 @@
                     self._declare('variable ' + decl.name, tp)
 
     def parse_type(self, cdecl):
-        ast, macros = self._parse('void __dummy(\n%s\n);' % cdecl)
+        ast, macros = self._parse('void __dummy(\n%s\n);' % cdecl)[:2]
         assert not macros
         exprnode = ast.ext[-1].type.args.params[0]
         if isinstance(exprnode, pycparser.c_ast.ID):
@@ -306,7 +344,8 @@
                 if ident == 'void':
                     return model.void_type
                 if ident == '__dotdotdot__':
-                    raise api.FFIError('bad usage of "..."')
+                    raise api.FFIError(':%d: bad usage of "..."' %
+                            typenode.coord.line)
                 return resolve_common_type(ident)
             #
             if isinstance(type, pycparser.c_ast.Struct):
@@ -333,7 +372,8 @@
             return self._get_struct_union_enum_type('union', typenode, name,
                                                     nested=True)
         #
-        raise api.FFIError("bad or unsupported type declaration")
+        raise api.FFIError(":%d: bad or unsupported type declaration" %
+                typenode.coord.line)
 
     def _parse_function_type(self, typenode, funcname=None):
         params = list(getattr(typenode.args, 'params', []))
@@ -499,6 +539,10 @@
         if (isinstance(exprnode, pycparser.c_ast.UnaryOp) and
                 exprnode.op == '-'):
             return -self._parse_constant(exprnode.expr)
+        # load previously defined int constant
+        if (isinstance(exprnode, pycparser.c_ast.ID) and
+                exprnode.name in self._int_constants):
+            return self._int_constants[exprnode.name]
         #
         if partial_length_ok:
             if (isinstance(exprnode, pycparser.c_ast.ID) and
@@ -506,8 +550,8 @@
                 self._partial_length = True
                 return '...'
         #
-        raise api.FFIError("unsupported expression: expected a "
-                           "simple numeric constant")
+        raise api.FFIError(":%d: unsupported expression: expected a "
+                           "simple numeric constant" % exprnode.coord.line)
 
     def _build_enum_type(self, explicit_name, decls):
         if decls is not None:
@@ -522,6 +566,7 @@
                 if enum.value is not None:
                     nextenumvalue = self._parse_constant(enum.value)
                 enumvalues.append(nextenumvalue)
+                self._add_constants(enum.name, nextenumvalue)
                 nextenumvalue += 1
             enumvalues = tuple(enumvalues)
             tp = model.EnumType(explicit_name, enumerators, enumvalues)
@@ -535,3 +580,5 @@
             kind = name.split(' ', 1)[0]
             if kind in ('typedef', 'struct', 'union', 'enum'):
                 self._declare(name, tp)
+        for k, v in other._int_constants.items():
+            self._add_constants(k, v)
diff --git a/lib_pypy/cffi/ffiplatform.py b/lib_pypy/cffi/ffiplatform.py
--- a/lib_pypy/cffi/ffiplatform.py
+++ b/lib_pypy/cffi/ffiplatform.py
@@ -38,6 +38,7 @@
     import distutils.errors
     #
     dist = Distribution({'ext_modules': [ext]})
+    dist.parse_config_files()
     options = dist.get_option_dict('build_ext')
     options['force'] = ('ffiplatform', True)
     options['build_lib'] = ('ffiplatform', tmpdir)
diff --git a/lib_pypy/cffi/vengine_cpy.py b/lib_pypy/cffi/vengine_cpy.py
--- a/lib_pypy/cffi/vengine_cpy.py
+++ b/lib_pypy/cffi/vengine_cpy.py
@@ -89,43 +89,54 @@
         # by generate_cpy_function_method().
         prnt('static PyMethodDef _cffi_methods[] = {')
         self._generate("method")
-        prnt('  {"_cffi_setup", _cffi_setup, METH_VARARGS},')
-        prnt('  {NULL, NULL}    /* Sentinel */')
+        prnt('  {"_cffi_setup", _cffi_setup, METH_VARARGS, NULL},')
+        prnt('  {NULL, NULL, 0, NULL}    /* Sentinel */')
         prnt('};')
         prnt()
         #
         # standard init.
         modname = self.verifier.get_module_name()
-        if sys.version_info >= (3,):
-            prnt('static struct PyModuleDef _cffi_module_def = {')
-            prnt('  PyModuleDef_HEAD_INIT,')
-            prnt('  "%s",' % modname)
-            prnt('  NULL,')
-            prnt('  -1,')
-            prnt('  _cffi_methods,')
-            prnt('  NULL, NULL, NULL, NULL')
-            prnt('};')
-            prnt()
-            initname = 'PyInit_%s' % modname
-            createmod = 'PyModule_Create(&_cffi_module_def)'
-            errorcase = 'return NULL'
-            finalreturn = 'return lib'
-        else:
-            initname = 'init%s' % modname
-            createmod = 'Py_InitModule("%s", _cffi_methods)' % modname
-            errorcase = 'return'
-            finalreturn = 'return'
+        constants = self._chained_list_constants[False]
+        prnt('#if PY_MAJOR_VERSION >= 3')
+        prnt()
+        prnt('static struct PyModuleDef _cffi_module_def = {')
+        prnt('  PyModuleDef_HEAD_INIT,')
+        prnt('  "%s",' % modname)
+        prnt('  NULL,')
+        prnt('  -1,')
+        prnt('  _cffi_methods,')
+        prnt('  NULL, NULL, NULL, NULL')
+        prnt('};')
+        prnt()
         prnt('PyMODINIT_FUNC')
-        prnt('%s(void)' % initname)
+        prnt('PyInit_%s(void)' % modname)
         prnt('{')
         prnt('  PyObject *lib;')
-        prnt('  lib = %s;' % createmod)
-        prnt('  if (lib == NULL || %s < 0)' % (
-            self._chained_list_constants[False],))
-        prnt('    %s;' % errorcase)
-        prnt('  _cffi_init();')
-        prnt('  %s;' % finalreturn)
+        prnt('  lib = PyModule_Create(&_cffi_module_def);')
+        prnt('  if (lib == NULL)')
+        prnt('    return NULL;')
+        prnt('  if (%s < 0 || _cffi_init() < 0) {' % (constants,))
+        prnt('    Py_DECREF(lib);')
+        prnt('    return NULL;')
+        prnt('  }')
+        prnt('  return lib;')
         prnt('}')
+        prnt()
+        prnt('#else')
+        prnt()
+        prnt('PyMODINIT_FUNC')
+        prnt('init%s(void)' % modname)
+        prnt('{')
+        prnt('  PyObject *lib;')
+        prnt('  lib = Py_InitModule("%s", _cffi_methods);' % modname)
+        prnt('  if (lib == NULL)')
+        prnt('    return;')
+        prnt('  if (%s < 0 || _cffi_init() < 0)' % (constants,))
+        prnt('    return;')
+        prnt('  return;')
+        prnt('}')
+        prnt()
+        prnt('#endif')
 
     def load_library(self):
         # XXX review all usages of 'self' here!
@@ -394,7 +405,7 @@
             meth = 'METH_O'
         else:
             meth = 'METH_VARARGS'
-        self._prnt('  {"%s", _cffi_f_%s, %s},' % (name, name, meth))
+        self._prnt('  {"%s", _cffi_f_%s, %s, NULL},' % (name, name, meth))
 
     _loading_cpy_function = _loaded_noop
 
@@ -481,8 +492,8 @@
         if tp.fldnames is None:
             return     # nothing to do with opaque structs
         layoutfuncname = '_cffi_layout_%s_%s' % (prefix, name)
-        self._prnt('  {"%s", %s, METH_NOARGS},' % (layoutfuncname,
-                                                   layoutfuncname))
+        self._prnt('  {"%s", %s, METH_NOARGS, NULL},' % (layoutfuncname,
+                                                         layoutfuncname))
 
     def _loading_struct_or_union(self, tp, prefix, name, module):
         if tp.fldnames is None:
@@ -589,13 +600,7 @@
                                                           'variable type'),))
             assert delayed
         else:
-            prnt('  if (LONG_MIN <= (%s) && (%s) <= LONG_MAX)' % (name, name))
-            prnt('    o = PyInt_FromLong((long)(%s));' % (name,))
-            prnt('  else if ((%s) <= 0)' % (name,))
-            prnt('    o = PyLong_FromLongLong((long long)(%s));' % (name,))
-            prnt('  else')
-            prnt('    o = PyLong_FromUnsignedLongLong('
-                 '(unsigned long long)(%s));' % (name,))
+            prnt('  o = _cffi_from_c_int_const(%s);' % name)
         prnt('  if (o == NULL)')
         prnt('    return -1;')
         if size_too:
@@ -632,13 +637,18 @@
     # ----------
     # enums
 
+    def _enum_funcname(self, prefix, name):
+        # "$enum_$1" => "___D_enum____D_1"
+        name = name.replace('$', '___D_')
+        return '_cffi_e_%s_%s' % (prefix, name)
+
     def _generate_cpy_enum_decl(self, tp, name, prefix='enum'):
         if tp.partial:
             for enumerator in tp.enumerators:
                 self._generate_cpy_const(True, enumerator, delayed=False)
             return
         #
-        funcname = '_cffi_e_%s_%s' % (prefix, name)
+        funcname = self._enum_funcname(prefix, name)
         prnt = self._prnt
         prnt('static int %s(PyObject *lib)' % funcname)
         prnt('{')
@@ -760,17 +770,30 @@
 #include <Python.h>
 #include <stddef.h>
 
-#ifdef MS_WIN32
-#include <malloc.h>   /* for alloca() */
-typedef __int8 int8_t;
-typedef __int16 int16_t;
-typedef __int32 int32_t;
-typedef __int64 int64_t;
-typedef unsigned __int8 uint8_t;
-typedef unsigned __int16 uint16_t;
-typedef unsigned __int32 uint32_t;
-typedef unsigned __int64 uint64_t;
-typedef unsigned char _Bool;
+/* this block of #ifs should be kept exactly identical between
+   c/_cffi_backend.c, cffi/vengine_cpy.py, cffi/vengine_gen.py */
+#if defined(_MSC_VER)
+# include <malloc.h>   /* for alloca() */
+# if _MSC_VER < 1600   /* MSVC < 2010 */
+   typedef __int8 int8_t;
+   typedef __int16 int16_t;
+   typedef __int32 int32_t;
+   typedef __int64 int64_t;
+   typedef unsigned __int8 uint8_t;
+   typedef unsigned __int16 uint16_t;
+   typedef unsigned __int32 uint32_t;
+   typedef unsigned __int64 uint64_t;
+# else
+#  include <stdint.h>
+# endif
+# if _MSC_VER < 1800   /* MSVC < 2013 */
+   typedef unsigned char _Bool;
+# endif
+#else
+# include <stdint.h>
+# if (defined (__SVR4) && defined (__sun)) || defined(_AIX)
+#  include <alloca.h>
+# endif
 #endif
 
 #if PY_MAJOR_VERSION < 3
@@ -795,6 +818,15 @@
 #define _cffi_to_c_double PyFloat_AsDouble
 #define _cffi_to_c_float PyFloat_AsDouble
 
+#define _cffi_from_c_int_const(x)                                        \
+    (((x) > 0) ?                                                         \
+        ((unsigned long long)(x) <= (unsigned long long)LONG_MAX) ?      \
+            PyInt_FromLong((long)(x)) :                                  \
+            PyLong_FromUnsignedLongLong((unsigned long long)(x)) :       \
+        ((long long)(x) >= (long long)LONG_MIN) ?                        \
+            PyInt_FromLong((long)(x)) :                                  \
+            PyLong_FromLongLong((long long)(x)))
+
 #define _cffi_from_c_int(x, type)                                        \
     (((type)-1) > 0 ?   /* unsigned */                                   \
         (sizeof(type) < sizeof(long) ? PyInt_FromLong(x) :               \
@@ -804,14 +836,14 @@
                                         PyLong_FromLongLong(x)))
 
 #define _cffi_to_c_int(o, type)                                          \
-    (sizeof(type) == 1 ? (((type)-1) > 0 ? _cffi_to_c_u8(o)              \
-                                         : _cffi_to_c_i8(o)) :           \
-     sizeof(type) == 2 ? (((type)-1) > 0 ? _cffi_to_c_u16(o)             \
-                                         : _cffi_to_c_i16(o)) :          \
-     sizeof(type) == 4 ? (((type)-1) > 0 ? _cffi_to_c_u32(o)             \
-                                         : _cffi_to_c_i32(o)) :          \
-     sizeof(type) == 8 ? (((type)-1) > 0 ? _cffi_to_c_u64(o)             \
-                                         : _cffi_to_c_i64(o)) :          \
+    (sizeof(type) == 1 ? (((type)-1) > 0 ? (type)_cffi_to_c_u8(o)        \
+                                         : (type)_cffi_to_c_i8(o)) :     \
+     sizeof(type) == 2 ? (((type)-1) > 0 ? (type)_cffi_to_c_u16(o)       \
+                                         : (type)_cffi_to_c_i16(o)) :    \
+     sizeof(type) == 4 ? (((type)-1) > 0 ? (type)_cffi_to_c_u32(o)       \
+                                         : (type)_cffi_to_c_i32(o)) :    \
+     sizeof(type) == 8 ? (((type)-1) > 0 ? (type)_cffi_to_c_u64(o)       \
+                                         : (type)_cffi_to_c_i64(o)) :    \
      (Py_FatalError("unsupported size for type " #type), 0))
 
 #define _cffi_to_c_i8                                                    \
@@ -885,25 +917,32 @@
     return PyBool_FromLong(was_alive);
 }
 
-static void _cffi_init(void)
+static int _cffi_init(void)
 {
-    PyObject *module = PyImport_ImportModule("_cffi_backend");
-    PyObject *c_api_object;
+    PyObject *module, *c_api_object = NULL;
 
+    module = PyImport_ImportModule("_cffi_backend");
     if (module == NULL)
-        return;
+        goto failure;
 
     c_api_object = PyObject_GetAttrString(module, "_C_API");
     if (c_api_object == NULL)
-        return;
+        goto failure;
     if (!PyCapsule_CheckExact(c_api_object)) {
-        Py_DECREF(c_api_object);
         PyErr_SetNone(PyExc_ImportError);
-        return;
+        goto failure;
     }
     memcpy(_cffi_exports, PyCapsule_GetPointer(c_api_object, "cffi"),
            _CFFI_NUM_EXPORTS * sizeof(void *));
+
+    Py_DECREF(module);
     Py_DECREF(c_api_object);
+    return 0;
+
+  failure:
+    Py_XDECREF(module);
+    Py_XDECREF(c_api_object);
+    return -1;
 }
 
 #define _cffi_type(num) ((CTypeDescrObject *)PyList_GET_ITEM(_cffi_types, num))
diff --git a/lib_pypy/cffi/vengine_gen.py b/lib_pypy/cffi/vengine_gen.py
--- a/lib_pypy/cffi/vengine_gen.py
+++ b/lib_pypy/cffi/vengine_gen.py
@@ -249,10 +249,10 @@
                     prnt('  /* %s */' % str(e))   # cannot verify it, ignore
         prnt('}')
         self.export_symbols.append(layoutfuncname)
-        prnt('ssize_t %s(ssize_t i)' % (layoutfuncname,))
+        prnt('intptr_t %s(intptr_t i)' % (layoutfuncname,))
         prnt('{')
         prnt('  struct _cffi_aligncheck { char x; %s y; };' % cname)
-        prnt('  static ssize_t nums[] = {')
+        prnt('  static intptr_t nums[] = {')
         prnt('    sizeof(%s),' % cname)
         prnt('    offsetof(struct _cffi_aligncheck, y),')
         for fname, ftype, fbitsize in tp.enumfields():
@@ -276,7 +276,7 @@
             return     # nothing to do with opaque structs
         layoutfuncname = '_cffi_layout_%s_%s' % (prefix, name)
         #
-        BFunc = self.ffi._typeof_locked("ssize_t(*)(ssize_t)")[0]
+        BFunc = self.ffi._typeof_locked("intptr_t(*)(intptr_t)")[0]
         function = module.load_function(BFunc, layoutfuncname)
         layout = []
         num = 0
@@ -410,13 +410,18 @@
     # ----------
     # enums
 
+    def _enum_funcname(self, prefix, name):
+        # "$enum_$1" => "___D_enum____D_1"
+        name = name.replace('$', '___D_')
+        return '_cffi_e_%s_%s' % (prefix, name)
+
     def _generate_gen_enum_decl(self, tp, name, prefix='enum'):
         if tp.partial:
             for enumerator in tp.enumerators:
                 self._generate_gen_const(True, enumerator)
             return
         #
-        funcname = '_cffi_e_%s_%s' % (prefix, name)
+        funcname = self._enum_funcname(prefix, name)
         self.export_symbols.append(funcname)
         prnt = self._prnt
         prnt('int %s(char *out_error)' % funcname)
@@ -453,7 +458,7 @@
         else:
             BType = self.ffi._typeof_locked("char[]")[0]
             BFunc = self.ffi._typeof_locked("int(*)(char*)")[0]
-            funcname = '_cffi_e_%s_%s' % (prefix, name)
+            funcname = self._enum_funcname(prefix, name)
             function = module.load_function(BFunc, funcname)
             p = self.ffi.new(BType, 256)
             if function(p) < 0:
@@ -547,20 +552,29 @@
 #include <errno.h>
 #include <sys/types.h>   /* XXX for ssize_t on some platforms */
 
-#ifdef _WIN32
-#  include <Windows.h>
-#  define snprintf _snprintf
-typedef __int8 int8_t;
-typedef __int16 int16_t;
-typedef __int32 int32_t;
-typedef __int64 int64_t;
-typedef unsigned __int8 uint8_t;
-typedef unsigned __int16 uint16_t;
-typedef unsigned __int32 uint32_t;
-typedef unsigned __int64 uint64_t;
-typedef SSIZE_T ssize_t;
-typedef unsigned char _Bool;
+/* this block of #ifs should be kept exactly identical between
+   c/_cffi_backend.c, cffi/vengine_cpy.py, cffi/vengine_gen.py */
+#if defined(_MSC_VER)
+# include <malloc.h>   /* for alloca() */
+# if _MSC_VER < 1600   /* MSVC < 2010 */
+   typedef __int8 int8_t;
+   typedef __int16 int16_t;
+   typedef __int32 int32_t;
+   typedef __int64 int64_t;
+   typedef unsigned __int8 uint8_t;
+   typedef unsigned __int16 uint16_t;
+   typedef unsigned __int32 uint32_t;
+   typedef unsigned __int64 uint64_t;
+# else
+#  include <stdint.h>
+# endif
+# if _MSC_VER < 1800   /* MSVC < 2013 */
+   typedef unsigned char _Bool;
+# endif
 #else
-#  include <stdint.h>
+# include <stdint.h>
+# if (defined (__SVR4) && defined (__sun)) || defined(_AIX)
+#  include <alloca.h>
+# endif
 #endif
 '''
diff --git a/pypy/doc/faq.rst b/pypy/doc/faq.rst
--- a/pypy/doc/faq.rst
+++ b/pypy/doc/faq.rst
@@ -465,9 +465,13 @@
 
 This is documented (here__ and here__).  It needs 4 GB of RAM to run
 "rpython targetpypystandalone" on top of PyPy, a bit more when running
-on CPython.  If you have less than 4 GB it will just swap forever (or
-fail if you don't have enough swap).  On 32-bit, divide the numbers by
-two.
+on top of CPython.  If you have less than 4 GB free, it will just swap
+forever (or fail if you don't have enough swap).  And we mean *free:*
+if the machine has 4 GB *in total,* then it will swap.
+
+On 32-bit, divide the numbers by two.  (We didn't try recently, but in
+the past it was possible to compile a 32-bit version on a 2 GB Linux
+machine with nothing else running: no Gnome/KDE, for example.)
 
 .. __: http://pypy.org/download.html#building-from-source
 .. __: https://pypy.readthedocs.org/en/latest/getting-started-python.html#translating-the-pypy-python-interpreter
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -30,3 +30,16 @@
 x86-64, the JIT backend has a special optimization that lets it emit
 directly a single MOV from a %gs- or %fs-based address. It seems
 actually to give a good boost in performance.
+
+.. branch: fast-gil
+A faster way to handle the GIL, particularly in JIT code. The GIL is
+now a composite of two concepts: a global number (it's just set from
+1 to 0 and back around CALL_RELEASE_GIL), and a real mutex. If there
+are threads waiting to acquire the GIL, one of them is actively
+checking the global number every 0.1 ms to 1 ms.  Overall, JIT loops
+full of external function calls now run a bit faster (if no thread was
+started yet), or a *lot* faster (if threads were started already).
+
+.. branch: jit-get-errno
+Optimize the errno handling in the JIT, notably around external
+function calls. Linux-only.
diff --git a/pypy/interpreter/executioncontext.py b/pypy/interpreter/executioncontext.py
--- a/pypy/interpreter/executioncontext.py
+++ b/pypy/interpreter/executioncontext.py
@@ -496,6 +496,13 @@
     """
 
 
+class UserDelCallback(object):
+    def __init__(self, w_obj, callback, descrname):
+        self.w_obj = w_obj
+        self.callback = callback
+        self.descrname = descrname
+        self.next = None
+
 class UserDelAction(AsyncAction):
     """An action that invokes all pending app-level __del__() method.
     This is done as an action instead of immediately when the
@@ -506,12 +513,18 @@
 
     def __init__(self, space):
         AsyncAction.__init__(self, space)
-        self.dying_objects = []
+        self.dying_objects = None
+        self.dying_objects_last = None
         self.finalizers_lock_count = 0
         self.enabled_at_app_level = True
 
     def register_callback(self, w_obj, callback, descrname):
-        self.dying_objects.append((w_obj, callback, descrname))
+        cb = UserDelCallback(w_obj, callback, descrname)
+        if self.dying_objects_last is None:
+            self.dying_objects = cb
+        else:
+            self.dying_objects_last.next = cb
+        self.dying_objects_last = cb
         self.fire()
 
     def perform(self, executioncontext, frame):
@@ -525,13 +538,33 @@
         # avoid too deep recursions of the kind of __del__ being called
         # while in the middle of another __del__ call.
         pending = self.dying_objects
-        self.dying_objects = []
+        self.dying_objects = None
+        self.dying_objects_last = None
         space = self.space
-        for i in range(len(pending)):
-            w_obj, callback, descrname = pending[i]
-            pending[i] = (None, None, None)
+        while pending is not None:
             try:
-                callback(w_obj)
+                pending.callback(pending.w_obj)
             except OperationError, e:
-                e.write_unraisable(space, descrname, w_obj)
+                e.write_unraisable(space, pending.descrname, pending.w_obj)
                 e.clear(space)   # break up reference cycles
+            pending = pending.next
+        #
+        # Note: 'dying_objects' used to be just a regular list instead
+        # of a chained list.  This was the cause of "leaks" if we have a
+        # program that constantly creates new objects with finalizers.
+        # Here is why: say 'dying_objects' is a long list, and there
+        # are n instances in it.  Then we spend some time in this
+        # function, possibly triggering more GCs, but keeping the list
+        # of length n alive.  Then the list is suddenly freed at the
+        # end, and we return to the user program.  At this point the
+        # GC limit is still very high, because just before, there was
+        # a list of length n alive.  Assume that the program continues
+        # to allocate a lot of instances with finalizers.  The high GC
+        # limit means that it could allocate a lot of instances before
+        # reaching it --- possibly more than n.  So the whole procedure
+        # repeats with higher and higher values of n.
+        #
+        # This does not occur in the current implementation because
+        # there is no list of length n: if n is large, then the GC
+        # will run several times while walking the list, but it will
+        # see lower and lower memory usage, with no lower bound of n.
diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py
--- a/pypy/interpreter/gateway.py
+++ b/pypy/interpreter/gateway.py
@@ -895,7 +895,7 @@
                     "use unwrap_spec(...=WrappedDefault(default))" % (
                     self._code.identifier, name, defaultval))
                 defs_w.append(None)
-            else:
+            elif name != '__args__' and name != 'args_w':
                 defs_w.append(space.wrap(defaultval))
         if self._code._unwrap_spec:
             UNDEFINED = object()
diff --git a/pypy/interpreter/miscutils.py b/pypy/interpreter/miscutils.py
--- a/pypy/interpreter/miscutils.py
+++ b/pypy/interpreter/miscutils.py
@@ -17,6 +17,9 @@
     def enter_thread(self, space):
         self._value = space.createexecutioncontext()
 
+    def try_enter_thread(self, space):
+        return False
+
     def signals_enabled(self):
         return True
 
diff --git a/pypy/interpreter/test/test_gateway.py b/pypy/interpreter/test/test_gateway.py
--- a/pypy/interpreter/test/test_gateway.py
+++ b/pypy/interpreter/test/test_gateway.py
@@ -726,6 +726,22 @@
             never_called
         py.test.raises(AssertionError, space.wrap, gateway.interp2app_temp(g))
 
+    def test_unwrap_spec_default_applevel_bug2(self):
+        space = self.space
+        def g(space, w_x, w_y=None, __args__=None):
+            return w_x
+        w_g = space.wrap(gateway.interp2app_temp(g))
+        w_42 = space.call_function(w_g, space.wrap(42))
+        assert space.int_w(w_42) == 42
+        py.test.raises(gateway.OperationError, space.call_function, w_g)
+        #
+        def g(space, w_x, w_y=None, args_w=None):
+            return w_x
+        w_g = space.wrap(gateway.interp2app_temp(g))
+        w_42 = space.call_function(w_g, space.wrap(42))
+        assert space.int_w(w_42) == 42
+        py.test.raises(gateway.OperationError, space.call_function, w_g)
+
     def test_interp2app_doc(self):
         space = self.space
         def f(space, w_x):
diff --git a/pypy/module/_cffi_backend/ccallback.py b/pypy/module/_cffi_backend/ccallback.py
--- a/pypy/module/_cffi_backend/ccallback.py
+++ b/pypy/module/_cffi_backend/ccallback.py
@@ -183,9 +183,12 @@
         misc._raw_memclear(ll_res, SIZE_OF_FFI_ARG)
         return
     #
+    must_leave = False
     ec = None
+    space = callback.space
     try:
-        ec = cerrno.get_errno_container(callback.space)
+        must_leave = space.threadlocals.try_enter_thread(space)
+        ec = cerrno.get_errno_container(space)
         cerrno.save_errno_into(ec, e)
         extra_line = ''
         try:
@@ -206,5 +209,7 @@
         except OSError:
             pass
         callback.write_error_return_value(ll_res)
+    if must_leave:
+        space.threadlocals.leave_thread(space)
     if ec is not None:
         cerrno.restore_errno_from(ec)
diff --git a/pypy/module/_cffi_backend/ctypefunc.py b/pypy/module/_cffi_backend/ctypefunc.py
--- a/pypy/module/_cffi_backend/ctypefunc.py
+++ b/pypy/module/_cffi_backend/ctypefunc.py
@@ -4,7 +4,7 @@
 
 import sys
 
-from rpython.rlib import jit, clibffi, jit_libffi
+from rpython.rlib import jit, clibffi, jit_libffi, rgc
 from rpython.rlib.jit_libffi import (CIF_DESCRIPTION, CIF_DESCRIPTION_P,
     FFI_TYPE, FFI_TYPE_P, FFI_TYPE_PP, SIZE_OF_FFI_ARG)
 from rpython.rlib.objectmodel import we_are_translated, instantiate
@@ -63,6 +63,7 @@
         CifDescrBuilder(fvarargs, self.ctitem).rawallocate(ctypefunc)
         return ctypefunc
 
+    @rgc.must_be_light_finalizer
     def __del__(self):
         if self.cif_descr:
             lltype.free(self.cif_descr, flavor='raw')
@@ -156,8 +157,8 @@
                     data = rffi.ptradd(buffer, cif_descr.exchange_args[i])
                     flag = get_mustfree_flag(data)
                     if flag == 1:
-                        raw_string = rffi.cast(rffi.CCHARPP, data)[0]
-                        lltype.free(raw_string, flavor='raw')
+                        raw_cdata = rffi.cast(rffi.CCHARPP, data)[0]
+                        lltype.free(raw_cdata, flavor='raw')
             lltype.free(buffer, flavor='raw')
         return w_res
 
diff --git a/pypy/module/_socket/__init__.py b/pypy/module/_socket/__init__.py
--- a/pypy/module/_socket/__init__.py
+++ b/pypy/module/_socket/__init__.py
@@ -6,8 +6,8 @@
     }
 
     interpleveldefs = {
-        'SocketType':  'interp_socket.W_RSocket',
-        'socket'    :  'interp_socket.W_RSocket',
+        'SocketType':  'interp_socket.W_Socket',
+        'socket'    :  'interp_socket.W_Socket',
         'error'     :  'interp_socket.get_error(space, "error")',
         'herror'    :  'interp_socket.get_error(space, "herror")',
         'gaierror'  :  'interp_socket.get_error(space, "gaierror")',
diff --git a/pypy/module/_socket/interp_func.py b/pypy/module/_socket/interp_func.py
--- a/pypy/module/_socket/interp_func.py
+++ b/pypy/module/_socket/interp_func.py
@@ -1,8 +1,12 @@
-from pypy.interpreter.gateway import unwrap_spec, WrappedDefault
-from pypy.module._socket.interp_socket import converted_error, W_RSocket, addr_as_object, ipaddr_from_object
 from rpython.rlib import rsocket
 from rpython.rlib.rsocket import SocketError, INVALID_SOCKET
+
 from pypy.interpreter.error import OperationError
+from pypy.interpreter.gateway import unwrap_spec, WrappedDefault
+from pypy.module._socket.interp_socket import (
+    converted_error, W_Socket, addr_as_object, ipaddr_from_object
+)
+
 
 def gethostname(space):
     """gethostname() -> string
@@ -136,10 +140,10 @@
     The remaining arguments are the same as for socket().
     """
     try:
-        sock = rsocket.fromfd(fd, family, type, proto, W_RSocket)
+        sock = rsocket.fromfd(fd, family, type, proto)
     except SocketError, e:
         raise converted_error(space, e)
-    return space.wrap(sock)
+    return space.wrap(W_Socket(sock))
 
 @unwrap_spec(family=int, type=int, proto=int)
 def socketpair(space, family=rsocket.socketpair_default_family,
@@ -153,10 +157,13 @@
     AF_UNIX if defined on the platform; otherwise, the default is AF_INET.
     """
     try:
-        sock1, sock2 = rsocket.socketpair(family, type, proto, W_RSocket)
+        sock1, sock2 = rsocket.socketpair(family, type, proto)
     except SocketError, e:
         raise converted_error(space, e)
-    return space.newtuple([space.wrap(sock1), space.wrap(sock2)])
+    return space.newtuple([
+        space.wrap(W_Socket(sock1)),
+        space.wrap(W_Socket(sock2))
+    ])
 
 # The following 4 functions refuse all negative numbers, like CPython 2.6.
 # They could also check that the argument is not too large, but CPython 2.6
diff --git a/pypy/module/_socket/interp_socket.py b/pypy/module/_socket/interp_socket.py
--- a/pypy/module/_socket/interp_socket.py
+++ b/pypy/module/_socket/interp_socket.py
@@ -1,14 +1,18 @@
+from rpython.rlib import rsocket
+from rpython.rlib.rarithmetic import intmask
+from rpython.rlib.rsocket import (
+    RSocket, AF_INET, SOCK_STREAM, SocketError, SocketErrorWithErrno,
+    RSocketError
+)
+from rpython.rtyper.lltypesystem import lltype, rffi
+
+from pypy.interpreter import gateway
 from pypy.interpreter.baseobjspace import W_Root
-from pypy.interpreter.typedef import TypeDef, make_weakref_descr,\
-     interp_attrproperty
+from pypy.interpreter.error import OperationError, oefmt
 from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault
-from rpython.rlib.rarithmetic import intmask
-from rpython.rtyper.lltypesystem import lltype, rffi
-from rpython.rlib import rsocket
-from rpython.rlib.rsocket import RSocket, AF_INET, SOCK_STREAM
-from rpython.rlib.rsocket import SocketError, SocketErrorWithErrno, RSocketError
-from pypy.interpreter.error import OperationError, oefmt
-from pypy.interpreter import gateway
+from pypy.interpreter.typedef import (
+    GetSetProperty, TypeDef, make_weakref_descr
+)
 
 
 # XXX Hack to seperate rpython and pypy
@@ -124,10 +128,18 @@
     return addr
 
 
-class W_RSocket(W_Root, RSocket):
-    def __del__(self):
-        self.clear_all_weakrefs()
-        RSocket.__del__(self)
+class W_Socket(W_Root):
+    def __init__(self, sock):
+        self.sock = sock
+
+    def get_type_w(self, space):
+        return space.wrap(self.sock.type)
+
+    def get_proto_w(self, space):
+        return space.wrap(self.sock.proto)
+
+    def get_family_w(self, space):
+        return space.wrap(self.sock.family)
 
     def accept_w(self, space):
         """accept() -> (socket object, address info)
@@ -137,22 +149,22 @@
         info is a pair (hostaddr, port).
         """
         try:
-            fd, addr = self.accept()
+            fd, addr = self.sock.accept()
             sock = rsocket.make_socket(
-                fd, self.family, self.type, self.proto, W_RSocket)
-            return space.newtuple([space.wrap(sock),
+                fd, self.sock.family, self.sock.type, self.sock.proto)
+            return space.newtuple([space.wrap(W_Socket(sock)),
                                    addr_as_object(addr, sock.fd, space)])
-        except SocketError, e:
+        except SocketError as e:
             raise converted_error(space, e)
 
     # convert an Address into an app-level object
     def addr_as_object(self, space, address):
-        return addr_as_object(address, self.fd, space)
+        return addr_as_object(address, self.sock.fd, space)
 
     # convert an app-level object into an Address
     # based on the current socket's family
     def addr_from_object(self, space, w_address):
-        return addr_from_object(self.family, space, w_address)
+        return addr_from_object(self.sock.family, space, w_address)
 
     def bind_w(self, space, w_addr):
         """bind(address)
@@ -162,8 +174,8 @@
         sockets the address is a tuple (ifname, proto [,pkttype [,hatype]])
         """
         try:
-            self.bind(self.addr_from_object(space, w_addr))
-        except SocketError, e:
+            self.sock.bind(self.addr_from_object(space, w_addr))
+        except SocketError as e:
             raise converted_error(space, e)
 
     def close_w(self, space):
@@ -172,7 +184,7 @@
         Close the socket.  It cannot be used after this call.
         """
         try:
-            self.close()
+            self.sock.close()
         except SocketError:
             # cpython doesn't return any errors on close
             pass
@@ -184,8 +196,8 @@
         is a pair (host, port).
         """
         try:
-            self.connect(self.addr_from_object(space, w_addr))
-        except SocketError, e:
+            self.sock.connect(self.addr_from_object(space, w_addr))
+        except SocketError as e:
             raise converted_error(space, e)
 
     def connect_ex_w(self, space, w_addr):
@@ -196,15 +208,16 @@
         """
         try:
             addr = self.addr_from_object(space, w_addr)
-        except SocketError, e:
+        except SocketError as e:
             raise converted_error(space, e)
-        error = self.connect_ex(addr)
+        error = self.sock.connect_ex(addr)
         return space.wrap(error)
 
     def dup_w(self, space):
         try:
-            return self.dup(W_RSocket)
-        except SocketError, e:
+            sock = self.sock.dup()
+            return W_Socket(sock)
+        except SocketError as e:
             raise converted_error(space, e)
 
     def fileno_w(self, space):
@@ -212,7 +225,7 @@
 
         Return the integer file descriptor of the socket.
         """
-        return space.wrap(intmask(self.fd))
+        return space.wrap(intmask(self.sock.fd))
 
     def getpeername_w(self, space):
         """getpeername() -> address info
@@ -221,9 +234,9 @@
         info is a pair (hostaddr, port).
         """
         try:
-            addr = self.getpeername()
-            return addr_as_object(addr, self.fd, space)
-        except SocketError, e:
+            addr = self.sock.getpeername()
+            return addr_as_object(addr, self.sock.fd, space)
+        except SocketError as e:
             raise converted_error(space, e)
 
     def getsockname_w(self, space):
@@ -233,9 +246,9 @@
         info is a pair (hostaddr, port).
         """
         try:
-            addr = self.getsockname()
-            return addr_as_object(addr, self.fd, space)
-        except SocketError, e:
+            addr = self.sock.getsockname()
+            return addr_as_object(addr, self.sock.fd, space)
+        except SocketError as e:
             raise converted_error(space, e)
 
     @unwrap_spec(level=int, optname=int)
@@ -248,11 +261,11 @@
         """
         if w_buflen is None:
             try:
-                return space.wrap(self.getsockopt_int(level, optname))
-            except SocketError, e:
+                return space.wrap(self.sock.getsockopt_int(level, optname))
+            except SocketError as e:
                 raise converted_error(space, e)
         buflen = space.int_w(w_buflen)
-        return space.wrap(self.getsockopt(level, optname, buflen))
+        return space.wrap(self.sock.getsockopt(level, optname, buflen))
 
     def gettimeout_w(self, space):
         """gettimeout() -> timeout
@@ -260,7 +273,7 @@
         Returns the timeout in floating seconds associated with socket
         operations. A timeout of None indicates that timeouts on socket
         """
-        timeout = self.gettimeout()
+        timeout = self.sock.gettimeout()
         if timeout < 0.0:
             return space.w_None
         return space.wrap(timeout)
@@ -274,8 +287,8 @@
         will allow before refusing new connections.
         """
         try:
-            self.listen(backlog)
-        except SocketError, e:
+            self.sock.listen(backlog)
+        except SocketError as e:
             raise converted_error(space, e)
 
     @unwrap_spec(w_mode = WrappedDefault("r"),
@@ -298,8 +311,8 @@
         the remote end is closed and all data is read, return the empty string.
         """
         try:
-            data = self.recv(buffersize, flags)
-        except SocketError, e:
+            data = self.sock.recv(buffersize, flags)
+        except SocketError as e:
             raise converted_error(space, e)
         return space.wrap(data)
 
@@ -310,13 +323,13 @@
         Like recv(buffersize, flags) but also return the sender's address info.
         """
         try:
-            data, addr = self.recvfrom(buffersize, flags)
+            data, addr = self.sock.recvfrom(buffersize, flags)
             if addr:
-                w_addr = addr_as_object(addr, self.fd, space)
+                w_addr = addr_as_object(addr, self.sock.fd, space)
             else:
                 w_addr = space.w_None
             return space.newtuple([space.wrap(data), w_addr])
-        except SocketError, e:
+        except SocketError as e:
             raise converted_error(space, e)
 
     @unwrap_spec(data='bufferstr', flags=int)
@@ -328,8 +341,8 @@
         sent; this may be less than len(data) if the network is busy.
         """
         try:
-            count = self.send(data, flags)
-        except SocketError, e:
+            count = self.sock.send(data, flags)
+        except SocketError as e:
             raise converted_error(space, e)
         return space.wrap(count)
 
@@ -343,8 +356,9 @@
         to tell how much data has been sent.
         """
         try:
-            self.sendall(data, flags, space.getexecutioncontext().checksignals)
-        except SocketError, e:
+            self.sock.sendall(
+                data, flags, space.getexecutioncontext().checksignals)
+        except SocketError as e:
             raise converted_error(space, e)
 
     @unwrap_spec(data='bufferstr')
@@ -364,8 +378,8 @@
             w_addr = w_param3
         try:
             addr = self.addr_from_object(space, w_addr)
-            count = self.sendto(data, flags, addr)
-        except SocketError, e:
+            count = self.sock.sendto(data, flags, addr)
+        except SocketError as e:
             raise converted_error(space, e)
         return space.wrap(count)
 
@@ -377,7 +391,7 @@
         setblocking(True) is equivalent to settimeout(None);
         setblocking(False) is equivalent to settimeout(0.0).
         """
-        self.setblocking(flag)
+        self.sock.setblocking(flag)
 
     @unwrap_spec(level=int, optname=int)
     def setsockopt_w(self, space, level, optname, w_optval):
@@ -391,13 +405,13 @@
         except:
             optval = space.str_w(w_optval)
             try:
-                self.setsockopt(level, optname, optval)
-            except SocketError, e:
+                self.sock.setsockopt(level, optname, optval)
+            except SocketError as e:
                 raise converted_error(space, e)
             return
         try:
-            self.setsockopt_int(level, optname, optval)
-        except SocketError, e:
+            self.sock.setsockopt_int(level, optname, optval)
+        except SocketError as e:
             raise converted_error(space, e)
 
     def settimeout_w(self, space, w_timeout):
@@ -415,7 +429,7 @@
             if timeout < 0.0:
                 raise OperationError(space.w_ValueError,
                                      space.wrap('Timeout value out of range'))
-        self.settimeout(timeout)
+        self.sock.settimeout(timeout)
 
     @unwrap_spec(nbytes=int, flags=int)
     def recv_into_w(self, space, w_buffer, nbytes=0, flags=0):
@@ -424,8 +438,8 @@
         if nbytes == 0 or nbytes > lgt:
             nbytes = lgt
         try:
-            return space.wrap(self.recvinto(rwbuffer, nbytes, flags))
-        except SocketError, e:
+            return space.wrap(self.sock.recvinto(rwbuffer, nbytes, flags))
+        except SocketError as e:
             raise converted_error(space, e)
 
     @unwrap_spec(nbytes=int, flags=int)
@@ -435,13 +449,13 @@
         if nbytes == 0 or nbytes > lgt:
             nbytes = lgt
         try:
-            readlgt, addr = self.recvfrom_into(rwbuffer, nbytes, flags)
+            readlgt, addr = self.sock.recvfrom_into(rwbuffer, nbytes, flags)
             if addr:
-                w_addr = addr_as_object(addr, self.fd, space)
+                w_addr = addr_as_object(addr, self.sock.fd, space)
             else:
                 w_addr = space.w_None
             return space.newtuple([space.wrap(readlgt), w_addr])
-        except SocketError, e:
+        except SocketError as e:
             raise converted_error(space, e)
 
     @unwrap_spec(cmd=int)
@@ -473,7 +487,7 @@
                     option_ptr.c_keepaliveinterval = space.uint_w(w_interval)
 
                 res = _c.WSAIoctl(
-                    self.fd, cmd, value_ptr, value_size,
+                    self.sock.fd, cmd, value_ptr, value_size,
                     rffi.NULL, 0, recv_ptr, rffi.NULL, rffi.NULL)
                 if res < 0:
                     raise converted_error(space, rsocket.last_error())
@@ -494,8 +508,8 @@
         (flag == SHUT_RDWR).
         """
         try:
-            self.shutdown(how)
-        except SocketError, e:
+            self.sock.shutdown(how)
+        except SocketError as e:
             raise converted_error(space, e)
 
     #------------------------------------------------------------
@@ -536,12 +550,13 @@
 @unwrap_spec(family=int, type=int, proto=int)
 def newsocket(space, w_subtype, family=AF_INET,
               type=SOCK_STREAM, proto=0):
-    sock = space.allocate_instance(W_RSocket, w_subtype)
+    self = space.allocate_instance(W_Socket, w_subtype)
     try:
-        W_RSocket.__init__(sock, family, type, proto)
-    except SocketError, e:
+        sock = RSocket(family, type, proto)
+    except SocketError as e:
         raise converted_error(space, e)
-    return space.wrap(sock)
+    W_Socket.__init__(self, sock)
+    return space.wrap(self)
 descr_socket_new = interp2app(newsocket)
 
 # ____________________________________________________________
@@ -597,10 +612,10 @@
 
 socketmethods = {}
 for methodname in socketmethodnames:
-    method = getattr(W_RSocket, methodname + '_w')
+    method = getattr(W_Socket, methodname + '_w')
     socketmethods[methodname] = interp2app(method)
 
-W_RSocket.typedef = TypeDef("_socket.socket",
+W_Socket.typedef = TypeDef("_socket.socket",
     __doc__ = """\
 socket([family[, type[, proto]]]) -> socket object
 
@@ -639,9 +654,9 @@
 
  [*] not available on all platforms!""",
     __new__ = descr_socket_new,
-    __weakref__ = make_weakref_descr(W_RSocket),
-    type = interp_attrproperty('type', W_RSocket),
-    proto = interp_attrproperty('proto', W_RSocket),
-    family = interp_attrproperty('family', W_RSocket),
+    __weakref__ = make_weakref_descr(W_Socket),
+    type = GetSetProperty(W_Socket.get_type_w),
+    proto = GetSetProperty(W_Socket.get_proto_w),
+    family = GetSetProperty(W_Socket.get_family_w),
     ** socketmethods
     )
diff --git a/pypy/module/_weakref/test/test_weakref.py b/pypy/module/_weakref/test/test_weakref.py
--- a/pypy/module/_weakref/test/test_weakref.py
+++ b/pypy/module/_weakref/test/test_weakref.py
@@ -15,6 +15,10 @@
         gc.collect()
         assert ref() is None
 
+    def test_missing_arg(self):
+        import _weakref
+        raises(TypeError, _weakref.ref)
+
     def test_callback(self):
         import _weakref, gc
         class A(object):
diff --git a/pypy/module/test_lib_pypy/cffi_tests/backend_tests.py b/pypy/module/test_lib_pypy/cffi_tests/backend_tests.py
--- a/pypy/module/test_lib_pypy/cffi_tests/backend_tests.py
+++ b/pypy/module/test_lib_pypy/cffi_tests/backend_tests.py
@@ -866,25 +866,25 @@
 
     def test_enum(self):
         ffi = FFI(backend=self.Backend())
-        ffi.cdef("enum foo { A, B, CC, D };")
-        assert ffi.string(ffi.cast("enum foo", 0)) == "A"
-        assert ffi.string(ffi.cast("enum foo", 2)) == "CC"
-        assert ffi.string(ffi.cast("enum foo", 3)) == "D"
+        ffi.cdef("enum foo { A0, B0, CC0, D0 };")
+        assert ffi.string(ffi.cast("enum foo", 0)) == "A0"
+        assert ffi.string(ffi.cast("enum foo", 2)) == "CC0"
+        assert ffi.string(ffi.cast("enum foo", 3)) == "D0"
         assert ffi.string(ffi.cast("enum foo", 4)) == "4"
-        ffi.cdef("enum bar { A, B=-2, CC, D, E };")
-        assert ffi.string(ffi.cast("enum bar", 0)) == "A"
-        assert ffi.string(ffi.cast("enum bar", -2)) == "B"
-        assert ffi.string(ffi.cast("enum bar", -1)) == "CC"
-        assert ffi.string(ffi.cast("enum bar", 1)) == "E"
+        ffi.cdef("enum bar { A1, B1=-2, CC1, D1, E1 };")
+        assert ffi.string(ffi.cast("enum bar", 0)) == "A1"
+        assert ffi.string(ffi.cast("enum bar", -2)) == "B1"
+        assert ffi.string(ffi.cast("enum bar", -1)) == "CC1"
+        assert ffi.string(ffi.cast("enum bar", 1)) == "E1"
         assert ffi.cast("enum bar", -2) != ffi.cast("enum bar", -2)
         assert ffi.cast("enum foo", 0) != ffi.cast("enum bar", 0)
         assert ffi.cast("enum bar", 0) != ffi.cast("int", 0)
-        assert repr(ffi.cast("enum bar", -1)) == "<cdata 'enum bar' -1: CC>"
+        assert repr(ffi.cast("enum bar", -1)) == "<cdata 'enum bar' -1: CC1>"
         assert repr(ffi.cast("enum foo", -1)) == (  # enums are unsigned, if
             "<cdata 'enum foo' 4294967295>")        # they contain no neg value
-        ffi.cdef("enum baz { A=0x1000, B=0x2000 };")
-        assert ffi.string(ffi.cast("enum baz", 0x1000)) == "A"
-        assert ffi.string(ffi.cast("enum baz", 0x2000)) == "B"
+        ffi.cdef("enum baz { A2=0x1000, B2=0x2000 };")
+        assert ffi.string(ffi.cast("enum baz", 0x1000)) == "A2"
+        assert ffi.string(ffi.cast("enum baz", 0x2000)) == "B2"
 
     def test_enum_in_struct(self):
         ffi = FFI(backend=self.Backend())
@@ -1323,6 +1323,16 @@
         e = ffi.cast("enum e", 0)
         assert ffi.string(e) == "AA"     # pick the first one arbitrarily
 
+    def test_enum_refer_previous_enum_value(self):
+        ffi = FFI(backend=self.Backend())
+        ffi.cdef("enum e { AA, BB=2, CC=4, DD=BB, EE, FF=CC, GG=FF };")
+        assert ffi.string(ffi.cast("enum e", 2)) == "BB"
+        assert ffi.string(ffi.cast("enum e", 3)) == "EE"
+        assert ffi.sizeof("char[DD]") == 2
+        assert ffi.sizeof("char[EE]") == 3
+        assert ffi.sizeof("char[FF]") == 4
+        assert ffi.sizeof("char[GG]") == 4
+
     def test_nested_anonymous_struct(self):
         ffi = FFI(backend=self.Backend())
         ffi.cdef("""
@@ -1544,6 +1554,7 @@
         ffi2.include(ffi1)
         p = ffi2.cast("enum foo", 1)
         assert ffi2.string(p) == "FB"
+        assert ffi2.sizeof("char[FC]") == 2
 
     def test_include_typedef_2(self):
         backend = self.Backend()
@@ -1564,10 +1575,32 @@
         assert ffi.alignof("struct is_packed") == 1
         s = ffi.new("struct is_packed[2]")
         s[0].b = 42623381
-        s[0].a = 'X'
+        s[0].a = b'X'
         s[1].b = -4892220
-        s[1].a = 'Y'
+        s[1].a = b'Y'
         assert s[0].b == 42623381
-        assert s[0].a == 'X'
+        assert s[0].a == b'X'
         assert s[1].b == -4892220
-        assert s[1].a == 'Y'
+        assert s[1].a == b'Y'
+
+    def test_define_integer_constant(self):
+        ffi = FFI(backend=self.Backend())
+        ffi.cdef("""
+            #define DOT_0 0
+            #define DOT 100
+            #define DOT_OCT 0100l
+            #define DOT_HEX 0x100u
+            #define DOT_HEX2 0X10
+            #define DOT_UL 1000UL
+            enum foo {AA, BB=DOT, CC};
+        """)
+        lib = ffi.dlopen(None)
+        assert ffi.string(ffi.cast("enum foo", 100)) == "BB"
+        assert lib.DOT_0 == 0
+        assert lib.DOT == 100
+        assert lib.DOT_OCT == 0o100
+        assert lib.DOT_HEX == 0x100
+        assert lib.DOT_HEX2 == 0x10
+        assert lib.DOT_UL == 1000
+
+
diff --git a/pypy/module/test_lib_pypy/cffi_tests/test_function.py b/pypy/module/test_lib_pypy/cffi_tests/test_function.py
--- a/pypy/module/test_lib_pypy/cffi_tests/test_function.py
+++ b/pypy/module/test_lib_pypy/cffi_tests/test_function.py
@@ -36,13 +36,11 @@
         return self._value
 
 lib_m = 'm'
-has_sinf = True
 if sys.platform == 'win32':
     #there is a small chance this fails on Mingw via environ $CC
     import distutils.ccompiler
     if distutils.ccompiler.get_default_compiler() == 'msvc':
         lib_m = 'msvcrt'
-        has_sinf = False
 
 class TestFunction(object):
     Backend = CTypesBackend
@@ -57,8 +55,8 @@
         assert x == math.sin(1.23)
 
     def test_sinf(self):
-        if not has_sinf:
-            py.test.skip("sinf not available")
+        if sys.platform == 'win32':
+            py.test.skip("no sinf found in the Windows stdlib")
         ffi = FFI(backend=self.Backend())
         ffi.cdef("""
             float sinf(float x);
diff --git a/pypy/module/test_lib_pypy/cffi_tests/test_parsing.py b/pypy/module/test_lib_pypy/cffi_tests/test_parsing.py
--- a/pypy/module/test_lib_pypy/cffi_tests/test_parsing.py
+++ b/pypy/module/test_lib_pypy/cffi_tests/test_parsing.py
@@ -162,9 +162,10 @@
 
 def test_define_not_supported_for_now():
     ffi = FFI(backend=FakeBackend())
-    e = py.test.raises(CDefError, ffi.cdef, "#define FOO 42")
-    assert str(e.value) == \
-           'only supports the syntax "#define FOO ..." for now (literally)'
+    e = py.test.raises(CDefError, ffi.cdef, '#define FOO "blah"')
+    assert str(e.value) == (
+        'only supports the syntax "#define FOO ..." (literally)'
+        ' or "#define FOO 0x1FF" for now')
 
 def test_unnamed_struct():
     ffi = FFI(backend=FakeBackend())
diff --git a/pypy/module/test_lib_pypy/cffi_tests/test_verify.py b/pypy/module/test_lib_pypy/cffi_tests/test_verify.py
--- a/pypy/module/test_lib_pypy/cffi_tests/test_verify.py
+++ b/pypy/module/test_lib_pypy/cffi_tests/test_verify.py
@@ -1,5 +1,5 @@
 # Generated by pypy/tool/import_cffi.py
-import py
+import py, re
 import sys, os, math, weakref
 from cffi import FFI, VerificationError, VerificationMissing, model
 from pypy.module.test_lib_pypy.cffi_tests.support import *
@@ -30,6 +30,24 @@
 def setup_module():
     import cffi.verifier
     cffi.verifier.cleanup_tmpdir()
+    #
+    # check that no $ sign is produced in the C file; it used to be the
+    # case that anonymous enums would produce '$enum_$1', which was
+    # used as part of a function name.  GCC accepts such names, but it's
+    # apparently non-standard.
+    _r_comment = re.compile(r"/\*.*?\*/|//.*?$", re.DOTALL | re.MULTILINE)
+    _r_string = re.compile(r'\".*?\"')
+    def _write_source_and_check(self, file=None):
+        base_write_source(self, file)
+        if file is None:
+            f = open(self.sourcefilename)
+            data = f.read()
+            f.close()
+            data = _r_comment.sub(' ', data)
+            data = _r_string.sub('"skipped"', data)
+            assert '$' not in data
+    base_write_source = cffi.verifier.Verifier._write_source
+    cffi.verifier.Verifier._write_source = _write_source_and_check
 
 
 def test_module_type():
@@ -154,6 +172,9 @@
 
 
 all_primitive_types = model.PrimitiveType.ALL_PRIMITIVE_TYPES
+if sys.platform == 'win32':
+    all_primitive_types = all_primitive_types.copy()
+    del all_primitive_types['ssize_t']
 all_integer_types = sorted(tp for tp in all_primitive_types
                            if all_primitive_types[tp] == 'i')
 all_float_types = sorted(tp for tp in all_primitive_types
@@ -1453,8 +1474,8 @@
     assert func() == 42
 
 def test_FILE_stored_in_stdout():
-    if sys.platform == 'win32':
-        py.test.skip("MSVC: cannot assign to stdout")
+    if not sys.platform.startswith('linux'):
+        py.test.skip("likely, we cannot assign to stdout")
     ffi = FFI()
     ffi.cdef("int printf(const char *, ...); FILE *setstdout(FILE *);")
     lib = ffi.verify("""
@@ -1637,8 +1658,8 @@
     ffi = FFI()
     ffi.cdef("""
         int (*python_callback)(int how_many, int *values);
-        void *const c_callback;   /* pass this ptr to C routines */
-        int some_c_function(void *cb);
+        int (*const c_callback)(int,...);   /* pass this ptr to C routines */
+        int some_c_function(int(*cb)(int,...));
     """)
     lib = ffi.verify("""
         #include <stdarg.h>
@@ -1885,3 +1906,60 @@
     p = lib.f2(42)
     x = lib.f1(p)
     assert x == 42
+
+def _run_in_multiple_threads(test1):
+    test1()
+    import sys
+    try:
+        import thread
+    except ImportError:
+        import _thread as thread
+    errors = []
+    def wrapper(lock):
+        try:
+            test1()
+        except:
+            errors.append(sys.exc_info())
+        lock.release()
+    locks = []
+    for i in range(10):
+        _lock = thread.allocate_lock()
+        _lock.acquire()
+        thread.start_new_thread(wrapper, (_lock,))
+        locks.append(_lock)
+    for _lock in locks:
+        _lock.acquire()
+        if errors:
+            raise errors[0][1]
+
+def test_errno_working_even_with_pypys_jit():
+    ffi = FFI()
+    ffi.cdef("int f(int);")
+    lib = ffi.verify("""
+        #include <errno.h>
+        int f(int x) { return (errno = errno + x); }
+    """)
+    @_run_in_multiple_threads
+    def test1():
+        ffi.errno = 0
+        for i in range(10000):
+            e = lib.f(1)
+            assert e == i + 1
+            assert ffi.errno == e
+        for i in range(10000):
+            ffi.errno = i
+            e = lib.f(42)
+            assert e == i + 42
+
+def test_getlasterror_working_even_with_pypys_jit():
+    if sys.platform != 'win32':
+        py.test.skip("win32-only test")
+    ffi = FFI()
+    ffi.cdef("void SetLastError(DWORD);")
+    lib = ffi.dlopen("Kernel32.dll")
+    @_run_in_multiple_threads
+    def test1():
+        for i in range(10000):
+            n = (1 << 29) + i
+            lib.SetLastError(n)
+            assert ffi.getwinerror()[0] == n
diff --git a/pypy/module/test_lib_pypy/cffi_tests/test_version.py b/pypy/module/test_lib_pypy/cffi_tests/test_version.py
--- a/pypy/module/test_lib_pypy/cffi_tests/test_version.py
+++ b/pypy/module/test_lib_pypy/cffi_tests/test_version.py
@@ -11,7 +11,6 @@
     '0.7.1': '0.7',     # did not change
     '0.7.2': '0.7',     # did not change
     '0.8.1': '0.8',     # did not change (essentially)
-    '0.8.2': '0.8',     # did not change
     }
 
 def test_version():
@@ -26,7 +25,7 @@
     content = open(p).read()
     #
     v = cffi.__version__
-    assert ("version = '%s'\n" % BACKEND_VERSIONS.get(v, v)) in content
+    assert ("version = '%s'\n" % v[:3]) in content
     assert ("release = '%s'\n" % v) in content
 
 def test_doc_version_file():
diff --git a/pypy/module/thread/gil.py b/pypy/module/thread/gil.py
--- a/pypy/module/thread/gil.py
+++ b/pypy/module/thread/gil.py
@@ -7,7 +7,7 @@
 # all but one will be blocked.  The other threads get a chance to run
 # from time to time, using the periodic action GILReleaseAction.
 
-from rpython.rlib import rthread
+from rpython.rlib import rthread, rgil
 from pypy.module.thread.error import wrap_thread_error
 from pypy.interpreter.executioncontext import PeriodicAsyncAction
 from pypy.module.thread.threadlocals import OSThreadLocals
@@ -25,8 +25,7 @@
                                                   use_bytecode_counter=True)
 
     def _initialize_gil(self, space):
-        if not rthread.gil_allocate():
-            raise wrap_thread_error(space, "can't allocate GIL")
+        rgil.gil_allocate()
 
     def setup_threads(self, space):
         """Enable threads in the object space, if they haven't already been."""
@@ -71,15 +70,13 @@
 def before_external_call():
     # this function must not raise, in such a way that the exception
     # transformer knows that it cannot raise!
-    e = get_errno()
-    rthread.gil_release()
-    set_errno(e)
+    rgil.gil_release()
 before_external_call._gctransformer_hint_cannot_collect_ = True
 before_external_call._dont_reach_me_in_del_ = True
 
 def after_external_call():
     e = get_errno()
-    rthread.gil_acquire()
+    rgil.gil_acquire()
     rthread.gc_thread_run()
     after_thread_switch()
     set_errno(e)
@@ -97,7 +94,7 @@
     # explicitly release the gil, in a way that tries to give more
     # priority to other threads (as opposed to continuing to run in
     # the same thread).
-    if rthread.gil_yield_thread():
+    if rgil.gil_yield_thread():
         rthread.gc_thread_run()
         after_thread_switch()
 do_yield_thread._gctransformer_hint_close_stack_ = True
diff --git a/pypy/module/thread/threadlocals.py b/pypy/module/thread/threadlocals.py
--- a/pypy/module/thread/threadlocals.py
+++ b/pypy/module/thread/threadlocals.py
@@ -27,6 +27,12 @@
         "Notification that the current thread is about to start running."
         self._set_ec(space.createexecutioncontext())
 
+    def try_enter_thread(self, space):
+        if rthread.get_ident() in self._valuedict:
+            return False
+        self.enter_thread(space)
+        return True
+
     def _set_ec(self, ec):
         ident = rthread.get_ident()
         if self._mainthreadident == 0 or self._mainthreadident == ident:
diff --git a/pypy/tool/gcdump.py b/pypy/tool/gcdump.py
--- a/pypy/tool/gcdump.py
+++ b/pypy/tool/gcdump.py
@@ -43,7 +43,7 @@
 
     def print_summary(self):
         items = self.summary.items()
-        items.sort(key=lambda(typenum, stat): stat[1])    # sort by totalsize
+        items.sort(key=lambda (typenum, stat): stat[1])    # sort by totalsize
         totalsize = 0
         for typenum, stat in items:
             totalsize += stat[1]
diff --git a/rpython/jit/backend/llsupport/assembler.py b/rpython/jit/backend/llsupport/assembler.py
--- a/rpython/jit/backend/llsupport/assembler.py
+++ b/rpython/jit/backend/llsupport/assembler.py
@@ -303,28 +303,39 @@
 
     @staticmethod
     @rgc.no_collect
-    def _release_gil_asmgcc(css):
-        # similar to trackgcroot.py:pypy_asm_stackwalk, first part
-        from rpython.memory.gctransform import asmgcroot
-        new = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, css)
-        next = asmgcroot.gcrootanchor.next
-        new.next = next
-        new.prev = asmgcroot.gcrootanchor
-        asmgcroot.gcrootanchor.next = new
-        next.prev = new
-        # and now release the GIL
-        before = rffi.aroundstate.before
-        if before:
-            before()
+    def _reacquire_gil_asmgcc(css, old_rpy_fastgil):
+        # Before doing an external call, 'rpy_fastgil' is initialized to
+        # be equal to css.  This function is called if we find out after
+        # the call that it is no longer equal to css.  See description
+        # in translator/c/src/thread_pthread.c.
 
-    @staticmethod
-    @rgc.no_collect
-    def _reacquire_gil_asmgcc(css):
-        # first reacquire the GIL
-        after = rffi.aroundstate.after
-        if after:
-            after()
-        # similar to trackgcroot.py:pypy_asm_stackwalk, second part
+        if old_rpy_fastgil == 0:
+            # this case occurs if some other thread stole the GIL but
+            # released it again.  What occurred here is that we changed
+            # 'rpy_fastgil' from 0 to 1, thus successfully reaquiring the
+            # GIL.
+            pass
+
+        elif old_rpy_fastgil == 1:
+            # 'rpy_fastgil' was (and still is) locked by someone else.
+            # We need to wait for the regular mutex.
+            after = rffi.aroundstate.after
+            if after:
+                after()
+        else:
+            # stole the GIL from a different thread that is also
+            # currently in an external call from the jit.  Attach
+            # the 'old_rpy_fastgil' into the chained list.
+            from rpython.memory.gctransform import asmgcroot
+            oth = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, old_rpy_fastgil)
+            next = asmgcroot.gcrootanchor.next
+            oth.next = next
+            oth.prev = asmgcroot.gcrootanchor
+            asmgcroot.gcrootanchor.next = oth
+            next.prev = oth
+
+        # similar to trackgcroot.py:pypy_asm_stackwalk, second part:
+        # detach the 'css' from the chained list
         from rpython.memory.gctransform import asmgcroot
         old = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, css)
         prev = old.prev
@@ -334,42 +345,28 @@
 
     @staticmethod
     @rgc.no_collect
-    def _release_gil_shadowstack():
-        before = rffi.aroundstate.before
-        if before:
-            before()
-
-    @staticmethod
-    @rgc.no_collect
     def _reacquire_gil_shadowstack():
+        # Simplified version of _reacquire_gil_asmgcc(): in shadowstack mode,
+        # 'rpy_fastgil' contains only zero or non-zero, and this is only
+        # called when the old value stored in 'rpy_fastgil' was non-zero
+        # (i.e. still locked, must wait with the regular mutex)
         after = rffi.aroundstate.after
         if after:
             after()
 
-    @staticmethod
-    def _no_op():
-        pass
-
-    _NOARG_FUNC = lltype.Ptr(lltype.FuncType([], lltype.Void))
-    _CLOSESTACK_FUNC = lltype.Ptr(lltype.FuncType([rffi.LONGP],
-                                                  lltype.Void))
+    _REACQGIL0_FUNC = lltype.Ptr(lltype.FuncType([], lltype.Void))
+    _REACQGIL2_FUNC = lltype.Ptr(lltype.FuncType([rffi.CCHARP, lltype.Signed],
+                                                 lltype.Void))
 
     def _build_release_gil(self, gcrootmap):
-        if gcrootmap is None:
-            releasegil_func = llhelper(self._NOARG_FUNC, self._no_op)
-            reacqgil_func = llhelper(self._NOARG_FUNC, self._no_op)
-        elif gcrootmap.is_shadow_stack:
-            releasegil_func = llhelper(self._NOARG_FUNC,
-                                       self._release_gil_shadowstack)
-            reacqgil_func = llhelper(self._NOARG_FUNC,
+        if gcrootmap is None or gcrootmap.is_shadow_stack:
+            reacqgil_func = llhelper(self._REACQGIL0_FUNC,
                                      self._reacquire_gil_shadowstack)
+            self.reacqgil_addr = self.cpu.cast_ptr_to_int(reacqgil_func)
         else:
-            releasegil_func = llhelper(self._CLOSESTACK_FUNC,
-                                       self._release_gil_asmgcc)
-            reacqgil_func = llhelper(self._CLOSESTACK_FUNC,
+            reacqgil_func = llhelper(self._REACQGIL2_FUNC,
                                      self._reacquire_gil_asmgcc)
-        self.releasegil_addr  = self.cpu.cast_ptr_to_int(releasegil_func)
-        self.reacqgil_addr = self.cpu.cast_ptr_to_int(reacqgil_func)
+            self.reacqgil_addr = self.cpu.cast_ptr_to_int(reacqgil_func)
 
     def _is_asmgcc(self):
         gcrootmap = self.cpu.gc_ll_descr.gcrootmap
diff --git a/rpython/jit/backend/llsupport/callbuilder.py b/rpython/jit/backend/llsupport/callbuilder.py
--- a/rpython/jit/backend/llsupport/callbuilder.py
+++ b/rpython/jit/backend/llsupport/callbuilder.py
@@ -1,4 +1,7 @@
 from rpython.rlib.clibffi import FFI_DEFAULT_ABI
+from rpython.rlib import rgil
+from rpython.rtyper.lltypesystem import lltype, rffi
+
 
 class AbstractCallBuilder(object):
 
@@ -42,20 +45,21 @@
     def emit_call_release_gil(self):
         """Emit a CALL_RELEASE_GIL, including calls to releasegil_addr
         and reacqgil_addr."""
+        fastgil = rffi.cast(lltype.Signed, rgil.gil_fetch_fastgil())
         self.select_call_release_gil_mode()
         self.prepare_arguments()
         self.push_gcmap_for_call_release_gil()
-        self.call_releasegil_addr_and_move_real_arguments()
+        self.call_releasegil_addr_and_move_real_arguments(fastgil)
         self.emit_raw_call()
         self.restore_stack_pointer()
-        self.move_real_result_and_call_reacqgil_addr()
+        self.move_real_result_and_call_reacqgil_addr(fastgil)
         self.pop_gcmap()
         self.load_result()
 
-    def call_releasegil_addr_and_move_real_arguments(self):
+    def call_releasegil_addr_and_move_real_arguments(self, fastgil):
         raise NotImplementedError
 
-    def move_real_result_and_call_reacqgil_addr(self):
+    def move_real_result_and_call_reacqgil_addr(self, fastgil):
         raise NotImplementedError
 
     def select_call_release_gil_mode(self):
diff --git a/rpython/jit/backend/llsupport/test/test_gc_integration.py b/rpython/jit/backend/llsupport/test/test_gc_integration.py
--- a/rpython/jit/backend/llsupport/test/test_gc_integration.py
+++ b/rpython/jit/backend/llsupport/test/test_gc_integration.py
@@ -2,6 +2,7 @@
 """ Tests for register allocation for common constructs
 """
 
+import py
 import re
 from rpython.jit.metainterp.history import TargetToken, BasicFinalDescr,\
      JitCellToken, BasicFailDescr, AbstractDescr
@@ -780,6 +781,9 @@
         assert rffi.cast(JITFRAMEPTR, cpu.gc_ll_descr.write_barrier_on_frame_called) == frame
 
     def test_call_release_gil(self):
+        py.test.skip("xxx fix this test: the code is now assuming that "
+                     "'before' is just rgil.release_gil(), and 'after' is "
+                     "only needed if 'rpy_fastgil' was not changed.")
         # note that we can't test floats here because when untranslated
         # people actually wreck xmm registers
         cpu = self.cpu
diff --git a/rpython/jit/backend/llsupport/test/ztranslation_test.py b/rpython/jit/backend/llsupport/test/ztranslation_test.py
--- a/rpython/jit/backend/llsupport/test/ztranslation_test.py
+++ b/rpython/jit/backend/llsupport/test/ztranslation_test.py
@@ -3,13 +3,16 @@
 from rpython.rlib.jit import JitDriver, unroll_parameters, set_param
 from rpython.rlib.jit import PARAMETERS, dont_look_inside
 from rpython.rlib.jit import promote
-from rpython.rlib import jit_hooks
+from rpython.rlib import jit_hooks, rposix
 from rpython.rlib.objectmodel import keepalive_until_here
 from rpython.rlib.rthread import ThreadLocalReference
 from rpython.jit.backend.detect_cpu import getcpuclass
 from rpython.jit.backend.test.support import CCompiledMixin
 from rpython.jit.codewriter.policy import StopAtXPolicy
 from rpython.config.config import ConfigError
+from rpython.translator.tool.cbuild import ExternalCompilationInfo
+from rpython.rtyper.lltypesystem import lltype, rffi
+
 
 class TranslationTest(CCompiledMixin):
     CPUClass = getcpuclass()
@@ -24,6 +27,8 @@
         # - full optimizer
         # - floats neg and abs
         # - threadlocalref_get
+        # - get_errno, set_errno
+        # - llexternal with macro=True
 
         class Frame(object):
             _virtualizable_ = ['i']
@@ -35,9 +40,15 @@
             pass
         t = ThreadLocalReference(Foo)
 
-        @dont_look_inside
-        def myabs(x):
-            return abs(x)
+        eci = ExternalCompilationInfo(post_include_bits=['''
+#define pypy_my_fabs(x)  fabs(x)
+'''])
+        myabs1 = rffi.llexternal('pypy_my_fabs', [lltype.Float],
+                                 lltype.Float, macro=True, releasegil=False,
+                                 compilation_info=eci)
+        myabs2 = rffi.llexternal('pypy_my_fabs', [lltype.Float],
+                                 lltype.Float, macro=True, releasegil=True,
+                                 compilation_info=eci)
 
         jitdriver = JitDriver(greens = [],
                               reds = ['total', 'frame', 'j'],
@@ -60,13 +71,14 @@
                 frame.i -= 1
                 j *= -0.712
                 if j + (-j):    raise ValueError
-                k = myabs(j)
+                k = myabs1(myabs2(j))
                 if k - abs(j):  raise ValueError
                 if k - abs(-j): raise ValueError
                 if t.get().nine != 9: raise ValueError
+                rposix.set_errno(total)
+                if rposix.get_errno() != total: raise ValueError
             return chr(total % 253)
         #
-        from rpython.rtyper.lltypesystem import lltype, rffi
         from rpython.rlib.libffi import types, CDLL, ArgChain
         from rpython.rlib.test.test_clibffi import get_libm_name
         libm_name = get_libm_name(sys.platform)
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -1934,42 +1934,6 @@
         self._genop_call(op, arglocs, result_loc, is_call_release_gil=True)
         self._emit_guard_not_forced(guard_token)
 
-    def call_reacquire_gil(self, gcrootmap, save_loc):
-        # save the previous result (eax/xmm0) into the stack temporarily.
-        # XXX like with call_release_gil(), we assume that we don't need
-        # to save xmm0 in this case.
-        if isinstance(save_loc, RegLoc) and not save_loc.is_xmm:
-            self.mc.MOV_sr(WORD, save_loc.value)
-        # call the reopenstack() function (also reacquiring the GIL)
-        if gcrootmap.is_shadow_stack:
-            args = []
-            css = 0
-        else:
-            from rpython.memory.gctransform import asmgcroot
-            css = WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS)
-            if IS_X86_32:
-                reg = eax
-            elif IS_X86_64:
-                reg = edi
-            self.mc.LEA_rs(reg.value, css)
-            args = [reg]
-        self._emit_call(imm(self.reacqgil_addr), args, can_collect=False)
-        #
-        # Now that we required the GIL, we can reload a possibly modified ebp
-        if not gcrootmap.is_shadow_stack:
-            # special-case: reload ebp from the css
-            from rpython.memory.gctransform import asmgcroot
-            index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP)
-            self.mc.MOV_rs(ebp.value, index_of_ebp)  # MOV EBP, [css.ebp]
-        #else:
-        #   for shadowstack, done for us by _reload_frame_if_necessary()
-        self._reload_frame_if_necessary(self.mc)
-        self.set_extra_stack_depth(self.mc, 0)
-        #
-        # restore the result from the stack
-        if isinstance(save_loc, RegLoc) and not save_loc.is_xmm:
-            self.mc.MOV_rs(save_loc.value, WORD)
-
     def imm(self, v):
         return imm(v)
 
@@ -2361,12 +2325,38 @@
         ed = effectinfo.extradescrs[0]
         assert isinstance(ed, ThreadLocalRefDescr)
         addr1 = rffi.cast(lltype.Signed, ed.get_tlref_addr())
+        # 'addr1' is the address is the current thread, but we assume that
+        # it is a thread-local at a constant offset from %fs/%gs.
         addr0 = stmtlocal.threadlocal_base()
         addr = addr1 - addr0
         assert rx86.fits_in_32bits(addr)
         mc = self.mc
-        mc.writechar(stmtlocal.SEGMENT_TL)     # prefix
-        mc.MOV_rj(resloc.value, addr)
+        mc.writechar(stmtlocal.SEGMENT_TL)     # prefix: %fs or %gs
+        mc.MOV_rj(resloc.value, addr)          # memory read
+
+    def get_set_errno(self, op, loc, issue_a_write):
+        # this function is only called on Linux
+        from rpython.jit.backend.x86 import stmtlocal
+        addr = stmtlocal.get_errno_tl()
+        assert rx86.fits_in_32bits(addr)
+        mc = self.mc
+        mc.writechar(stmtlocal.SEGMENT_TL)     # prefix: %fs or %gs
+        # !!important: the *next* instruction must be the one using 'addr'!!
+        if issue_a_write:
+            if isinstance(loc, RegLoc):
+                mc.MOV32_jr(addr, loc.value)       # memory write from reg
+            else:
+                assert isinstance(loc, ImmedLoc)
+                newvalue = loc.value
+                newvalue = rffi.cast(rffi.INT, newvalue)
+                newvalue = rffi.cast(lltype.Signed, newvalue)
+                mc.MOV32_ji(addr, newvalue)        # memory write immediate
+        else:
+            assert isinstance(loc, RegLoc)
+            if IS_X86_32:
+                mc.MOV_rj(loc.value, addr)         # memory read
+            elif IS_X86_64:
+                mc.MOVSX32_rj(loc.value, addr)     # memory read, sign-extend
 
 
 genop_discard_list = [Assembler386.not_implemented_op_discard] * rop._LAST
diff --git a/rpython/jit/backend/x86/callbuilder.py b/rpython/jit/backend/x86/callbuilder.py
--- a/rpython/jit/backend/x86/callbuilder.py
+++ b/rpython/jit/backend/x86/callbuilder.py
@@ -25,9 +25,6 @@
     # arguments, we need to decrease esp temporarily
     stack_max = PASS_ON_MY_FRAME