[pypy-commit] pypy s390x-backend: merged default

plan_rich pypy.commits at gmail.com
Wed Jan 6 07:29:40 EST 2016


Author: Richard Plangger <planrichi at gmail.com>
Branch: s390x-backend
Changeset: r81599:b6d3c78012f2
Date: 2016-01-06 13:28 +0100
http://bitbucket.org/pypy/pypy/changeset/b6d3c78012f2/

Log:	merged default added stubs for malloc nursery set the jf_descr and
	gcmap too early (in generate quick_failure), that is problematic,
	cause it lets guard_not_forced fail when an exception is raised

diff too long, truncating to 2000 out of 2992 lines

diff --git a/LICENSE b/LICENSE
--- a/LICENSE
+++ b/LICENSE
@@ -28,7 +28,7 @@
     DEALINGS IN THE SOFTWARE.
 
 
-PyPy Copyright holders 2003-2015
+PyPy Copyright holders 2003-2016
 ----------------------------------- 
 
 Except when otherwise stated (look for LICENSE files or information at
diff --git a/lib-python/2.7/pickle.py b/lib-python/2.7/pickle.py
--- a/lib-python/2.7/pickle.py
+++ b/lib-python/2.7/pickle.py
@@ -1376,6 +1376,7 @@
 
 def decode_long(data):
     r"""Decode a long from a two's complement little-endian binary string.
+    This is overriden on PyPy by a RPython version that has linear complexity.
 
     >>> decode_long('')
     0L
@@ -1402,6 +1403,11 @@
         n -= 1L << (nbytes * 8)
     return n
 
+try:
+    from __pypy__ import decode_long
+except ImportError:
+    pass
+
 # Shorthands
 
 try:
diff --git a/lib-python/2.7/sysconfig.py b/lib-python/2.7/sysconfig.py
--- a/lib-python/2.7/sysconfig.py
+++ b/lib-python/2.7/sysconfig.py
@@ -524,6 +524,13 @@
             import _osx_support
             _osx_support.customize_config_vars(_CONFIG_VARS)
 
+        # PyPy:
+        import imp
+        for suffix, mode, type_ in imp.get_suffixes():
+            if type_ == imp.C_EXTENSION:
+                _CONFIG_VARS['SOABI'] = suffix.split('.')[1]
+                break
+
     if args:
         vals = []
         for name in args:
diff --git a/lib_pypy/cPickle.py b/lib_pypy/cPickle.py
--- a/lib_pypy/cPickle.py
+++ b/lib_pypy/cPickle.py
@@ -559,6 +559,7 @@
 
 def decode_long(data):
     r"""Decode a long from a two's complement little-endian binary string.
+    This is overriden on PyPy by a RPython version that has linear complexity.
 
     >>> decode_long('')
     0L
@@ -592,6 +593,11 @@
         n -= 1L << (nbytes << 3)
     return n
 
+try:
+    from __pypy__ import decode_long
+except ImportError:
+    pass
+
 def load(f):
     return Unpickler(f).load()
 
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -5,6 +5,8 @@
 .. this is a revision shortly after release-4.0.1
 .. startrev: 4b5c840d0da2
 
+Fixed ``_PyLong_FromByteArray()``, which was buggy.
+
 .. branch: numpy-1.10
 
 Fix tests to run cleanly with -A and start to fix micronumpy for upstream numpy
@@ -101,3 +103,10 @@
 
 Fix the cryptic exception message when attempting to use extended slicing
 in rpython. Was issue #2211.
+
+.. branch: ec-keepalive
+
+Optimize the case where, in a new C-created thread, we keep invoking
+short-running Python callbacks.  (CFFI on CPython has a hack to achieve
+the same result.)  This can also be seen as a bug fix: previously,
+thread-local objects would be reset between two such calls.
diff --git a/pypy/module/__pypy__/__init__.py b/pypy/module/__pypy__/__init__.py
--- a/pypy/module/__pypy__/__init__.py
+++ b/pypy/module/__pypy__/__init__.py
@@ -89,6 +89,7 @@
         'set_code_callback'         : 'interp_magic.set_code_callback',
         'save_module_content_for_future_reload':
                           'interp_magic.save_module_content_for_future_reload',
+        'decode_long'               : 'interp_magic.decode_long',
     }
     if sys.platform == 'win32':
         interpleveldefs['get_console_cp'] = 'interp_magic.get_console_cp'
diff --git a/pypy/module/__pypy__/interp_magic.py b/pypy/module/__pypy__/interp_magic.py
--- a/pypy/module/__pypy__/interp_magic.py
+++ b/pypy/module/__pypy__/interp_magic.py
@@ -1,4 +1,4 @@
-from pypy.interpreter.error import OperationError, wrap_oserror
+from pypy.interpreter.error import OperationError, oefmt, wrap_oserror
 from pypy.interpreter.gateway import unwrap_spec
 from pypy.interpreter.pycode import CodeHookCache
 from pypy.interpreter.pyframe import PyFrame
@@ -158,4 +158,13 @@
     if space.is_none(w_callable):
         cache._code_hook = None
     else:
-        cache._code_hook = w_callable
\ No newline at end of file
+        cache._code_hook = w_callable
+
+ at unwrap_spec(string=str, byteorder=str, signed=int)
+def decode_long(space, string, byteorder='little', signed=1):
+    from rpython.rlib.rbigint import rbigint, InvalidEndiannessError
+    try:
+        result = rbigint.frombytes(string, byteorder, bool(signed))
+    except InvalidEndiannessError:
+        raise oefmt(space.w_ValueError, "invalid byteorder argument")
+    return space.newlong_from_rbigint(result)
diff --git a/pypy/module/__pypy__/test/test_magic.py b/pypy/module/__pypy__/test/test_magic.py
--- a/pypy/module/__pypy__/test/test_magic.py
+++ b/pypy/module/__pypy__/test/test_magic.py
@@ -30,4 +30,20 @@
 """ in d
         finally:
             __pypy__.set_code_callback(None)
-        assert d['f'].__code__ in l
\ No newline at end of file
+        assert d['f'].__code__ in l
+
+    def test_decode_long(self):
+        from __pypy__ import decode_long
+        assert decode_long('') == 0
+        assert decode_long('\xff\x00') == 255
+        assert decode_long('\xff\x7f') == 32767
+        assert decode_long('\x00\xff') == -256
+        assert decode_long('\x00\x80') == -32768
+        assert decode_long('\x80') == -128
+        assert decode_long('\x7f') == 127
+        assert decode_long('\x55' * 97) == (1 << (97 * 8)) // 3
+        assert decode_long('\x00\x80', 'big') == 128
+        assert decode_long('\xff\x7f', 'little', False) == 32767
+        assert decode_long('\x00\x80', 'little', False) == 32768
+        assert decode_long('\x00\x80', 'little', True) == -32768
+        raises(ValueError, decode_long, '', 'foo')
diff --git a/pypy/module/_cffi_backend/cglob.py b/pypy/module/_cffi_backend/cglob.py
--- a/pypy/module/_cffi_backend/cglob.py
+++ b/pypy/module/_cffi_backend/cglob.py
@@ -3,6 +3,7 @@
 from pypy.interpreter.typedef import TypeDef
 from pypy.module._cffi_backend.cdataobj import W_CData
 from pypy.module._cffi_backend import newtype
+from rpython.rlib import rgil
 from rpython.rlib.objectmodel import we_are_translated
 from rpython.rtyper.lltypesystem import lltype, rffi
 from rpython.translator.tool.cbuild import ExternalCompilationInfo
@@ -26,7 +27,9 @@
             if not we_are_translated():
                 FNPTR = rffi.CCallback([], rffi.VOIDP)
                 fetch_addr = rffi.cast(FNPTR, self.fetch_addr)
+                rgil.release()
                 result = fetch_addr()
+                rgil.acquire()
             else:
                 # careful in translated versions: we need to call fetch_addr,
                 # but in a GIL-releasing way.  The easiest is to invoke a
diff --git a/pypy/module/_cffi_backend/ctypefunc.py b/pypy/module/_cffi_backend/ctypefunc.py
--- a/pypy/module/_cffi_backend/ctypefunc.py
+++ b/pypy/module/_cffi_backend/ctypefunc.py
@@ -423,7 +423,9 @@
             exchange_offset += rffi.getintfield(self.atypes[i], 'c_size')
 
         # store the exchange data size
-        cif_descr.exchange_size = exchange_offset
+        # we also align it to the next multiple of 8, in an attempt to
+        # work around bugs(?) of libffi (see cffi issue #241)
+        cif_descr.exchange_size = self.align_arg(exchange_offset)
 
     def fb_extra_fields(self, cif_descr):
         cif_descr.abi = self.fabi
diff --git a/pypy/module/cpyext/longobject.py b/pypy/module/cpyext/longobject.py
--- a/pypy/module/cpyext/longobject.py
+++ b/pypy/module/cpyext/longobject.py
@@ -228,26 +228,11 @@
 def _PyLong_FromByteArray(space, bytes, n, little_endian, signed):
     little_endian = rffi.cast(lltype.Signed, little_endian)
     signed = rffi.cast(lltype.Signed, signed)
-
-    result = rbigint()
-    negative = False
-
-    for i in range(0, n):
-        if little_endian:
-            c = intmask(bytes[i])
-        else:
-            c = intmask(bytes[n - i - 1])
-        if i == 0 and signed and c & 0x80:
-            negative = True
-        if negative:
-            c = c ^ 0xFF
-        digit = rbigint.fromint(c)
-
-        result = result.lshift(8)
-        result = result.add(digit)
-
-    if negative:
-        result = result.neg()
-
+    s = rffi.charpsize2str(rffi.cast(rffi.CCHARP, bytes),
+                           rffi.cast(lltype.Signed, n))
+    if little_endian:
+        byteorder = 'little'
+    else:
+        byteorder = 'big'
+    result = rbigint.frombytes(s, byteorder, signed != 0)
     return space.newlong_from_rbigint(result)
-
diff --git a/pypy/module/cpyext/slotdefs.py b/pypy/module/cpyext/slotdefs.py
--- a/pypy/module/cpyext/slotdefs.py
+++ b/pypy/module/cpyext/slotdefs.py
@@ -4,8 +4,7 @@
 
 from rpython.rtyper.lltypesystem import rffi, lltype
 from pypy.module.cpyext.api import (
-    cpython_api, generic_cpy_call, PyObject, Py_ssize_t, Py_TPFLAGS_CHECKTYPES,
-    CANNOT_FAIL)
+    cpython_api, generic_cpy_call, PyObject, Py_ssize_t, Py_TPFLAGS_CHECKTYPES)
 from pypy.module.cpyext.typeobjectdefs import (
     unaryfunc, wrapperfunc, ternaryfunc, PyTypeObjectPtr, binaryfunc,
     getattrfunc, getattrofunc, setattrofunc, lenfunc, ssizeargfunc, inquiry,
@@ -387,7 +386,7 @@
             return
 
         @cpython_api([PyObject, PyObject], PyObject,
-                     error=CANNOT_FAIL, external=True)
+                     external=True)
         @func_renamer("cpyext_tp_getattro_%s" % (typedef.name,))
         def slot_tp_getattro(space, w_self, w_name):
             return space.call_function(getattr_fn, w_self, w_name)
diff --git a/pypy/module/cpyext/test/test_longobject.py b/pypy/module/cpyext/test/test_longobject.py
--- a/pypy/module/cpyext/test/test_longobject.py
+++ b/pypy/module/cpyext/test/test_longobject.py
@@ -175,10 +175,26 @@
                                               little_endian, is_signed);
              """),
             ])
-        assert module.from_bytearray(True, False) == 0x9ABC
-        assert module.from_bytearray(True, True) == -0x6543
-        assert module.from_bytearray(False, False) == 0xBC9A
-        assert module.from_bytearray(False, True) == -0x4365
+        assert module.from_bytearray(True, False) == 0xBC9A
+        assert module.from_bytearray(True, True) == -0x4366
+        assert module.from_bytearray(False, False) == 0x9ABC
+        assert module.from_bytearray(False, True) == -0x6544
+
+    def test_frombytearray_2(self):
+        module = self.import_extension('foo', [
+            ("from_bytearray", "METH_VARARGS",
+             """
+                 int little_endian, is_signed;
+                 if (!PyArg_ParseTuple(args, "ii", &little_endian, &is_signed))
+                     return NULL;
+                 return _PyLong_FromByteArray("\x9A\xBC\x41", 3,
+                                              little_endian, is_signed);
+             """),
+            ])
+        assert module.from_bytearray(True, False) == 0x41BC9A
+        assert module.from_bytearray(True, True) == 0x41BC9A
+        assert module.from_bytearray(False, False) == 0x9ABC41
+        assert module.from_bytearray(False, True) == -0x6543BF
 
     def test_fromunicode(self):
         module = self.import_extension('foo', [
diff --git a/pypy/module/cpyext/test/test_typeobject.py b/pypy/module/cpyext/test/test_typeobject.py
--- a/pypy/module/cpyext/test/test_typeobject.py
+++ b/pypy/module/cpyext/test/test_typeobject.py
@@ -414,15 +414,26 @@
                      return NULL;
                  }
                  PyObject *name = PyString_FromString("attr1");
-                 PyIntObject *attr1 = obj->ob_type->tp_getattro(obj, name);
-                 if (attr1->ob_ival != value->ob_ival)
+                 PyIntObject *attr = obj->ob_type->tp_getattro(obj, name);
+                 if (attr->ob_ival != value->ob_ival)
                  {
                      PyErr_SetString(PyExc_ValueError,
                                      "tp_getattro returned wrong value");
                      return NULL;
                  }
                  Py_DECREF(name);
-                 Py_DECREF(attr1);
+                 Py_DECREF(attr);
+                 name = PyString_FromString("attr2");
+                 attr = obj->ob_type->tp_getattro(obj, name);
+                 if (attr == NULL && PyErr_ExceptionMatches(PyExc_AttributeError))
+                 {
+                     PyErr_Clear();
+                 } else {
+                     PyErr_SetString(PyExc_ValueError,
+                                     "tp_getattro should have raised");
+                     return NULL;
+                 }
+                 Py_DECREF(name);
                  Py_RETURN_TRUE;
              '''
              )
@@ -637,7 +648,7 @@
                 IntLikeObject *intObj;
                 long intval;
 
-                if (!PyArg_ParseTuple(args, "i", &intval))
+                if (!PyArg_ParseTuple(args, "l", &intval))
                     return NULL;
 
                 IntLike_Type.tp_as_number = &intlike_as_number;
@@ -657,7 +668,7 @@
                 IntLikeObjectNoOp *intObjNoOp;
                 long intval;
 
-                if (!PyArg_ParseTuple(args, "i", &intval))
+                if (!PyArg_ParseTuple(args, "l", &intval))
                     return NULL;
 
                 IntLike_Type_NoOp.tp_flags |= Py_TPFLAGS_CHECKTYPES;
diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py
--- a/pypy/module/posix/interp_posix.py
+++ b/pypy/module/posix/interp_posix.py
@@ -299,7 +299,7 @@
         return build_stat_result(space, st)
 
 def lstat(space, w_path):
-    "Like stat(path), but do no follow symbolic links."
+    "Like stat(path), but do not follow symbolic links."
     try:
         st = dispatch_filename(rposix_stat.lstat)(space, w_path)
     except OSError, e:
diff --git a/pypy/module/pypyjit/test_pypy_c/test_struct.py b/pypy/module/pypyjit/test_pypy_c/test_struct.py
--- a/pypy/module/pypyjit/test_pypy_c/test_struct.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_struct.py
@@ -45,7 +45,7 @@
 
         # the newstr and the strsetitems are because the string is forced,
         # which is in turn because the optimizer doesn't know how to handle a
-        # getarrayitem_gc_i on a virtual string. It could be improved, but it
+        # gc_load_indexed_i on a virtual string. It could be improved, but it
         # is also true that in real life cases struct.unpack is called on
         # strings which come from the outside, so it's a minor issue.
         assert loop.match_by_id("unpack", """
@@ -55,17 +55,17 @@
             strsetitem(p88, 1, i14)
             strsetitem(p88, 2, i17)
             strsetitem(p88, 3, i20)
-            i91 = getarrayitem_gc_i(p88, 0, descr=<ArrayS 4>)
+            i91 = gc_load_indexed_i(p88, 0, 1, _, -4)
         """)
 
     def test_struct_object(self):
         def main(n):
             import struct
-            s = struct.Struct("i")
+            s = struct.Struct("ii")
             i = 1
             while i < n:
-                buf = s.pack(i)       # ID: pack
-                x = s.unpack(buf)[0]  # ID: unpack
+                buf = s.pack(-1, i)     # ID: pack
+                x = s.unpack(buf)[1]    # ID: unpack
                 i += x / i
             return i
 
@@ -88,10 +88,15 @@
 
         assert loop.match_by_id('unpack', """
             # struct.unpack
-            p88 = newstr(4)
-            strsetitem(p88, 0, i11)
-            strsetitem(p88, 1, i14)
-            strsetitem(p88, 2, i17)
-            strsetitem(p88, 3, i20)
-            i91 = getarrayitem_gc_i(p88, 0, descr=<ArrayS 4>)
+            p88 = newstr(8)
+            strsetitem(p88, 0, 255)
+            strsetitem(p88, 1, 255)
+            strsetitem(p88, 2, 255)
+            strsetitem(p88, 3, 255)
+            strsetitem(p88, 4, i11)
+            strsetitem(p88, 5, i14)
+            strsetitem(p88, 6, i17)
+            strsetitem(p88, 7, i20)
+            i90 = gc_load_indexed_i(p88, 0, 1, _, -4)
+            i91 = gc_load_indexed_i(p88, 4, 1, _, -4)
         """)
diff --git a/pypy/module/thread/__init__.py b/pypy/module/thread/__init__.py
--- a/pypy/module/thread/__init__.py
+++ b/pypy/module/thread/__init__.py
@@ -27,7 +27,7 @@
         from pypy.module.thread import gil
         MixedModule.__init__(self, space, *args)
         prev_ec = space.threadlocals.get_ec()
-        space.threadlocals = gil.GILThreadLocals()
+        space.threadlocals = gil.GILThreadLocals(space)
         space.threadlocals.initialize(space)
         if prev_ec is not None:
             space.threadlocals._set_ec(prev_ec)
diff --git a/pypy/module/thread/test/test_gil.py b/pypy/module/thread/test/test_gil.py
--- a/pypy/module/thread/test/test_gil.py
+++ b/pypy/module/thread/test/test_gil.py
@@ -65,7 +65,7 @@
             except Exception, e:
                 assert 0
             thread.gc_thread_die()
-        my_gil_threadlocals = gil.GILThreadLocals()
+        my_gil_threadlocals = gil.GILThreadLocals(space)
         def f():
             state.data = []
             state.datalen1 = 0
diff --git a/pypy/module/thread/threadlocals.py b/pypy/module/thread/threadlocals.py
--- a/pypy/module/thread/threadlocals.py
+++ b/pypy/module/thread/threadlocals.py
@@ -1,5 +1,7 @@
-from rpython.rlib import rthread
+import weakref
+from rpython.rlib import rthread, rshrinklist
 from rpython.rlib.objectmodel import we_are_translated
+from rpython.rlib.rarithmetic import r_ulonglong
 from pypy.module.thread.error import wrap_thread_error
 from pypy.interpreter.executioncontext import ExecutionContext
 
@@ -13,15 +15,51 @@
     a thread finishes.  This works as long as the thread was started by
     os_thread.bootstrap()."""
 
-    def __init__(self):
+    def __init__(self, space):
         "NOT_RPYTHON"
-        self._valuedict = {}   # {thread_ident: ExecutionContext()}
+        #
+        # This object tracks code that enters and leaves threads.
+        # There are two APIs.  For Python-level threads, we know when
+        # the thread starts and ends, and we call enter_thread() and
+        # leave_thread().  In a few other cases, like callbacks, we
+        # might be running in some never-seen-before thread: in this
+        # case, the callback logic needs to call try_enter_thread() at
+        # the start, and if this returns True it needs to call
+        # leave_thread() at the end.
+        #
+        # We implement an optimization for the second case (which only
+        # works if we translate with a framework GC and with
+        # rweakref).  If try_enter_thread() is called in a
+        # never-seen-before thread, it still returns False and
+        # remembers the ExecutionContext with 'self._weaklist'.  The
+        # next time we call try_enter_thread() again in the same
+        # thread, the ExecutionContext is reused.  The optimization is
+        # not completely invisible to the user: 'thread._local()'
+        # values will remain.  We can argue that it is the correct
+        # behavior to do that, and the behavior we get if the
+        # optimization is disabled is buggy (but hard to do better
+        # then).
+        #
+        # 'self._valuedict' is a dict mapping the thread idents to
+        # ExecutionContexts; it does not list the ExecutionContexts
+        # which are in 'self._weaklist'.  (The latter is more precisely
+        # a list of AutoFreeECWrapper objects, defined below, which
+        # each references the ExecutionContext.)
+        #
+        self.space = space
+        self._valuedict = {}
         self._cleanup_()
         self.raw_thread_local = rthread.ThreadLocalReference(ExecutionContext,
                                                             loop_invariant=True)
 
+    def can_optimize_with_weaklist(self):
+        config = self.space.config
+        return (config.translation.rweakref and
+                rthread.ThreadLocalReference.automatic_keepalive(config))
+
     def _cleanup_(self):
         self._valuedict.clear()
+        self._weaklist = None
         self._mainthreadident = 0
 
     def enter_thread(self, space):
@@ -29,19 +67,35 @@
         self._set_ec(space.createexecutioncontext())
 
     def try_enter_thread(self, space):
-        if rthread.get_ident() in self._valuedict:
+        # common case: the thread-local has already got a value
+        if self.raw_thread_local.get() is not None:
             return False
-        self.enter_thread(space)
-        return True
 
-    def _set_ec(self, ec):
+        # Else, make and attach a new ExecutionContext
+        ec = space.createexecutioncontext()
+        if not self.can_optimize_with_weaklist():
+            self._set_ec(ec)
+            return True
+
+        # If can_optimize_with_weaklist(), then 'rthread' keeps the
+        # thread-local values alive until the end of the thread.  Use
+        # AutoFreeECWrapper as an object with a __del__; when this
+        # __del__ is called, it means the thread was really finished.
+        # In this case we don't want leave_thread() to be called
+        # explicitly, so we return False.
+        if self._weaklist is None:
+            self._weaklist = ListECWrappers()
+        self._weaklist.append(weakref.ref(AutoFreeECWrapper(ec)))
+        self._set_ec(ec, register_in_valuedict=False)
+        return False
+
+    def _set_ec(self, ec, register_in_valuedict=True):
         ident = rthread.get_ident()
         if self._mainthreadident == 0 or self._mainthreadident == ident:
             ec._signals_enabled = 1    # the main thread is enabled
             self._mainthreadident = ident
-        self._valuedict[ident] = ec
-        # This logic relies on hacks and _make_sure_does_not_move().
-        # It only works because we keep the 'ec' alive in '_valuedict' too.
+        if register_in_valuedict:
+            self._valuedict[ident] = ec
         self.raw_thread_local.set(ec)
 
     def leave_thread(self, space):
@@ -84,7 +138,23 @@
         ec._signals_enabled = new
 
     def getallvalues(self):
-        return self._valuedict
+        if self._weaklist is None:
+            return self._valuedict
+        # This logic walks the 'self._weaklist' list and adds the
+        # ExecutionContexts to 'result'.  We are careful in case there
+        # are two AutoFreeECWrappers in the list which have the same
+        # 'ident'; in this case we must keep the most recent one (the
+        # older one should be deleted soon).  Moreover, entries in
+        # self._valuedict have priority because they are never
+        # outdated.
+        result = {}
+        for h in self._weaklist.items():
+            wrapper = h()
+            if wrapper is not None and not wrapper.deleted:
+                result[wrapper.ident] = wrapper.ec
+                # ^^ this possibly overwrites an older ec
+        result.update(self._valuedict)
+        return result
 
     def reinit_threads(self, space):
         "Called in the child process after a fork()"
@@ -94,7 +164,31 @@
         old_sig = ec._signals_enabled
         if ident != self._mainthreadident:
             old_sig += 1
-        self._cleanup_()
+        self._cleanup_()      # clears self._valuedict
         self._mainthreadident = ident
         self._set_ec(ec)
         ec._signals_enabled = old_sig
+
+
+class AutoFreeECWrapper(object):
+    deleted = False
+
+    def __init__(self, ec):
+        # this makes a loop between 'self' and 'ec'.  It should not prevent
+        # the __del__ method here from being called.
+        self.ec = ec
+        ec._threadlocals_auto_free = self
+        self.ident = rthread.get_ident()
+
+    def __del__(self):
+        from pypy.module.thread.os_local import thread_is_stopping
+        # this is always called in another thread: the thread
+        # referenced by 'self.ec' has finished at that point, and
+        # we're just after the GC which finds no more references to
+        # 'ec' (and thus to 'self').
+        self.deleted = True
+        thread_is_stopping(self.ec)
+
+class ListECWrappers(rshrinklist.AbstractShrinkList):
+    def must_keep(self, wref):
+        return wref() is not None
diff --git a/pypy/objspace/std/test/test_longobject.py b/pypy/objspace/std/test/test_longobject.py
--- a/pypy/objspace/std/test/test_longobject.py
+++ b/pypy/objspace/std/test/test_longobject.py
@@ -358,3 +358,10 @@
         assert 3L.__coerce__(4L) == (3L, 4L)
         assert 3L.__coerce__(4) == (3, 4)
         assert 3L.__coerce__(object()) == NotImplemented
+
+    def test_linear_long_base_16(self):
+        # never finishes if long(_, 16) is not linear-time
+        size = 100000
+        n = "a" * size
+        expected = (2 << (size * 4)) // 3
+        assert long(n, 16) == expected
diff --git a/rpython/annotator/signature.py b/rpython/annotator/signature.py
--- a/rpython/annotator/signature.py
+++ b/rpython/annotator/signature.py
@@ -100,6 +100,7 @@
         self.argtypes = argtypes
 
     def __call__(self, funcdesc, inputcells):
+        from rpython.rlib.objectmodel import NOT_CONSTANT
         from rpython.rtyper.lltypesystem import lltype
         args_s = []
         from rpython.annotator import model as annmodel
@@ -115,6 +116,9 @@
                 args_s.append(s_input)
             elif argtype is None:
                 args_s.append(inputcells[i])     # no change
+            elif argtype is NOT_CONSTANT:
+                from rpython.annotator.model import not_const
+                args_s.append(not_const(inputcells[i]))
             else:
                 args_s.append(annotation(argtype, bookkeeper=funcdesc.bookkeeper))
         if len(inputcells) != len(args_s):
diff --git a/rpython/jit/backend/llgraph/runner.py b/rpython/jit/backend/llgraph/runner.py
--- a/rpython/jit/backend/llgraph/runner.py
+++ b/rpython/jit/backend/llgraph/runner.py
@@ -13,6 +13,7 @@
 
 from rpython.rtyper.llinterp import LLInterpreter, LLException
 from rpython.rtyper.lltypesystem import lltype, llmemory, rffi, rstr
+from rpython.rtyper.lltypesystem.lloperation import llop
 from rpython.rtyper import rclass
 
 from rpython.rlib.clibffi import FFI_DEFAULT_ABI
@@ -638,18 +639,9 @@
         return array.getlength()
 
     def bh_getarrayitem_gc(self, a, index, descr):
+        a = support.cast_arg(lltype.Ptr(descr.A), a)
+        array = a._obj
         assert index >= 0
-        if descr.A is descr.OUTERA:
-            a = support.cast_arg(lltype.Ptr(descr.A), a)
-        else:
-            # we use rffi.cast instead of support.cast_arg because the types
-            # might not be "compatible" enough from the lltype point of
-            # view. In particular, this happens when we use
-            # str_storage_getitem, in which an rpy_string is casted to
-            # rpy_string_as_Signed (or similar)
-            a = rffi.cast(lltype.Ptr(descr.OUTERA), a)
-            a = getattr(a, descr.OUTERA._arrayfld)
-        array = a._obj
         return support.cast_result(descr.A.OF, array.getitem(index))
 
     bh_getarrayitem_gc_pure_i = bh_getarrayitem_gc
@@ -714,6 +706,24 @@
         else:
             return self.bh_raw_load_i(struct, offset, descr)
 
+    def bh_gc_load_indexed_i(self, struct, index, scale, base_ofs, bytes):
+        if   bytes == 1: T = rffi.UCHAR
+        elif bytes == 2: T = rffi.USHORT
+        elif bytes == 4: T = rffi.UINT
+        elif bytes == 8: T = rffi.ULONGLONG
+        elif bytes == -1: T = rffi.SIGNEDCHAR
+        elif bytes == -2: T = rffi.SHORT
+        elif bytes == -4: T = rffi.INT
+        elif bytes == -8: T = rffi.LONGLONG
+        else: raise NotImplementedError(bytes)
+        x = llop.gc_load_indexed(T, struct, index, scale, base_ofs)
+        return lltype.cast_primitive(lltype.Signed, x)
+
+    def bh_gc_load_indexed_f(self, struct, index, scale, base_ofs, bytes):
+        if bytes != 8:
+            raise Exception("gc_load_indexed_f is only for 'double'!")
+        return llop.gc_load_indexed(rffi.DOUBLE, struct, index, scale, base_ofs)
+
     def bh_increment_debug_counter(self, addr):
         p = rffi.cast(rffi.CArrayPtr(lltype.Signed), addr)
         p[0] += 1
diff --git a/rpython/jit/backend/llsupport/llmodel.py b/rpython/jit/backend/llsupport/llmodel.py
--- a/rpython/jit/backend/llsupport/llmodel.py
+++ b/rpython/jit/backend/llsupport/llmodel.py
@@ -725,6 +725,16 @@
     def bh_raw_load_f(self, addr, offset, descr):
         return self.read_float_at_mem(addr, offset)
 
+    def bh_gc_load_indexed_i(self, addr, index, scale, base_ofs, bytes):
+        offset = base_ofs + scale * index
+        return self.read_int_at_mem(addr, offset, abs(bytes), bytes < 0)
+
+    def bh_gc_load_indexed_f(self, addr, index, scale, base_ofs, bytes):
+        # only for 'double'!
+        assert bytes == rffi.sizeof(lltype.Float)
+        offset = base_ofs + scale * index
+        return self.read_float_at_mem(addr, offset)
+
     def bh_new(self, sizedescr):
         return self.gc_ll_descr.gc_malloc(sizedescr)
 
diff --git a/rpython/jit/backend/ppc/opassembler.py b/rpython/jit/backend/ppc/opassembler.py
--- a/rpython/jit/backend/ppc/opassembler.py
+++ b/rpython/jit/backend/ppc/opassembler.py
@@ -20,7 +20,7 @@
                                                  PPCBuilder, PPCGuardToken)
 from rpython.jit.backend.ppc.regalloc import TempPtr, TempInt
 from rpython.jit.backend.llsupport import symbolic, jitframe
-from rpython.jit.backend.llsupport.descr import InteriorFieldDescr, CallDescr
+from rpython.jit.backend.llsupport.descr import CallDescr
 from rpython.jit.backend.llsupport.gcmap import allocate_gcmap
 from rpython.rtyper.lltypesystem import rstr, rffi, lltype
 from rpython.rtyper.annlowlevel import cast_instance_to_gcref
@@ -706,8 +706,10 @@
 
     _mixin_ = True
 
-    def _write_to_mem(self, value_loc, base_loc, ofs, size):
-        if size.value == 8:
+    def _write_to_mem(self, value_loc, base_loc, ofs, size_loc):
+        assert size_loc.is_imm()
+        size = size_loc.value
+        if size == 8:
             if value_loc.is_fp_reg():
                 if ofs.is_imm():
                     self.mc.stfd(value_loc.value, base_loc.value, ofs.value)
@@ -718,17 +720,17 @@
                     self.mc.std(value_loc.value, base_loc.value, ofs.value)
                 else:
                     self.mc.stdx(value_loc.value, base_loc.value, ofs.value)
-        elif size.value == 4:
+        elif size == 4:
             if ofs.is_imm():
                 self.mc.stw(value_loc.value, base_loc.value, ofs.value)
             else:
                 self.mc.stwx(value_loc.value, base_loc.value, ofs.value)
-        elif size.value == 2:
+        elif size == 2:
             if ofs.is_imm():
                 self.mc.sth(value_loc.value, base_loc.value, ofs.value)
             else:
                 self.mc.sthx(value_loc.value, base_loc.value, ofs.value)
-        elif size.value == 1:
+        elif size == 1:
             if ofs.is_imm():
                 self.mc.stb(value_loc.value, base_loc.value, ofs.value)
             else:
@@ -736,18 +738,35 @@
         else:
             assert 0, "size not supported"
 
-    def emit_setfield_gc(self, op, arglocs, regalloc):
-        value_loc, base_loc, ofs, size = arglocs
-        self._write_to_mem(value_loc, base_loc, ofs, size)
+    def emit_gc_store(self, op, arglocs, regalloc):
+        value_loc, base_loc, ofs_loc, size_loc = arglocs
+        self._write_to_mem(value_loc, base_loc, ofs_loc, size_loc)
 
-    emit_setfield_raw = emit_setfield_gc
-    emit_zero_ptr_field = emit_setfield_gc
+    def _apply_offset(self, index_loc, ofs_loc):
+        # If offset != 0 then we have to add it here.  Note that
+        # mc.addi() would not be valid with operand r0.
+        assert ofs_loc.is_imm()                # must be an immediate...
+        assert _check_imm_arg(ofs_loc.getint())   # ...that fits 16 bits
+        assert index_loc is not r.SCRATCH2
+        # (simplified version of _apply_scale())
+        if ofs_loc.value > 0:
+            self.mc.addi(r.SCRATCH2.value, index_loc.value, ofs_loc.value)
+            index_loc = r.SCRATCH2
+        return index_loc
 
-    def _load_from_mem(self, res, base_loc, ofs, size, signed):
+    def emit_gc_store_indexed(self, op, arglocs, regalloc):
+        base_loc, index_loc, value_loc, ofs_loc, size_loc = arglocs
+        index_loc = self._apply_offset(index_loc, ofs_loc)
+        self._write_to_mem(value_loc, base_loc, index_loc, size_loc)
+
+    def _load_from_mem(self, res, base_loc, ofs, size_loc, sign_loc):
         # res, base_loc, ofs, size and signed are all locations
         assert base_loc is not r.SCRATCH
-        sign = signed.value
-        if size.value == 8:
+        assert size_loc.is_imm()
+        size = size_loc.value
+        assert sign_loc.is_imm()
+        sign = sign_loc.value
+        if size == 8:
             if res.is_fp_reg():
                 if ofs.is_imm():
                     self.mc.lfd(res.value, base_loc.value, ofs.value)
@@ -758,7 +777,7 @@
                     self.mc.ld(res.value, base_loc.value, ofs.value)
                 else:
                     self.mc.ldx(res.value, base_loc.value, ofs.value)
-        elif size.value == 4:
+        elif size == 4:
             if IS_PPC_64 and sign:
                 if ofs.is_imm():
                     self.mc.lwa(res.value, base_loc.value, ofs.value)
@@ -769,7 +788,7 @@
                     self.mc.lwz(res.value, base_loc.value, ofs.value)
                 else:
                     self.mc.lwzx(res.value, base_loc.value, ofs.value)
-        elif size.value == 2:
+        elif size == 2:
             if sign:
                 if ofs.is_imm():
                     self.mc.lha(res.value, base_loc.value, ofs.value)
@@ -780,7 +799,7 @@
                     self.mc.lhz(res.value, base_loc.value, ofs.value)
                 else:
                     self.mc.lhzx(res.value, base_loc.value, ofs.value)
-        elif size.value == 1:
+        elif size == 1:
             if ofs.is_imm():
                 self.mc.lbz(res.value, base_loc.value, ofs.value)
             else:
@@ -790,22 +809,28 @@
         else:
             assert 0, "size not supported"
 
-    def _genop_getfield(self, op, arglocs, regalloc):
-        base_loc, ofs, res, size, sign = arglocs
-        self._load_from_mem(res, base_loc, ofs, size, sign)
+    def _genop_gc_load(self, op, arglocs, regalloc):
+        base_loc, ofs_loc, res_loc, size_loc, sign_loc = arglocs
+        self._load_from_mem(res_loc, base_loc, ofs_loc, size_loc, sign_loc)
 
-    emit_getfield_gc_i = _genop_getfield
-    emit_getfield_gc_r = _genop_getfield
-    emit_getfield_gc_f = _genop_getfield
-    emit_getfield_gc_pure_i = _genop_getfield
-    emit_getfield_gc_pure_r = _genop_getfield
-    emit_getfield_gc_pure_f = _genop_getfield
-    emit_getfield_raw_i = _genop_getfield
-    emit_getfield_raw_f = _genop_getfield
+    emit_gc_load_i = _genop_gc_load
+    emit_gc_load_r = _genop_gc_load
+    emit_gc_load_f = _genop_gc_load
+
+    def _genop_gc_load_indexed(self, op, arglocs, regalloc):
+        base_loc, index_loc, res_loc, ofs_loc, size_loc, sign_loc = arglocs
+        index_loc = self._apply_offset(index_loc, ofs_loc)
+        self._load_from_mem(res_loc, base_loc, index_loc, size_loc, sign_loc)
+
+    emit_gc_load_indexed_i = _genop_gc_load_indexed
+    emit_gc_load_indexed_r = _genop_gc_load_indexed
+    emit_gc_load_indexed_f = _genop_gc_load_indexed
 
     SIZE2SCALE = dict([(1<<_i, _i) for _i in range(32)])
 
     def _multiply_by_constant(self, loc, multiply_by, scratch_loc):
+        # XXX should die together with _apply_scale() but can't because
+        # of emit_zero_array() and malloc_cond_varsize() at the moment
         assert loc.is_reg()
         if multiply_by == 1:
             return loc
@@ -827,6 +852,9 @@
         return scratch_loc
 
     def _apply_scale(self, ofs, index_loc, itemsize):
+        # XXX should die now that getarrayitem and getinteriorfield are gone
+        # but can't because of emit_zero_array() at the moment
+
         # For arrayitem and interiorfield reads and writes: this returns an
         # offset suitable for use in ld/ldx or similar instructions.
         # The result will be either the register r2 or a 16-bit immediate.
@@ -857,44 +885,6 @@
                 index_loc = r.SCRATCH2
             return index_loc
 
-    def _genop_getarray_or_interiorfield(self, op, arglocs, regalloc):
-        (base_loc, index_loc, res_loc, ofs_loc,
-            itemsize, fieldsize, fieldsign) = arglocs
-        ofs_loc = self._apply_scale(ofs_loc, index_loc, itemsize)
-        self._load_from_mem(res_loc, base_loc, ofs_loc, fieldsize, fieldsign)
-
-    emit_getinteriorfield_gc_i = _genop_getarray_or_interiorfield
-    emit_getinteriorfield_gc_r = _genop_getarray_or_interiorfield
-    emit_getinteriorfield_gc_f = _genop_getarray_or_interiorfield
-
-    def emit_setinteriorfield_gc(self, op, arglocs, regalloc):
-        (base_loc, index_loc, value_loc, ofs_loc,
-            itemsize, fieldsize) = arglocs
-        ofs_loc = self._apply_scale(ofs_loc, index_loc, itemsize)
-        self._write_to_mem(value_loc, base_loc, ofs_loc, fieldsize)
-
-    emit_setinteriorfield_raw = emit_setinteriorfield_gc
-
-    def emit_arraylen_gc(self, op, arglocs, regalloc):
-        res, base_loc, ofs = arglocs
-        self.mc.load(res.value, base_loc.value, ofs.value)
-
-    emit_setarrayitem_gc = emit_setinteriorfield_gc
-    emit_setarrayitem_raw = emit_setarrayitem_gc
-
-    emit_getarrayitem_gc_i = _genop_getarray_or_interiorfield
-    emit_getarrayitem_gc_r = _genop_getarray_or_interiorfield
-    emit_getarrayitem_gc_f = _genop_getarray_or_interiorfield
-    emit_getarrayitem_gc_pure_i = _genop_getarray_or_interiorfield
-    emit_getarrayitem_gc_pure_r = _genop_getarray_or_interiorfield
-    emit_getarrayitem_gc_pure_f = _genop_getarray_or_interiorfield
-    emit_getarrayitem_raw_i = _genop_getarray_or_interiorfield
-    emit_getarrayitem_raw_f = _genop_getarray_or_interiorfield
-
-    emit_raw_store = emit_setarrayitem_gc
-    emit_raw_load_i = _genop_getarray_or_interiorfield
-    emit_raw_load_f = _genop_getarray_or_interiorfield
-
     def _copy_in_scratch2(self, loc):
         if loc.is_imm():
             self.mc.li(r.SCRATCH2.value, loc.value)
@@ -998,10 +988,6 @@
 
     _mixin_ = True
 
-    emit_strlen = FieldOpAssembler._genop_getfield
-    emit_strgetitem = FieldOpAssembler._genop_getarray_or_interiorfield
-    emit_strsetitem = FieldOpAssembler.emit_setarrayitem_gc
-
     def emit_copystrcontent(self, op, arglocs, regalloc):
         self._emit_copycontent(arglocs, is_unicode=False)
 
@@ -1059,12 +1045,8 @@
 
 
 class UnicodeOpAssembler(object):
-
     _mixin_ = True
-
-    emit_unicodelen = FieldOpAssembler._genop_getfield
-    emit_unicodegetitem = FieldOpAssembler._genop_getarray_or_interiorfield
-    emit_unicodesetitem = FieldOpAssembler.emit_setarrayitem_gc
+    # empty!
 
 
 class AllocOpAssembler(object):
diff --git a/rpython/jit/backend/ppc/regalloc.py b/rpython/jit/backend/ppc/regalloc.py
--- a/rpython/jit/backend/ppc/regalloc.py
+++ b/rpython/jit/backend/ppc/regalloc.py
@@ -17,12 +17,9 @@
 from rpython.rtyper.lltypesystem.lloperation import llop
 from rpython.rtyper.annlowlevel import cast_instance_to_gcref
 from rpython.jit.backend.llsupport import symbolic
-from rpython.jit.backend.llsupport.descr import ArrayDescr
+from rpython.jit.backend.llsupport.descr import unpack_arraydescr
 import rpython.jit.backend.ppc.register as r
 import rpython.jit.backend.ppc.condition as c
-from rpython.jit.backend.llsupport.descr import unpack_arraydescr
-from rpython.jit.backend.llsupport.descr import unpack_fielddescr
-from rpython.jit.backend.llsupport.descr import unpack_interiorfielddescr
 from rpython.jit.backend.llsupport.gcmap import allocate_gcmap
 from rpython.rlib.objectmodel import we_are_translated
 from rpython.rlib.debug import debug_print
@@ -691,159 +688,69 @@
                                  src_locations2, dst_locations2, fptmploc)
         return []
 
-    def prepare_setfield_gc(self, op):
-        ofs, size, _ = unpack_fielddescr(op.getdescr())
+    def prepare_gc_store(self, op):
         base_loc = self.ensure_reg(op.getarg(0))
-        value_loc = self.ensure_reg(op.getarg(1))
-        ofs_loc = self.ensure_reg_or_16bit_imm(ConstInt(ofs))
-        return [value_loc, base_loc, ofs_loc, imm(size)]
+        ofs_loc = self.ensure_reg_or_16bit_imm(op.getarg(1))
+        value_loc = self.ensure_reg(op.getarg(2))
+        size_loc = self.ensure_reg_or_any_imm(op.getarg(3))
+        return [value_loc, base_loc, ofs_loc, size_loc]
 
-    prepare_setfield_raw = prepare_setfield_gc
+    def _prepare_gc_load(self, op):
+        base_loc = self.ensure_reg(op.getarg(0))
+        ofs_loc = self.ensure_reg_or_16bit_imm(op.getarg(1))
+        self.free_op_vars()
+        res_loc = self.force_allocate_reg(op)
+        size_box = op.getarg(2)
+        assert isinstance(size_box, ConstInt)
+        nsize = size_box.value      # negative for "signed"
+        size_loc = imm(abs(nsize))
+        if nsize < 0:
+            sign = 1
+        else:
+            sign = 0
+        return [base_loc, ofs_loc, res_loc, size_loc, imm(sign)]
 
-    def _prepare_getfield(self, op):
-        ofs, size, sign = unpack_fielddescr(op.getdescr())
+    prepare_gc_load_i = _prepare_gc_load
+    prepare_gc_load_r = _prepare_gc_load
+    prepare_gc_load_f = _prepare_gc_load
+
+    def prepare_gc_store_indexed(self, op):
         base_loc = self.ensure_reg(op.getarg(0))
-        ofs_loc = self.ensure_reg_or_16bit_imm(ConstInt(ofs))
+        index_loc = self.ensure_reg_or_any_imm(op.getarg(1))
+        value_loc = self.ensure_reg(op.getarg(2))
+        assert op.getarg(3).getint() == 1    # scale
+        ofs_loc = self.ensure_reg_or_16bit_imm(op.getarg(4))
+        assert ofs_loc.is_imm()  # the arg(4) should always be a small constant
+        size_loc = self.ensure_reg_or_any_imm(op.getarg(5))
+        return [base_loc, index_loc, value_loc, ofs_loc, size_loc]
+
+    def _prepare_gc_load_indexed(self, op):
+        base_loc = self.ensure_reg(op.getarg(0))
+        index_loc = self.ensure_reg_or_any_imm(op.getarg(1))
+        assert op.getarg(2).getint() == 1    # scale
+        ofs_loc = self.ensure_reg_or_16bit_imm(op.getarg(3))
+        assert ofs_loc.is_imm()  # the arg(3) should always be a small constant
         self.free_op_vars()
-        res = self.force_allocate_reg(op)
-        return [base_loc, ofs_loc, res, imm(size), imm(sign)]
+        res_loc = self.force_allocate_reg(op)
+        size_box = op.getarg(4)
+        assert isinstance(size_box, ConstInt)
+        nsize = size_box.value      # negative for "signed"
+        size_loc = imm(abs(nsize))
+        if nsize < 0:
+            sign = 1
+        else:
+            sign = 0
+        return [base_loc, index_loc, res_loc, ofs_loc, size_loc, imm(sign)]
 
-    prepare_getfield_gc_i = _prepare_getfield
-    prepare_getfield_gc_r = _prepare_getfield
-    prepare_getfield_gc_f = _prepare_getfield
-    prepare_getfield_raw_i = _prepare_getfield
-    prepare_getfield_raw_f = _prepare_getfield
-    prepare_getfield_gc_pure_i = _prepare_getfield
-    prepare_getfield_gc_pure_r = _prepare_getfield
-    prepare_getfield_gc_pure_f = _prepare_getfield
+    prepare_gc_load_indexed_i = _prepare_gc_load_indexed
+    prepare_gc_load_indexed_r = _prepare_gc_load_indexed
+    prepare_gc_load_indexed_f = _prepare_gc_load_indexed
 
     def prepare_increment_debug_counter(self, op):
         base_loc = self.ensure_reg(op.getarg(0))
         temp_loc = r.SCRATCH2
         return [base_loc, temp_loc]
 
-    def _prepare_getinteriorfield(self, op):
-        t = unpack_interiorfielddescr(op.getdescr())
-        ofs, itemsize, fieldsize, sign = t
-        base_loc = self.ensure_reg(op.getarg(0))
-        index_loc = self.ensure_reg_or_any_imm(op.getarg(1))
-        ofs_loc = self.ensure_reg_or_16bit_imm(ConstInt(ofs))
-        self.free_op_vars()
-        result_loc = self.force_allocate_reg(op)
-        return [base_loc, index_loc, result_loc, ofs_loc,
-                imm(itemsize), imm(fieldsize), imm(sign)]
-
-    prepare_getinteriorfield_gc_i = _prepare_getinteriorfield
-    prepare_getinteriorfield_gc_r = _prepare_getinteriorfield
-    prepare_getinteriorfield_gc_f = _prepare_getinteriorfield
-
-    def prepare_setinteriorfield_gc(self, op):
-        t = unpack_interiorfielddescr(op.getdescr())
-        ofs, itemsize, fieldsize, _ = t
-        base_loc = self.ensure_reg(op.getarg(0))
-        index_loc = self.ensure_reg_or_any_imm(op.getarg(1))
-        value_loc = self.ensure_reg(op.getarg(2))
-        ofs_loc = self.ensure_reg_or_16bit_imm(ConstInt(ofs))
-        return [base_loc, index_loc, value_loc, ofs_loc,
-                imm(itemsize), imm(fieldsize)]
-
-    prepare_setinteriorfield_raw = prepare_setinteriorfield_gc
-
-    def prepare_arraylen_gc(self, op):
-        arraydescr = op.getdescr()
-        assert isinstance(arraydescr, ArrayDescr)
-        ofs = arraydescr.lendescr.offset
-        assert _check_imm_arg(ofs)
-        base_loc = self.ensure_reg(op.getarg(0))
-        self.free_op_vars()
-        res = self.force_allocate_reg(op)
-        return [res, base_loc, imm(ofs)]
-
-    def prepare_setarrayitem_gc(self, op):
-        size, ofs, _ = unpack_arraydescr(op.getdescr())
-        base_loc = self.ensure_reg(op.getarg(0))
-        index_loc = self.ensure_reg_or_any_imm(op.getarg(1))
-        value_loc = self.ensure_reg(op.getarg(2))
-        ofs_loc = self.ensure_reg_or_16bit_imm(ConstInt(ofs))
-        imm_size = imm(size)
-        return [base_loc, index_loc, value_loc, ofs_loc,
-                imm_size, imm_size]
-
-    prepare_setarrayitem_raw = prepare_setarrayitem_gc
-
-    def prepare_raw_store(self, op):
-        size, ofs, _ = unpack_arraydescr(op.getdescr())
-        base_loc = self.ensure_reg(op.getarg(0))
-        index_loc = self.ensure_reg_or_any_imm(op.getarg(1))
-        value_loc = self.ensure_reg(op.getarg(2))
-        ofs_loc = self.ensure_reg_or_16bit_imm(ConstInt(ofs))
-        return [base_loc, index_loc, value_loc, ofs_loc,
-                imm(1), imm(size)]
-
-    def _prepare_getarrayitem(self, op):
-        size, ofs, sign = unpack_arraydescr(op.getdescr())
-        base_loc = self.ensure_reg(op.getarg(0))
-        index_loc = self.ensure_reg_or_any_imm(op.getarg(1))
-        ofs_loc = self.ensure_reg_or_16bit_imm(ConstInt(ofs))
-        self.free_op_vars()
-        result_loc = self.force_allocate_reg(op)
-        imm_size = imm(size)
-        return [base_loc, index_loc, result_loc, ofs_loc,
-                imm_size, imm_size, imm(sign)]
-
-    prepare_getarrayitem_gc_i = _prepare_getarrayitem
-    prepare_getarrayitem_gc_r = _prepare_getarrayitem
-    prepare_getarrayitem_gc_f = _prepare_getarrayitem
-    prepare_getarrayitem_raw_i = _prepare_getarrayitem
-    prepare_getarrayitem_raw_f = _prepare_getarrayitem
-    prepare_getarrayitem_gc_pure_i = _prepare_getarrayitem
-    prepare_getarrayitem_gc_pure_r = _prepare_getarrayitem
-    prepare_getarrayitem_gc_pure_f = _prepare_getarrayitem
-
-    def _prepare_raw_load(self, op):
-        size, ofs, sign = unpack_arraydescr(op.getdescr())
-        base_loc = self.ensure_reg(op.getarg(0))
-        index_loc = self.ensure_reg_or_any_imm(op.getarg(1))
-        ofs_loc = self.ensure_reg_or_16bit_imm(ConstInt(ofs))
-        self.free_op_vars()
-        result_loc = self.force_allocate_reg(op)
-        return [base_loc, index_loc, result_loc, ofs_loc,
-                imm(1), imm(size), imm(sign)]
-
-    prepare_raw_load_i = _prepare_raw_load
-    prepare_raw_load_f = _prepare_raw_load
-
-    def prepare_strlen(self, op):
-        basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.STR,
-                                             self.cpu.translate_support_code)
-        base_loc = self.ensure_reg(op.getarg(0))
-        self.free_op_vars()
-        result_loc = self.force_allocate_reg(op)
-        return [base_loc, imm(ofs_length), result_loc, imm(WORD), imm(0)]
-
-    def prepare_strgetitem(self, op):
-        basesize, itemsize, _ = symbolic.get_array_token(rstr.STR,
-                                    self.cpu.translate_support_code)
-        base_loc = self.ensure_reg(op.getarg(0))
-        index_loc = self.ensure_reg_or_any_imm(op.getarg(1))
-        ofs_loc = self.ensure_reg_or_16bit_imm(ConstInt(basesize))
-        self.free_op_vars()
-        result_loc = self.force_allocate_reg(op)
-        imm_size = imm(itemsize)
-        return [base_loc, index_loc, result_loc, ofs_loc,
-                imm_size, imm_size, imm(0)]
-
-    def prepare_strsetitem(self, op):
-        basesize, itemsize, _ = symbolic.get_array_token(rstr.STR,
-                                    self.cpu.translate_support_code)
-        base_loc = self.ensure_reg(op.getarg(0))
-        index_loc = self.ensure_reg_or_any_imm(op.getarg(1))
-        value_loc = self.ensure_reg(op.getarg(2))
-        ofs_loc = self.ensure_reg_or_16bit_imm(ConstInt(basesize))
-        imm_size = imm(itemsize)
-        return [base_loc, index_loc, value_loc, ofs_loc,
-                imm_size, imm_size]
-
     def prepare_copystrcontent(self, op):
         src_ptr_loc = self.ensure_reg(op.getarg(0))
         dst_ptr_loc = self.ensure_reg(op.getarg(1))
@@ -856,37 +763,6 @@
 
     prepare_copyunicodecontent = prepare_copystrcontent
 
-    def prepare_unicodelen(self, op):
-        basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.UNICODE,
-                                             self.cpu.translate_support_code)
-        base_loc = self.ensure_reg(op.getarg(0))
-        self.free_op_vars()
-        result_loc = self.force_allocate_reg(op)
-        return [base_loc, imm(ofs_length), result_loc, imm(WORD), imm(0)]
-
-    def prepare_unicodegetitem(self, op):
-        basesize, itemsize, _ = symbolic.get_array_token(rstr.UNICODE,
-                                    self.cpu.translate_support_code)
-        base_loc = self.ensure_reg(op.getarg(0))
-        index_loc = self.ensure_reg_or_any_imm(op.getarg(1))
-        ofs_loc = self.ensure_reg_or_16bit_imm(ConstInt(basesize))
-        self.free_op_vars()
-        result_loc = self.force_allocate_reg(op)
-        imm_size = imm(itemsize)
-        return [base_loc, index_loc, result_loc, ofs_loc,
-                imm_size, imm_size, imm(0)]
-
-    def prepare_unicodesetitem(self, op):
-        basesize, itemsize, _ = symbolic.get_array_token(rstr.UNICODE,
-                                    self.cpu.translate_support_code)
-        base_loc = self.ensure_reg(op.getarg(0))
-        index_loc = self.ensure_reg_or_any_imm(op.getarg(1))
-        value_loc = self.ensure_reg(op.getarg(2))
-        ofs_loc = self.ensure_reg_or_16bit_imm(ConstInt(basesize))
-        imm_size = imm(itemsize)
-        return [base_loc, index_loc, value_loc, ofs_loc,
-                imm_size, imm_size]
-
     prepare_same_as_i = helper.prepare_unary_op
     prepare_same_as_r = helper.prepare_unary_op
     prepare_same_as_f = helper.prepare_unary_op
@@ -1078,12 +954,6 @@
         arglocs = self._prepare_guard(op)
         return arglocs
 
-    def prepare_zero_ptr_field(self, op):
-        base_loc = self.ensure_reg(op.getarg(0))
-        ofs_loc = self.ensure_reg_or_16bit_imm(op.getarg(1))
-        value_loc = self.ensure_reg(ConstInt(0))
-        return [value_loc, base_loc, ofs_loc, imm(WORD)]
-
     def prepare_zero_array(self, op):
         itemsize, ofs, _ = unpack_arraydescr(op.getdescr())
         base_loc = self.ensure_reg(op.getarg(0))
diff --git a/rpython/jit/backend/ppc/runner.py b/rpython/jit/backend/ppc/runner.py
--- a/rpython/jit/backend/ppc/runner.py
+++ b/rpython/jit/backend/ppc/runner.py
@@ -21,6 +21,9 @@
     IS_64_BIT = True
     backend_name = 'ppc64'
 
+    # can an ISA instruction handle a factor to the offset?
+    load_supported_factors = (1,)
+
     from rpython.jit.backend.ppc.register import JITFRAME_FIXED_SIZE
     frame_reg = r.SP
     all_reg_indexes = [-1] * 32
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -4,8 +4,7 @@
 
 import os, sys
 from rpython.jit.backend.llsupport import symbolic
-from rpython.jit.backend.llsupport.descr import (ArrayDescr, CallDescr,
-    unpack_arraydescr, unpack_fielddescr, unpack_interiorfielddescr)
+from rpython.jit.backend.llsupport.descr import CallDescr, unpack_arraydescr
 from rpython.jit.backend.llsupport.gcmap import allocate_gcmap
 from rpython.jit.backend.llsupport.regalloc import (FrameManager, BaseRegalloc,
      RegisterManager, TempVar, compute_vars_longevity, is_comparison_or_ovf_op,
@@ -1086,9 +1085,9 @@
         result_loc = self.force_allocate_reg(op)
         size_box = op.getarg(2)
         assert isinstance(size_box, ConstInt)
-        size = size_box.value
-        size_loc = imm(abs(size))
-        if size < 0:
+        nsize = size_box.value      # negative for "signed"
+        size_loc = imm(abs(nsize))
+        if nsize < 0:
             sign_loc = imm1
         else:
             sign_loc = imm0
@@ -1111,9 +1110,9 @@
         assert isinstance(size_box, ConstInt)
         scale = scale_box.value
         offset = offset_box.value
-        size = size_box.value
-        size_loc = imm(abs(size))
-        if size < 0:
+        nsize = size_box.value      # negative for "signed"
+        size_loc = imm(abs(nsize))
+        if nsize < 0:
             sign_loc = imm1
         else:
             sign_loc = imm0
diff --git a/rpython/jit/backend/zarch/assembler.py b/rpython/jit/backend/zarch/assembler.py
--- a/rpython/jit/backend/zarch/assembler.py
+++ b/rpython/jit/backend/zarch/assembler.py
@@ -92,6 +92,12 @@
         # fill in the jf_descr and jf_gcmap fields of the frame according
         # to which failure we are resuming from.  These are set before
         # this function is called (see generate_quick_failure()).
+
+        ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
+        ofs2 = self.cpu.get_ofs_of_frame_field('jf_gcmap')
+        self.mc.STG(r.SCRATCH2, l.addr(ofs2, r.SPP))
+        self.mc.STG(r.SCRATCH, l.addr(ofs, r.SPP))
+
         self._push_core_regs_to_jitframe(mc)
         if withfloats:
             self._push_fp_regs_to_jitframe(mc)
@@ -123,13 +129,10 @@
         assert target != 0
         pool_offset = guardtok._pool_offset
 
-        ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
-        ofs2 = self.cpu.get_ofs_of_frame_field('jf_gcmap')
 
         # overwrite the gcmap in the jitframe
         offset = pool_offset + RECOVERY_GCMAP_POOL_OFFSET
-        self.mc.LG(r.SCRATCH, l.pool(offset))
-        self.mc.STG(r.SCRATCH, l.addr(ofs2, r.SPP))
+        self.mc.LG(r.SCRATCH2, l.pool(offset))
 
         # overwrite the target in pool
         offset = pool_offset + RECOVERY_TARGET_POOL_OFFSET
@@ -138,7 +141,6 @@
 
         self.mc.load_imm(r.SCRATCH, fail_descr)
         #self.mc.LGFI(r.SCRATCH, l.imm(fail_descr))
-        self.mc.STG(r.SCRATCH, l.addr(ofs, r.SPP))
         self.mc.BCR(l.imm(0xf), r.r14)
 
         return startpos
diff --git a/rpython/jit/backend/zarch/codebuilder.py b/rpython/jit/backend/zarch/codebuilder.py
--- a/rpython/jit/backend/zarch/codebuilder.py
+++ b/rpython/jit/backend/zarch/codebuilder.py
@@ -133,7 +133,7 @@
         self.TRAP2()
 
     def trace(self):
-        pass
+        self.SVC(l.imm(142))
         #self.LGHI(r.r2, 17)
         #self.XGR(r.r3, r.r3)
         #self.SVC(l.imm(17))
diff --git a/rpython/jit/backend/zarch/regalloc.py b/rpython/jit/backend/zarch/regalloc.py
--- a/rpython/jit/backend/zarch/regalloc.py
+++ b/rpython/jit/backend/zarch/regalloc.py
@@ -747,6 +747,48 @@
     def prepare_call_malloc_gc(self, op):
         return self._prepare_call_default(op)
 
+    def prepare_call_malloc_nursery(self, op):
+        xxx
+        self.rm.force_allocate_reg(op, selected_reg=r.RES)
+        self.rm.temp_boxes.append(op)
+        tmp_box = TempInt()
+        self.rm.force_allocate_reg(tmp_box, selected_reg=r.RSZ)
+        self.rm.temp_boxes.append(tmp_box)
+        return []
+
+    def prepare_call_malloc_nursery_varsize_frame(self, op):
+        xxx
+        sizeloc = self.ensure_reg(op.getarg(0))
+        # sizeloc must be in a register, but we can free it now
+        # (we take care explicitly of conflicts with r.RES or r.RSZ)
+        self.free_op_vars()
+        # the result will be in r.RES
+        self.rm.force_allocate_reg(op, selected_reg=r.RES)
+        self.rm.temp_boxes.append(op)
+        # we need r.RSZ as a temporary
+        tmp_box = TempInt()
+        self.rm.force_allocate_reg(tmp_box, selected_reg=r.RSZ)
+        self.rm.temp_boxes.append(tmp_box)
+        return [sizeloc]
+
+    def prepare_call_malloc_nursery_varsize(self, op):
+        xxx
+        # the result will be in r.RES
+        self.rm.force_allocate_reg(op, selected_reg=r.RES)
+        self.rm.temp_boxes.append(op)
+        # we need r.RSZ as a temporary
+        tmp_box = TempInt()
+        self.rm.force_allocate_reg(tmp_box, selected_reg=r.RSZ)
+        self.rm.temp_boxes.append(tmp_box)
+        # length_box always survives: it's typically also present in the
+        # next operation that will copy it inside the new array.  Make
+        # sure it is in a register different from r.RES and r.RSZ.  (It
+        # should not be a ConstInt at all.)
+        length_box = op.getarg(2)
+        lengthloc = self.ensure_reg(length_box)
+        return [lengthloc]
+
+
     def _prepare_gc_load(self, op):
         base_loc = self.ensure_reg(op.getarg(0), force_in_reg=True)
         index_loc = self.ensure_reg_or_any_imm(op.getarg(1))
diff --git a/rpython/jit/codewriter/jtransform.py b/rpython/jit/codewriter/jtransform.py
--- a/rpython/jit/codewriter/jtransform.py
+++ b/rpython/jit/codewriter/jtransform.py
@@ -1021,18 +1021,20 @@
             kind = getkind(op.result.concretetype)[0]
             return SpaceOperation('getinteriorfield_gc_%s' % kind, args,
                                   op.result)
-        elif isinstance(op.args[0].concretetype.TO, lltype.GcStruct):
-            # special-case 2: GcStruct with Array field
-            v_inst, c_field, v_index = op.args
-            STRUCT = v_inst.concretetype.TO
-            ARRAY = getattr(STRUCT, c_field.value)
-            assert isinstance(ARRAY, lltype.Array)
-            arraydescr = self.cpu.arraydescrof(STRUCT)
-            kind = getkind(op.result.concretetype)[0]
-            assert kind in ('i', 'f')
-            return SpaceOperation('getarrayitem_gc_%s' % kind,
-                                  [op.args[0], v_index, arraydescr],
-                                  op.result)
+        #elif isinstance(op.args[0].concretetype.TO, lltype.GcStruct):
+        #    # special-case 2: GcStruct with Array field
+        #    ---was added in the faster-rstruct branch,---
+        #    ---no longer directly supported---
+        #    v_inst, c_field, v_index = op.args
+        #    STRUCT = v_inst.concretetype.TO
+        #    ARRAY = getattr(STRUCT, c_field.value)
+        #    assert isinstance(ARRAY, lltype.Array)
+        #    arraydescr = self.cpu.arraydescrof(STRUCT)
+        #    kind = getkind(op.result.concretetype)[0]
+        #    assert kind in ('i', 'f')
+        #    return SpaceOperation('getarrayitem_gc_%s' % kind,
+        #                          [op.args[0], v_index, arraydescr],
+        #                          op.result)
         else:
             assert False, 'not supported'
 
@@ -1084,6 +1086,25 @@
         return SpaceOperation('raw_load_%s' % kind,
                               [op.args[0], op.args[1], descr], op.result)
 
+    def rewrite_op_gc_load_indexed(self, op):
+        T = op.result.concretetype
+        kind = getkind(T)[0]
+        assert kind != 'r'
+        descr = self.cpu.arraydescrof(rffi.CArray(T))
+        if (not isinstance(op.args[2], Constant) or
+            not isinstance(op.args[3], Constant)):
+            raise NotImplementedError("gc_load_indexed: 'scale' and 'base_ofs'"
+                                      " should be constants")
+        # xxx hard-code the size in bytes at translation time, which is
+        # probably fine and avoids lots of issues later
+        bytes = descr.get_item_size_in_bytes()
+        if descr.is_item_signed():
+            bytes = -bytes
+        c_bytes = Constant(bytes, lltype.Signed)
+        return SpaceOperation('gc_load_indexed_%s' % kind,
+                              [op.args[0], op.args[1],
+                               op.args[2], op.args[3], c_bytes], op.result)
+
     def _rewrite_equality(self, op, opname):
         arg0, arg1 = op.args
         if isinstance(arg0, Constant) and not arg0.value:
diff --git a/rpython/jit/metainterp/blackhole.py b/rpython/jit/metainterp/blackhole.py
--- a/rpython/jit/metainterp/blackhole.py
+++ b/rpython/jit/metainterp/blackhole.py
@@ -1434,6 +1434,13 @@
     def bhimpl_raw_load_f(cpu, addr, offset, arraydescr):
         return cpu.bh_raw_load_f(addr, offset, arraydescr)
 
+    @arguments("cpu", "r", "i", "i", "i", "i", returns="i")
+    def bhimpl_gc_load_indexed_i(cpu, addr, index, scale, base_ofs, bytes):
+        return cpu.bh_gc_load_indexed_i(addr, index, scale, base_ofs, bytes)
+    @arguments("cpu", "r", "i", "i", "i", "i", returns="f")
+    def bhimpl_gc_load_indexed_f(cpu, addr, index, scale, base_ofs, bytes):
+        return cpu.bh_gc_load_indexed_f(addr, index, scale, base_ofs, bytes)
+
     @arguments("r", "d", "d")
     def bhimpl_record_quasiimmut_field(struct, fielddescr, mutatefielddescr):
         pass
diff --git a/rpython/jit/metainterp/compile.py b/rpython/jit/metainterp/compile.py
--- a/rpython/jit/metainterp/compile.py
+++ b/rpython/jit/metainterp/compile.py
@@ -948,6 +948,7 @@
         # the virtualrefs and virtualizable have been forced by
         # handle_async_forcing() just a moment ago.
         from rpython.jit.metainterp.blackhole import resume_in_blackhole
+        # NOTE(review): removed leftover "import pdb; pdb.set_trace()" debug breakpoint
         hidden_all_virtuals = metainterp_sd.cpu.get_savedata_ref(deadframe)
         obj = AllVirtuals.show(metainterp_sd.cpu, hidden_all_virtuals)
         all_virtuals = obj.cache
diff --git a/rpython/jit/metainterp/optimizeopt/heap.py b/rpython/jit/metainterp/optimizeopt/heap.py
--- a/rpython/jit/metainterp/optimizeopt/heap.py
+++ b/rpython/jit/metainterp/optimizeopt/heap.py
@@ -535,16 +535,10 @@
         cf.do_setfield(self, op)
 
     def optimize_GETARRAYITEM_GC_I(self, op):
-        # When using str_storage_getitem it might happen that op.getarg(0) is
-        # a virtual string, NOT an array. In that case, we cannot cache the
-        # getarrayitem as if it were an array, obviously. In theory we could
-        # improve by writing special code to interpter the buffer of the
-        # virtual string as if it were an array, but it looks complicate,
-        # fragile and not worth it.
         arrayinfo = self.ensure_ptr_info_arg0(op)
         indexb = self.getintbound(op.getarg(1))
         cf = None
-        if indexb.is_constant() and not arrayinfo.is_vstring():
+        if indexb.is_constant():
             index = indexb.getint()
             arrayinfo.getlenbound(None).make_gt_const(index)
             # use the cache on (arraydescr, index), which is a constant
@@ -561,7 +555,7 @@
         self.make_nonnull(op.getarg(0))
         self.emit_operation(op)
         # the remember the result of reading the array item
-        if cf is not None and not arrayinfo.is_vstring():
+        if cf is not None:
             arrayinfo.setitem(op.getdescr(), indexb.getint(),
                               self.get_box_replacement(op.getarg(0)),
                               self.get_box_replacement(op), cf,
diff --git a/rpython/jit/metainterp/optimizeopt/info.py b/rpython/jit/metainterp/optimizeopt/info.py
--- a/rpython/jit/metainterp/optimizeopt/info.py
+++ b/rpython/jit/metainterp/optimizeopt/info.py
@@ -24,9 +24,6 @@
     def is_virtual(self):
         return False
 
-    def is_vstring(self):
-        return False
-
     def is_precise(self):
         return False
 
diff --git a/rpython/jit/metainterp/optimizeopt/virtualize.py b/rpython/jit/metainterp/optimizeopt/virtualize.py
--- a/rpython/jit/metainterp/optimizeopt/virtualize.py
+++ b/rpython/jit/metainterp/optimizeopt/virtualize.py
@@ -277,10 +277,8 @@
             self.emit_operation(op)
 
     def optimize_GETARRAYITEM_GC_I(self, op):
-        # When using str_storage_getitem we op.getarg(0) is a string, NOT an
-        # array, hence the check. In that case, it will be forced
         opinfo = self.getptrinfo(op.getarg(0))
-        if opinfo and opinfo.is_virtual() and not opinfo.is_vstring():
+        if opinfo and opinfo.is_virtual():
             indexbox = self.get_constant_box(op.getarg(1))
             if indexbox is not None:
                 item = opinfo.getitem(op.getdescr(), indexbox.getint())
diff --git a/rpython/jit/metainterp/optimizeopt/vstring.py b/rpython/jit/metainterp/optimizeopt/vstring.py
--- a/rpython/jit/metainterp/optimizeopt/vstring.py
+++ b/rpython/jit/metainterp/optimizeopt/vstring.py
@@ -62,9 +62,6 @@
         self.mode = mode
         self.length = length
 
-    def is_vstring(self):
-        return True
-
     def getlenbound(self, mode):
         from rpython.jit.metainterp.optimizeopt import intutils
 
diff --git a/rpython/jit/metainterp/pyjitpl.py b/rpython/jit/metainterp/pyjitpl.py
--- a/rpython/jit/metainterp/pyjitpl.py
+++ b/rpython/jit/metainterp/pyjitpl.py
@@ -810,6 +810,27 @@
         return self.execute_with_descr(rop.RAW_LOAD_F, arraydescr,
                                        addrbox, offsetbox)
 
+    def _remove_symbolics(self, c):
+        if not we_are_translated():
+            from rpython.rtyper.lltypesystem import ll2ctypes
+            assert isinstance(c, ConstInt)
+            c = ConstInt(ll2ctypes.lltype2ctypes(c.value))
+        return c
+
+    @arguments("box", "box", "box", "box", "box")
+    def opimpl_gc_load_indexed_i(self, addrbox, indexbox,
+                                 scalebox, baseofsbox, bytesbox):
+        return self.execute(rop.GC_LOAD_INDEXED_I, addrbox, indexbox,
+                            self._remove_symbolics(scalebox),
+                            self._remove_symbolics(baseofsbox), bytesbox)
+
+    @arguments("box", "box", "box", "box", "box")
+    def opimpl_gc_load_indexed_f(self, addrbox, indexbox,
+                                 scalebox, baseofsbox, bytesbox):
+        return self.execute(rop.GC_LOAD_INDEXED_F, addrbox, indexbox,
+                            self._remove_symbolics(scalebox),
+                            self._remove_symbolics(baseofsbox), bytesbox)
+
     @arguments("box")
     def opimpl_hint_force_virtualizable(self, box):
         self.metainterp.gen_store_back_in_vable(box)
diff --git a/rpython/jit/metainterp/test/test_strstorage.py b/rpython/jit/metainterp/test/test_strstorage.py
--- a/rpython/jit/metainterp/test/test_strstorage.py
+++ b/rpython/jit/metainterp/test/test_strstorage.py
@@ -19,7 +19,7 @@
         res = self.interp_operations(f, [], supports_singlefloats=True)
         #
         kind = getkind(TYPE)[0] # 'i' or 'f'
-        self.check_operations_history({'getarrayitem_gc_%s' % kind: 1,
+        self.check_operations_history({'gc_load_indexed_%s' % kind: 1,
                                        'finish': 1})
         #
         if TYPE == lltype.SingleFloat:
@@ -29,8 +29,8 @@
             return longlong.int2singlefloat(res)
         return res
 
-    def str_storage_supported(self, TYPE):
-        py.test.skip('this is not a JIT test')
+    #def str_storage_supported(self, TYPE):
+    #    py.test.skip('this is not a JIT test')
 
     def test_force_virtual_str_storage(self):
         byteorder = sys.byteorder
@@ -48,6 +48,6 @@
             'strsetitem': 1,          # str forcing
             'call_pure_r': 1,         # str forcing (copystrcontent)
             'guard_no_exception': 1,  # str forcing
-            'getarrayitem_gc_i': 1,   # str_storage_getitem
+            'gc_load_indexed_i': 1,   # str_storage_getitem
             'finish': 1
             })
diff --git a/rpython/rlib/buffer.py b/rpython/rlib/buffer.py
--- a/rpython/rlib/buffer.py
+++ b/rpython/rlib/buffer.py
@@ -97,6 +97,18 @@
 
     def __init__(self, buffer, offset, size):
         self.readonly = buffer.readonly
+        if isinstance(buffer, SubBuffer):     # don't nest them
+            # we want a view (offset, size) over a view
+            # (buffer.offset, buffer.size) over buffer.buffer.
+            # Note that either '.size' can be -1 to mean 'up to the end'.
+            at_most = buffer.getlength() - offset
+            if size > at_most or size < 0:
+                if at_most < 0:
+                    at_most = 0
+                size = at_most
+            offset += buffer.offset
+            buffer = buffer.buffer
+        #
         self.buffer = buffer
         self.offset = offset
         self.size = size
diff --git a/rpython/rlib/entrypoint.py b/rpython/rlib/entrypoint.py
--- a/rpython/rlib/entrypoint.py
+++ b/rpython/rlib/entrypoint.py
@@ -1,4 +1,4 @@
-secondary_entrypoints = {}
+secondary_entrypoints = {"main": []}
 
 import py
 from rpython.rtyper.lltypesystem import lltype, rffi
@@ -109,20 +109,3 @@
                     "you.  Another difference is that entrypoint_highlevel() "
                     "returns the normal Python function, which can be safely "
                     "called from more Python code.")
-
-
-# the point of dance below is so the call to rpython_startup_code actually
-# does call asm_stack_bottom. It's here because there is no other good place.
-# This thing is imported by any target which has any API, so it'll get
-# registered
-
-RPython_StartupCode = rffi.llexternal('RPython_StartupCode', [], lltype.Void,
-                                      _nowrapper=True,
-                                      random_effects_on_gcobjs=True)
-
- at entrypoint_lowlevel('main', [], c_name='rpython_startup_code')
-def rpython_startup_code():
-    rffi.stackcounter.stacks_counter += 1
-    llop.gc_stack_bottom(lltype.Void)   # marker for trackgcroot.py
-    RPython_StartupCode()
-    rffi.stackcounter.stacks_counter -= 1
diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py
--- a/rpython/rlib/objectmodel.py
+++ b/rpython/rlib/objectmodel.py
@@ -114,6 +114,8 @@
 
 specialize = _Specialize()
 
+NOT_CONSTANT = object()      # to use in enforceargs()
+
 def enforceargs(*types_, **kwds):
     """ Decorate a function with forcing of RPython-level types on arguments.
     None means no enforcing.
@@ -333,6 +335,25 @@
     # XXX this can be made more efficient in the future
     return bytearray(str(i))
 
+def fetch_translated_config():
+    """Returns the config that is current when translating.
+    Returns None if not translated.
+    """
+    return None
+
+class Entry(ExtRegistryEntry):
+    _about_ = fetch_translated_config
+
+    def compute_result_annotation(self):
+        config = self.bookkeeper.annotator.translator.config
+        return self.bookkeeper.immutablevalue(config)
+
+    def specialize_call(self, hop):
+        from rpython.rtyper.lltypesystem import lltype
+        translator = hop.rtyper.annotator.translator
+        hop.exception_cannot_occur()
+        return hop.inputconst(lltype.Void, translator.config)
+
 # ____________________________________________________________
 
 class FREED_OBJECT(object):
diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py
--- a/rpython/rlib/rbigint.py
+++ b/rpython/rlib/rbigint.py
@@ -2794,8 +2794,10 @@
 
 def parse_digit_string(parser):
     # helper for fromstr
+    base = parser.base
+    if (base & (base - 1)) == 0:
+        return parse_string_from_binary_base(parser)
     a = rbigint()
-    base = parser.base
     digitmax = BASE_MAX[base]
     tens, dig = 1, 0
     while True:
@@ -2811,3 +2813,52 @@
             tens *= base
     a.sign *= parser.sign
     return a
+
+def parse_string_from_binary_base(parser):
+    # The point to this routine is that it takes time linear in the number of
+    # string characters.
+    from rpython.rlib.rstring import ParseStringError
+
+    base = parser.base
+    if   base ==  2: bits_per_char = 1
+    elif base ==  4: bits_per_char = 2
+    elif base ==  8: bits_per_char = 3
+    elif base == 16: bits_per_char = 4
+    elif base == 32: bits_per_char = 5
+    else:
+        raise AssertionError
+
+    # n <- total number of bits needed, while moving 'parser' to the end
+    n = 0
+    while parser.next_digit() >= 0:
+        n += 1
+
+    # b <- number of Python digits needed, = ceiling(n/SHIFT).
+    try:
+        b = ovfcheck(n * bits_per_char)
+        b = ovfcheck(b + (SHIFT - 1))
+    except OverflowError:
+        raise ParseStringError("long string too large to convert")
+    b = (b // SHIFT) or 1
+    z = rbigint([NULLDIGIT] * b, sign=parser.sign)
+
+    # Read string from right, and fill in long from left; i.e.,
+    # from least to most significant in both.
+    accum = _widen_digit(0)
+    bits_in_accum = 0
+    pdigit = 0
+    for _ in range(n):
+        k = parser.prev_digit()
+        accum |= _widen_digit(k) << bits_in_accum
+        bits_in_accum += bits_per_char
+        if bits_in_accum >= SHIFT:
+            z.setdigit(pdigit, accum)
+            pdigit += 1
+            assert pdigit <= b
+            accum >>= SHIFT
+            bits_in_accum -= SHIFT
+
+    if bits_in_accum:
+        z.setdigit(pdigit, accum)
+    z._normalize()
+    return z
diff --git a/rpython/rlib/rposix.py b/rpython/rlib/rposix.py
--- a/rpython/rlib/rposix.py
+++ b/rpython/rlib/rposix.py
@@ -9,7 +9,7 @@
 from rpython.translator.tool.cbuild import ExternalCompilationInfo
 from rpython.rlib.rarithmetic import intmask, widen
 from rpython.rlib.objectmodel import (
-    specialize, enforceargs, register_replacement_for)
+    specialize, enforceargs, register_replacement_for, NOT_CONSTANT)
 from rpython.rlib.signature import signature
 from rpython.rlib import types
 from rpython.annotator.model import s_Str0
@@ -415,7 +415,7 @@
 
 @replace_os_function('open')
 @specialize.argtype(0)
- at enforceargs(None, int, int, typecheck=False)
+ at enforceargs(NOT_CONSTANT, int, int, typecheck=False)
 def open(path, flags, mode):
     if _prefer_unicode(path):
         fd = c_wopen(_as_unicode0(path), flags, mode)
diff --git a/rpython/rlib/rshrinklist.py b/rpython/rlib/rshrinklist.py
--- a/rpython/rlib/rshrinklist.py
+++ b/rpython/rlib/rshrinklist.py
@@ -6,6 +6,8 @@
     The twist is that occasionally append() will throw away the
     items for which must_keep() returns False.  (It does so without
     changing the order.)
+
+    See also rpython.rlib.rweaklist.
     """
     _mixin_ = True
 
diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py
--- a/rpython/rlib/rstring.py
+++ b/rpython/rlib/rstring.py
@@ -485,6 +485,24 @@
         else:
             return -1
 
+    def prev_digit(self):
+        # After exhausting all n digits in next_digit(), you can walk them
+        # again in reverse order by calling prev_digit() exactly n times
+        i = self.i - 1
+        assert i >= 0
+        self.i = i
+        c = self.s[i]
+        digit = ord(c)
+        if '0' <= c <= '9':
+            digit -= ord('0')
+        elif 'A' <= c <= 'Z':
+            digit = (digit - ord('A')) + 10
+        elif 'a' <= c <= 'z':
+            digit = (digit - ord('a')) + 10
+        else:
+            raise AssertionError
+        return digit
+
 # -------------- public API ---------------------------------
 
 INIT_SIZE = 100 # XXX tweak
diff --git a/rpython/rlib/rstruct/nativefmttable.py b/rpython/rlib/rstruct/nativefmttable.py
--- a/rpython/rlib/rstruct/nativefmttable.py
+++ b/rpython/rlib/rstruct/nativefmttable.py
@@ -11,7 +11,6 @@
 from rpython.rlib.rstruct.standardfmttable import native_is_bigendian
 from rpython.rlib.rstruct.error import StructError
 from rpython.rlib.unroll import unrolling_iterable
-from rpython.rlib.strstorage import str_storage_getitem
 from rpython.rtyper.lltypesystem import lltype, rffi
 from rpython.rtyper.tool import rffi_platform
 from rpython.translator.tool.cbuild import ExternalCompilationInfo
diff --git a/rpython/rlib/rstruct/standardfmttable.py b/rpython/rlib/rstruct/standardfmttable.py
--- a/rpython/rlib/rstruct/standardfmttable.py
+++ b/rpython/rlib/rstruct/standardfmttable.py
@@ -12,7 +12,7 @@
 from rpython.rlib.rstruct import ieee
 from rpython.rlib.rstruct.error import StructError, StructOverflowError
 from rpython.rlib.unroll import unrolling_iterable
-from rpython.rlib.strstorage import str_storage_getitem, str_storage_supported
+from rpython.rlib.strstorage import str_storage_getitem
 from rpython.rlib import rarithmetic
 from rpython.rtyper.lltypesystem import rffi
 
@@ -185,13 +185,14 @@
             data = fmtiter.read(size)
             fmtiter.appendobj(ieee.unpack_float(data, fmtiter.bigendian))
             return
-        if not str_storage_supported(TYPE):
-            # this happens e.g. on win32 and ARM32: we cannot read the string
-            # content as an array of doubles because it's not properly
-            # aligned. But we can read a longlong and convert to float
-            assert TYPE == rffi.DOUBLE
-            assert rffi.sizeof(TYPE) == 8
-            return unpack_longlong2float(fmtiter)
+        ## XXX check if the following code is still needed
+        ## if not str_storage_supported(TYPE):
+        ##     # this happens e.g. on win32 and ARM32: we cannot read the string
+        ##     # content as an array of doubles because it's not properly
+        ##     # aligned. But we can read a longlong and convert to float
+        ##     assert TYPE == rffi.DOUBLE
+        ##     assert rffi.sizeof(TYPE) == 8
+        ##     return unpack_longlong2float(fmtiter)
         try:
             # fast path
             val = unpack_fastpath(TYPE)(fmtiter)
@@ -246,7 +247,7 @@
 
     @specialize.argtype(0)
     def unpack_int_fastpath_maybe(fmtiter):
-        if fmtiter.bigendian != native_is_bigendian or not str_storage_supported(TYPE):
+        if fmtiter.bigendian != native_is_bigendian or not native_is_ieee754: ## or not str_storage_supported(TYPE):
             return False
         try:
             intvalue = unpack_fastpath(TYPE)(fmtiter)
diff --git a/rpython/rlib/rthread.py b/rpython/rlib/rthread.py
--- a/rpython/rlib/rthread.py
+++ b/rpython/rlib/rthread.py
@@ -291,8 +291,6 @@
 # ____________________________________________________________
 #
 # Thread-locals.
-# KEEP THE REFERENCE ALIVE, THE GC DOES NOT FOLLOW THEM SO FAR!
-# We use _make_sure_does_not_move() to make sure the pointer will not move.
 
 
 class ThreadLocalField(object):
@@ -351,6 +349,11 @@
 
 
 class ThreadLocalReference(ThreadLocalField):
+    # A thread-local that points to an object.  The object stored in such
+    # a thread-local is kept alive as long as the thread is not finished
+    # (but only with our own GCs!  it seems not to work with Boehm...)
+    # (also, on Windows, if you're not making a DLL but an EXE, it will
+    # leak the objects when a thread finishes; see threadlocal.c.)
     _COUNT = 1
 
     def __init__(self, Cls, loop_invariant=False):
@@ -378,20 +381,41 @@
             assert isinstance(value, Cls) or value is None
             if we_are_translated():
                 from rpython.rtyper.annlowlevel import cast_instance_to_gcref
-                from rpython.rlib.rgc import _make_sure_does_not_move
-                from rpython.rlib.objectmodel import running_on_llinterp
                 gcref = cast_instance_to_gcref(value)
-                if not running_on_llinterp:
-                    if gcref:
-                        _make_sure_does_not_move(gcref)
                 value = lltype.cast_ptr_to_int(gcref)
                 setraw(value)
+                rgc.register_custom_trace_hook(TRACETLREF, _lambda_trace_tlref)
+                rgc.ll_writebarrier(_tracetlref_obj)
             else:
                 self.local.value = value
 
         self.get = get
         self.set = set
 
+        def _trace_tlref(gc, obj, callback, arg):
+            p = llmemory.NULL
+            llop.threadlocalref_acquire(lltype.Void)
+            while True:
+                p = llop.threadlocalref_enum(llmemory.Address, p)
+                if not p:
+                    break
+                gc._trace_callback(callback, arg, p + offset)
+            llop.threadlocalref_release(lltype.Void)
+        _lambda_trace_tlref = lambda: _trace_tlref
+        TRACETLREF = lltype.GcStruct('TRACETLREF')
+        _tracetlref_obj = lltype.malloc(TRACETLREF, immortal=True)
+
+    @staticmethod
+    def automatic_keepalive(config):
+        """Returns True if translated with a GC that keeps alive
+        the set() value until the end of the thread.  Returns False
+        if you need to keep it alive yourself (but in that case, you
+        should also reset it to None before the thread finishes).
+        """
+        return (config.translation.gctransformer == "framework" and
+                # see translator/c/src/threadlocal.c for the following line
+                (not _win32 or config.translation.shared))
+
 
 tlfield_thread_ident = ThreadLocalField(lltype.Signed, "thread_ident",
                                         loop_invariant=True)
@@ -399,7 +423,8 @@
                                    loop_invariant=True)
 tlfield_rpy_errno = ThreadLocalField(rffi.INT, "rpy_errno")
 tlfield_alt_errno = ThreadLocalField(rffi.INT, "alt_errno")
-if sys.platform == "win32":
+_win32 = (sys.platform == "win32")
+if _win32:
     from rpython.rlib import rwin32
     tlfield_rpy_lasterror = ThreadLocalField(rwin32.DWORD, "rpy_lasterror")
     tlfield_alt_lasterror = ThreadLocalField(rwin32.DWORD, "alt_lasterror")
diff --git a/rpython/rlib/rweaklist.py b/rpython/rlib/rweaklist.py
--- a/rpython/rlib/rweaklist.py
+++ b/rpython/rlib/rweaklist.py
@@ -5,6 +5,13 @@
 
 
 class RWeakListMixin(object):
+    """A mixin base class.  A collection that weakly maps indexes to objects.
+    After an object goes away, its index is marked free and will be reused
+    by some following add_handle() call.  So add_handle() might not append
+    the object at the end of the list, but can put it anywhere.
+
+    See also rpython.rlib.rshrinklist.
+    """
     _mixin_ = True
 
     def initialize(self):
diff --git a/rpython/rlib/strstorage.py b/rpython/rlib/strstorage.py
--- a/rpython/rlib/strstorage.py
+++ b/rpython/rlib/strstorage.py
@@ -9,54 +9,31 @@
 #      rstr.py:copy_string_contents), which has no chance to work during
 #      tracing
 #
-#   2. use llop.raw_load: despite the name, llop.raw_load DOES support reading
-#      from GC pointers. However:
-#
-#        a. we would like to use a CompositeOffset as the offset (using the
-#           same logic as in rstr.py:_get_raw_str_buf), but this is not (yet)
-#           supported before translation: it works only if you pass an actual
-#           integer
-#
-#        b. raw_load from a GC pointer is not (yet) supported by the
-#           JIT. There are plans to introduce a gc_load operation: when it
-#           will be there, we could fix the issue above and actually use it to
-#           implement str_storage_getitem
-#
-#   3. the actual solution: cast rpy_string to a GcStruct which has the very
+#   2. cast rpy_string to a GcStruct which has the very
 #      same layout, with the only difference that its 'chars' field is no
+#      longer an Array(Char) but e.g. an Array(Signed). Then, we just need to
-#      read the appropriate index into the array


More information about the pypy-commit mailing list