[pypy-commit] pypy stdlib-2.7.3: hg merge default
amauryfa
noreply at buildbot.pypy.org
Fri Sep 7 19:52:13 CEST 2012
Author: Amaury Forgeot d'Arc <amauryfa at gmail.com>
Branch: stdlib-2.7.3
Changeset: r57230:802440ad8f1c
Date: 2012-09-07 19:51 +0200
http://bitbucket.org/pypy/pypy/changeset/802440ad8f1c/
Log: hg merge default
diff too long, truncating to 10000 out of 13640 lines
diff --git a/.hgignore b/.hgignore
--- a/.hgignore
+++ b/.hgignore
@@ -1,5 +1,6 @@
syntax: glob
*.py[co]
+*.sw[po]
*~
.*.swp
.idea
diff --git a/pypy/config/translationoption.py b/pypy/config/translationoption.py
--- a/pypy/config/translationoption.py
+++ b/pypy/config/translationoption.py
@@ -24,6 +24,7 @@
'maemo',
'host',
'distutils',
+ 'arm',
]
translation_optiondescription = OptionDescription(
@@ -117,7 +118,7 @@
("translation.gcrootfinder", DEFL_ROOTFINDER_WITHJIT),
("translation.list_comprehension_operations", True)]),
ChoiceOption("jit_backend", "choose the backend for the JIT",
- ["auto", "x86", "x86-without-sse2", "llvm"],
+ ["auto", "x86", "x86-without-sse2", "llvm", 'arm'],
default="auto", cmdline="--jit-backend"),
ChoiceOption("jit_profiler", "integrate profiler support into the JIT",
["off", "oprofile"],
@@ -406,7 +407,7 @@
set_platform(config.translation.platform, config.translation.cc)
def get_platform(config):
- from pypy.translator.platform import pick_platform
+ from pypy.translator.platform import pick_platform
opt = config.translation.platform
cc = config.translation.cc
return pick_platform(opt, cc)
diff --git a/pypy/conftest.py b/pypy/conftest.py
--- a/pypy/conftest.py
+++ b/pypy/conftest.py
@@ -553,6 +553,7 @@
def _spawn(self, *args, **kwds):
import pexpect
+ kwds.setdefault('timeout', 600)
child = pexpect.spawn(*args, **kwds)
child.logfile = sys.stdout
return child
diff --git a/pypy/doc/arm.rst b/pypy/doc/arm.rst
new file mode 100644
--- /dev/null
+++ b/pypy/doc/arm.rst
@@ -0,0 +1,150 @@
+=========================
+Cross-translating for ARM
+=========================
+
+
+Here we describe the setup and the steps required to translate an interpreter
+written in RPython to ARM using a cross-compilation toolchain.
+
+To translate an RPython program for ARM we can either translate directly on an
+ARM device following the normal translation steps, which unfortunately is not
+really feasible on most ARM machines, or we can cross-translate using a
+cross-compilation toolchain.
+
+To cross-translate we run the translation on a more powerful (usually
+x86) machine and generate a binary for ARM using a cross-compiler to compile
+the generated C code. There are several constraints when doing this. In
+particular we currently only support Linux as the translation host and target
+platform (tested on Ubuntu). Also we need a 32-bit environment to run the
+translation. This can be done either on a 32-bit host or in a 32-bit chroot.
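+
+As a quick sanity check (purely illustrative, not part of the official setup),
+a 32-bit Python can be recognized by its ``sys.maxint`` value:
+
+::
+
+    python -c "import sys; print sys.maxint == 2 ** 31 - 1"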
+
+
+Requirements
+------------
+
+The tools required to cross-translate from a Linux-based host to an ARM-based Linux target are:
+
+- A checkout of PyPy's arm-backend-2 branch.
+- The GCC ARM cross compiler (on Ubuntu it is the ``gcc-arm-linux-gnueabi`` package), but other toolchains should also work.
+- Scratchbox 2, a cross-compilation engine (``scratchbox2`` Ubuntu package).
+- A 32-bit PyPy or Python.
+- And the following (or corresponding) packages need to be installed to create an ARM-based chroot (an example install command follows the list):
+
+ * ``debootstrap``
+ * ``schroot``
+ * ``binfmt-support``
+ * ``qemu-system``
+ * ``qemu-user-static``
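+
+On Ubuntu the packages above can be installed with a single command along these
+lines (package names may differ between releases):
+
+::
+
+    apt-get install gcc-arm-linux-gnueabi scratchbox2 debootstrap schroot \
+        binfmt-support qemu-system qemu-user-static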
+
+
+Creating a QEMU-based ARM chroot
+--------------------------------
+
+First we will need to create a rootfs containing the packages and dependencies
+required in order to translate PyPy or other interpreters. We are going to
+assume that the files will be placed in ``/srv/chroot/precise_arm``.
+
+Create the rootfs by calling:
+
+::
+
+ mkdir -p /srv/chroot/precise_arm
+ qemu-debootstrap --variant=buildd --arch=armel precise /srv/chroot/precise_arm/ http://ports.ubuntu.com/ubuntu-ports/
+
+Next, copy the ``qemu-arm-static`` binary to the rootfs.
+
+::
+
+ cp /usr/bin/qemu-arm-static /srv/chroot/precise_arm/usr/bin/qemu-arm-static
+
+For easier configuration and management we will create a schroot pointing to
+the rootfs. We need to add a configuration block (like the one below) to the
+schroot configuration file in ``/etc/schroot/schroot.conf``.
+
+
+::
+
+ [precise_arm]
+ directory=/srv/chroot/precise_arm
+ users=USERNAME
+ root-users=USERNAME
+ groups=users
+ aliases=default
+ type=directory
+
+
+To verify that everything is working in the chroot, running ``schroot -c
+precise_arm`` should start a shell running in the schroot environment, using
+qemu-arm to execute the ARM binaries. Running ``uname -m`` in the chroot should
+yield a result like ``armv7l``, showing that we are emulating an ARM system.
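+
+For example, a one-off check without starting an interactive shell could look
+like this (assuming the schroot entry is named ``precise_arm`` as above):
+
+::
+
+    schroot -c precise_arm -- uname -m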
+
+Start the schroot as the user root in order to configure the apt sources and
+to install the following packages:
+
+
+::
+
+ schroot -c precise_arm -u root
+ echo "deb http://ports.ubuntu.com/ubuntu-ports/ precise main universe restricted" > /etc/apt/sources.list
+ apt-get update
+ apt-get install libffi-dev libgc-dev python-dev build-essential libncurses5-dev libbz2-dev
+
+
+Now all dependencies should be in place and we can exit the schroot environment.
+
+
+Configuring scratchbox2
+-----------------------
+
+To configure the scratchbox2 environment we need to cd into the root directory
+of the rootfs we created before. From there we can call the sb2 configuration
+tools, which will take the current directory as the base directory for the
+scratchbox2 environment.
+
+::
+
+ cd /srv/chroot/precise_arm
+ sb2-init -c `which qemu-arm` ARM `which arm-linux-gnueabi-gcc`
+
+This will create a scratchbox2-based environment called ARM that maps calls to
+gcc made within the scratchbox to the arm-linux-gnueabi-gcc outside the
+scratchbox. Now we should have a working cross-compilation toolchain in place
+and can start cross-translating programs for ARM.
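+
+As a quick check of the mapping (an illustration only; it assumes the target
+was named ARM during ``sb2-init``), asking gcc for its version from within the
+scratchbox2 environment should report the ARM cross-compiler:
+
+::
+
+    sb2 -t ARM gcc --version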
+
+Translation
+-----------
+
+Having performed all the preliminary steps we should now be able to
+cross-translate a program for ARM. You can use this_ minimal target to test
+your setup before applying it to a larger project.
+
+Before starting the translator we need to set two environment variables, so the
+translator knows how to use the scratchbox environment. We need to set the
+**SB2** environment variable to point to the rootfs and **SB2OPT** to contain
+the command line options for the sb2 command. If our rootfs is in the folder
+/srv/chroot/precise_arm and the scratchbox environment is called "ARM", the
+variables would be defined as follows.
+
+
+::
+
+ export SB2=/srv/chroot/precise_arm
+ export SB2OPT='-t ARM'
+
+Once this is set, you can call the translator:
+
+::
+
+ pypy ~/path_to_pypy_checkout/pypy/translator/goal/translate.py -O1 --platform=arm target.py
+
+If everything worked correctly this should yield an ARM binary. Running this binary in the ARM chroot or on an ARM device should produce the output ``"Hello World"``.
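+
+For example, using the schroot created earlier (the binary name below is only
+an illustration; translate.py derives the actual name from the translated
+target):
+
+::
+
+    schroot -c precise_arm -- ./target-c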
+
+.. _`this`:
+
+::
+
+ def main(args):
+ print "Hello World"
+ return 0
+
+ def target(*args):
+ return main, None
\ No newline at end of file
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -37,3 +37,5 @@
.. branch: jit-opaque-licm
.. branch: rpython-utf8
Support for utf-8 encoding in RPython
+.. branch: arm-backend-2
+Support ARM in the JIT.
diff --git a/pypy/jit/backend/arm/__init__.py b/pypy/jit/backend/arm/__init__.py
new file mode 100644
diff --git a/pypy/jit/backend/arm/arch.py b/pypy/jit/backend/arm/arch.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/arch.py
@@ -0,0 +1,67 @@
+from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.rlib.rarithmetic import r_uint
+
+
+FUNC_ALIGN = 8
+WORD = 4
+DOUBLE_WORD = 8
+
+# the number of registers that we need to save around malloc calls
+N_REGISTERS_SAVED_BY_MALLOC = 9
+# the offset from the FP where the list of the registers mentioned above starts
+MY_COPY_OF_REGS = WORD
+# The address in the PC register points two words behind the current instruction
+PC_OFFSET = 8
+FORCE_INDEX_OFS = 0
+
+from pypy.translator.tool.cbuild import ExternalCompilationInfo
+eci = ExternalCompilationInfo(post_include_bits=["""
+static int pypy__arm_int_div(int a, int b) {
+ return a/b;
+}
+static unsigned int pypy__arm_uint_div(unsigned int a, unsigned int b) {
+    return a/b;
+}
+static int pypy__arm_int_mod(int a, int b) {
+    return a % b;
+}
+"""])
+
+
+def arm_int_div_emulator(a, b):
+ return int(a / float(b))
+arm_int_div_sign = lltype.Ptr(
+ lltype.FuncType([lltype.Signed, lltype.Signed], lltype.Signed))
+arm_int_div = rffi.llexternal(
+ "pypy__arm_int_div", [lltype.Signed, lltype.Signed], lltype.Signed,
+ _callable=arm_int_div_emulator,
+ compilation_info=eci,
+ _nowrapper=True, elidable_function=True)
+
+
+def arm_uint_div_emulator(a, b):
+ return r_uint(a) / r_uint(b)
+arm_uint_div_sign = lltype.Ptr(
+ lltype.FuncType([lltype.Unsigned, lltype.Unsigned], lltype.Unsigned))
+arm_uint_div = rffi.llexternal(
+ "pypy__arm_uint_div", [lltype.Unsigned, lltype.Unsigned], lltype.Unsigned,
+ _callable=arm_uint_div_emulator,
+ compilation_info=eci,
+ _nowrapper=True, elidable_function=True)
+
+
+def arm_int_mod_emulator(a, b):
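+    # emulates C-style truncated modulo: the result takes the sign of the
+    # dividend, matching the behaviour of the pypy__arm_int_mod helper above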
+ sign = 1
+ if a < 0:
+ a = -1 * a
+ sign = -1
+ if b < 0:
+ b = -1 * b
+ res = a % b
+ return sign * res
+arm_int_mod_sign = arm_int_div_sign
+arm_int_mod = rffi.llexternal(
+ "pypy__arm_int_mod", [lltype.Signed, lltype.Signed], lltype.Signed,
+ _callable=arm_int_mod_emulator,
+ compilation_info=eci,
+ _nowrapper=True, elidable_function=True)
diff --git a/pypy/jit/backend/arm/assembler.py b/pypy/jit/backend/arm/assembler.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/assembler.py
@@ -0,0 +1,1358 @@
+from __future__ import with_statement
+import os
+from pypy.jit.backend.arm.helper.assembler import saved_registers
+from pypy.jit.backend.arm import conditions as c
+from pypy.jit.backend.arm import registers as r
+from pypy.jit.backend.arm.arch import WORD, DOUBLE_WORD, FUNC_ALIGN, \
+ N_REGISTERS_SAVED_BY_MALLOC
+from pypy.jit.backend.arm.codebuilder import ARMv7Builder, OverwritingBuilder
+from pypy.jit.backend.arm.locations import get_fp_offset
+from pypy.jit.backend.arm.regalloc import (Regalloc, ARMFrameManager,
+ ARMv7RegisterManager, check_imm_arg,
+ operations as regalloc_operations,
+ operations_with_guard as regalloc_operations_with_guard)
+from pypy.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
+from pypy.jit.backend.model import CompiledLoopToken
+from pypy.jit.codewriter import longlong
+from pypy.jit.codewriter.effectinfo import EffectInfo
+from pypy.jit.metainterp.history import AbstractFailDescr, INT, REF, FLOAT
+from pypy.jit.metainterp.history import BoxInt, ConstInt
+from pypy.jit.metainterp.resoperation import rop, ResOperation
+from pypy.rlib import rgc
+from pypy.rlib.objectmodel import we_are_translated, specialize
+from pypy.rpython.annlowlevel import llhelper
+from pypy.rpython.lltypesystem import lltype, rffi, llmemory
+from pypy.rpython.lltypesystem.lloperation import llop
+from pypy.jit.backend.arm.opassembler import ResOpAssembler
+from pypy.rlib.debug import (debug_print, debug_start, debug_stop,
+ have_debug_prints)
+from pypy.rlib.jit import AsmInfo
+from pypy.rlib.objectmodel import compute_unique_id
+
+# XXX Move to llsupport
+from pypy.jit.backend.x86.support import values_array, memcpy_fn
+
+DEBUG_COUNTER = lltype.Struct('DEBUG_COUNTER', ('i', lltype.Signed),
+ ('type', lltype.Char), # 'b'ridge, 'l'abel or
+ # 'e'ntry point
+ ('number', lltype.Signed))
+
+
+class AssemblerARM(ResOpAssembler):
+
+ STACK_FIXED_AREA = -1
+
+ debug = True
+
+ def __init__(self, cpu, failargs_limit=1000):
+ self.cpu = cpu
+ self.fail_boxes_int = values_array(lltype.Signed, failargs_limit)
+ self.fail_boxes_float = values_array(longlong.FLOATSTORAGE,
+ failargs_limit)
+ self.fail_boxes_ptr = values_array(llmemory.GCREF, failargs_limit)
+ self.fail_boxes_count = 0
+ self.fail_force_index = 0
+ self.setup_failure_recovery()
+ self.mc = None
+ self.memcpy_addr = 0
+ self.pending_guards = None
+ self._exit_code_addr = 0
+ self.current_clt = None
+ self.malloc_slowpath = 0
+ self.wb_slowpath = [0, 0, 0, 0]
+ self._regalloc = None
+ self.datablockwrapper = None
+ self.propagate_exception_path = 0
+ self.stack_check_slowpath = 0
+ self._compute_stack_size()
+ self._debug = False
+ self.loop_run_counters = []
+ self.debug_counter_descr = cpu.fielddescrof(DEBUG_COUNTER, 'i')
+
+ def set_debug(self, v):
+ r = self._debug
+ self._debug = v
+ return r
+
+ def _compute_stack_size(self):
+ self.STACK_FIXED_AREA = len(r.callee_saved_registers) * WORD
+ self.STACK_FIXED_AREA += WORD # FORCE_TOKEN
+ self.STACK_FIXED_AREA += N_REGISTERS_SAVED_BY_MALLOC * WORD
+ if self.cpu.supports_floats:
+ self.STACK_FIXED_AREA += (len(r.callee_saved_vfp_registers)
+ * DOUBLE_WORD)
+ if self.STACK_FIXED_AREA % 8 != 0:
+ self.STACK_FIXED_AREA += WORD # Stack alignment
+ assert self.STACK_FIXED_AREA % 8 == 0
+
+ def setup(self, looptoken, operations):
+ self.current_clt = looptoken.compiled_loop_token
+ operations = self.cpu.gc_ll_descr.rewrite_assembler(self.cpu,
+ operations, self.current_clt.allgcrefs)
+ assert self.memcpy_addr != 0, 'setup_once() not called?'
+ self.mc = ARMv7Builder()
+ self.pending_guards = []
+ assert self.datablockwrapper is None
+ allblocks = self.get_asmmemmgr_blocks(looptoken)
+ self.datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr,
+ allblocks)
+ self.target_tokens_currently_compiling = {}
+ return operations
+
+ def teardown(self):
+ self.current_clt = None
+ self._regalloc = None
+ self.mc = None
+ self.pending_guards = None
+ assert self.datablockwrapper is None
+
+ def setup_once(self):
+ # Addresses of functions called by new_xxx operations
+ gc_ll_descr = self.cpu.gc_ll_descr
+ gc_ll_descr.initialize()
+ self._build_wb_slowpath(False)
+ self._build_wb_slowpath(True)
+ if self.cpu.supports_floats:
+ self._build_wb_slowpath(False, withfloats=True)
+ self._build_wb_slowpath(True, withfloats=True)
+ self._build_propagate_exception_path()
+ if gc_ll_descr.get_malloc_slowpath_addr is not None:
+ self._build_malloc_slowpath()
+ self._build_stack_check_slowpath()
+ if gc_ll_descr.gcrootmap and gc_ll_descr.gcrootmap.is_shadow_stack:
+ self._build_release_gil(gc_ll_descr.gcrootmap)
+ self.memcpy_addr = self.cpu.cast_ptr_to_int(memcpy_fn)
+ self._exit_code_addr = self._gen_exit_path()
+ self._leave_jitted_hook_save_exc = \
+ self._gen_leave_jitted_hook_code(True)
+ self._leave_jitted_hook = self._gen_leave_jitted_hook_code(False)
+ if not self._debug:
+ # if self._debug is already set it means that someone called
+ # set_debug by hand before initializing the assembler. Leave it
+ # as it is
+ debug_start('jit-backend-counts')
+ self.set_debug(have_debug_prints())
+ debug_stop('jit-backend-counts')
+
+ def finish_once(self):
+ if self._debug:
+ debug_start('jit-backend-counts')
+ for i in range(len(self.loop_run_counters)):
+ struct = self.loop_run_counters[i]
+ if struct.type == 'l':
+ prefix = 'TargetToken(%d)' % struct.number
+ elif struct.type == 'b':
+ prefix = 'bridge ' + str(struct.number)
+ else:
+ prefix = 'entry ' + str(struct.number)
+ debug_print(prefix + ':' + str(struct.i))
+ debug_stop('jit-backend-counts')
+
+ # XXX: merge with x86
+ def _register_counter(self, tp, number, token):
+ # YYY very minor leak -- we need the counters to stay alive
+ # forever, just because we want to report them at the end
+ # of the process
+ struct = lltype.malloc(DEBUG_COUNTER, flavor='raw',
+ track_allocation=False)
+ struct.i = 0
+ struct.type = tp
+ if tp == 'b' or tp == 'e':
+ struct.number = number
+ else:
+ assert token
+ struct.number = compute_unique_id(token)
+ self.loop_run_counters.append(struct)
+ return struct
+
+ def _append_debugging_code(self, operations, tp, number, token):
+ counter = self._register_counter(tp, number, token)
+ c_adr = ConstInt(rffi.cast(lltype.Signed, counter))
+ box = BoxInt()
+ box2 = BoxInt()
+ ops = [ResOperation(rop.GETFIELD_RAW, [c_adr],
+ box, descr=self.debug_counter_descr),
+ ResOperation(rop.INT_ADD, [box, ConstInt(1)], box2),
+ ResOperation(rop.SETFIELD_RAW, [c_adr, box2],
+ None, descr=self.debug_counter_descr)]
+ operations.extend(ops)
+
+ @specialize.argtype(1)
+ def _inject_debugging_code(self, looptoken, operations, tp, number):
+ if self._debug:
+ # before doing anything, let's increase a counter
+ s = 0
+ for op in operations:
+ s += op.getopnum()
+ looptoken._arm_debug_checksum = s
+
+ newoperations = []
+ self._append_debugging_code(newoperations, tp, number,
+ None)
+ for op in operations:
+ newoperations.append(op)
+ if op.getopnum() == rop.LABEL:
+ self._append_debugging_code(newoperations, 'l', number,
+ op.getdescr())
+ operations = newoperations
+ return operations
+
+ @staticmethod
+ def _release_gil_shadowstack():
+ before = rffi.aroundstate.before
+ if before:
+ before()
+
+ @staticmethod
+ def _reacquire_gil_shadowstack():
+ after = rffi.aroundstate.after
+ if after:
+ after()
+
+ _NOARG_FUNC = lltype.Ptr(lltype.FuncType([], lltype.Void))
+
+ def _build_release_gil(self, gcrootmap):
+ assert gcrootmap.is_shadow_stack
+ releasegil_func = llhelper(self._NOARG_FUNC,
+ self._release_gil_shadowstack)
+ reacqgil_func = llhelper(self._NOARG_FUNC,
+ self._reacquire_gil_shadowstack)
+ self.releasegil_addr = rffi.cast(lltype.Signed, releasegil_func)
+ self.reacqgil_addr = rffi.cast(lltype.Signed, reacqgil_func)
+
+ def _gen_leave_jitted_hook_code(self, save_exc):
+ mc = ARMv7Builder()
+ if self.cpu.supports_floats:
+ floats = r.caller_vfp_resp
+ else:
+ floats = []
+ with saved_registers(mc, r.caller_resp + [r.lr], floats):
+ addr = self.cpu.get_on_leave_jitted_int(save_exception=save_exc)
+ mc.BL(addr)
+ assert self._exit_code_addr != 0
+ mc.B(self._exit_code_addr)
+ return mc.materialize(self.cpu.asmmemmgr, [],
+ self.cpu.gc_ll_descr.gcrootmap)
+
+ def _build_propagate_exception_path(self):
+ if self.cpu.propagate_exception_v < 0:
+ return # not supported (for tests, or non-translated)
+ #
+ mc = ARMv7Builder()
+ # call on_leave_jitted_save_exc()
+ if self.cpu.supports_floats:
+ floats = r.caller_vfp_resp
+ else:
+ floats = []
+ with saved_registers(mc, r.caller_resp + [r.lr], floats):
+ addr = self.cpu.get_on_leave_jitted_int(save_exception=True,
+ default_to_memoryerror=True)
+ mc.BL(addr)
+ mc.gen_load_int(r.ip.value, self.cpu.propagate_exception_v)
+ mc.MOV_rr(r.r0.value, r.ip.value)
+ self.gen_func_epilog(mc=mc)
+ self.propagate_exception_path = mc.materialize(self.cpu.asmmemmgr, [])
+
+ def _build_stack_check_slowpath(self):
+ _, _, slowpathaddr = self.cpu.insert_stack_check()
+ if slowpathaddr == 0 or self.cpu.propagate_exception_v < 0:
+ return # no stack check (for tests, or non-translated)
+ #
+ # make a "function" that is called immediately at the start of
+ # an assembler function. In particular, the stack looks like:
+ #
+ # | retaddr of caller | <-- aligned to a multiple of 16
+ # | saved argument regs |
+ # | my own retaddr | <-- sp
+ # +-----------------------+
+ #
+ mc = ARMv7Builder()
+ # save argument registers and return address
+ mc.PUSH([reg.value for reg in r.argument_regs] + [r.lr.value])
+ # stack is aligned here
+ # Pass current stack pointer as argument to the call
+ mc.MOV_rr(r.r0.value, r.sp.value)
+ #
+ mc.BL(slowpathaddr)
+
+ # check for an exception
+ mc.gen_load_int(r.r0.value, self.cpu.pos_exception())
+ mc.LDR_ri(r.r0.value, r.r0.value)
+ mc.TST_rr(r.r0.value, r.r0.value)
+ # restore registers and return
+ # We check for c.EQ here, meaning all bits zero in this case
+ mc.POP([reg.value for reg in r.argument_regs] + [r.pc.value], cond=c.EQ)
+ # call on_leave_jitted_save_exc()
+ addr = self.cpu.get_on_leave_jitted_int(save_exception=True)
+ mc.BL(addr)
+ #
+ mc.gen_load_int(r.r0.value, self.cpu.propagate_exception_v)
+ #
+ # footer -- note the ADD, which skips the return address of this
+ # function, and will instead return to the caller's caller. Note
+ # also that we completely ignore the saved arguments, because we
+ # are interrupting the function.
+ mc.ADD_ri(r.sp.value, r.sp.value, (len(r.argument_regs) + 1) * WORD)
+ mc.POP([r.pc.value])
+ #
+ rawstart = mc.materialize(self.cpu.asmmemmgr, [])
+ self.stack_check_slowpath = rawstart
+
+ def _build_wb_slowpath(self, withcards, withfloats=False):
+ descr = self.cpu.gc_ll_descr.write_barrier_descr
+ if descr is None:
+ return
+ if not withcards:
+ func = descr.get_write_barrier_fn(self.cpu)
+ else:
+ if descr.jit_wb_cards_set == 0:
+ return
+ func = descr.get_write_barrier_from_array_fn(self.cpu)
+ if func == 0:
+ return
+ #
+ # This builds a helper function called from the slow path of
+ # write barriers. It must save all registers, and optionally
+ # all vfp registers. It takes a single argument which is in r0.
+ # It must keep stack alignment accordingly.
+ mc = ARMv7Builder()
+ #
+ if withfloats:
+ floats = r.caller_vfp_resp
+ else:
+ floats = []
+ with saved_registers(mc, r.caller_resp + [r.ip, r.lr], floats):
+ mc.BL(func)
+ #
+ if withcards:
+ # A final TEST8 before the RET, for the caller. Careful to
+ # not follow this instruction with another one that changes
+ # the status of the CPU flags!
+ mc.LDRB_ri(r.ip.value, r.r0.value,
+ imm=descr.jit_wb_if_flag_byteofs)
+ mc.TST_ri(r.ip.value, imm=0x80)
+ #
+ mc.MOV_rr(r.pc.value, r.lr.value)
+ #
+ rawstart = mc.materialize(self.cpu.asmmemmgr, [])
+ self.wb_slowpath[withcards + 2 * withfloats] = rawstart
+
+ def setup_failure_recovery(self):
+
+ @rgc.no_collect
+ def failure_recovery_func(mem_loc, frame_pointer, stack_pointer):
+            """mem_loc is a structure in memory describing where the values
+            for the failargs are stored. frame_pointer is the address of the
+            frame pointer of the frame to be decoded."""
+ vfp_registers = rffi.cast(rffi.LONGLONGP, stack_pointer)
+ registers = rffi.ptradd(vfp_registers, len(r.all_vfp_regs))
+ registers = rffi.cast(rffi.LONGP, registers)
+ return self.decode_registers_and_descr(mem_loc, frame_pointer,
+ registers, vfp_registers)
+
+ self.failure_recovery_func = failure_recovery_func
+
+ recovery_func_sign = lltype.Ptr(lltype.FuncType([lltype.Signed] * 3,
+ lltype.Signed))
+
+ @rgc.no_collect
+ def decode_registers_and_descr(self, mem_loc, frame_pointer,
+ registers, vfp_registers):
+        """Decode locations encoded in memory at mem_loc and write the values
+        to the failboxes. Values for spilled vars and registers are stored on
+        the stack relative to frame_pointer."""
+ assert frame_pointer & 1 == 0
+ self.fail_force_index = frame_pointer
+ bytecode = rffi.cast(rffi.UCHARP, mem_loc)
+ num = 0
+ value = 0
+ fvalue = 0
+ code_inputarg = False
+ while True:
+ code = rffi.cast(lltype.Signed, bytecode[0])
+ bytecode = rffi.ptradd(bytecode, 1)
+ if code >= self.CODE_FROMSTACK:
+ if code > 0x7F:
+ shift = 7
+ code &= 0x7F
+ while True:
+ nextcode = rffi.cast(lltype.Signed, bytecode[0])
+ bytecode = rffi.ptradd(bytecode, 1)
+ code |= (nextcode & 0x7F) << shift
+ shift += 7
+ if nextcode <= 0x7F:
+ break
+ # load the value from the stack
+ kind = code & 3
+ code = int((code - self.CODE_FROMSTACK) >> 2)
+ if code_inputarg:
+ code = ~code
+ code_inputarg = False
+ if kind == self.DESCR_FLOAT:
+                    # we use code + 1 to get the hi word of the double-word float
+ stackloc = frame_pointer - get_fp_offset(int(code) + 1)
+ assert stackloc & 3 == 0
+ fvalue = rffi.cast(rffi.LONGLONGP, stackloc)[0]
+ else:
+ stackloc = frame_pointer - get_fp_offset(int(code))
+ assert stackloc & 1 == 0
+ value = rffi.cast(rffi.LONGP, stackloc)[0]
+ else:
+ # 'code' identifies a register: load its value
+ kind = code & 3
+ if kind == self.DESCR_SPECIAL:
+ if code == self.CODE_HOLE:
+ num += 1
+ continue
+ if code == self.CODE_INPUTARG:
+ code_inputarg = True
+ continue
+ assert code == self.CODE_STOP
+ break
+ code >>= 2
+ if kind == self.DESCR_FLOAT:
+ fvalue = vfp_registers[code]
+ else:
+ value = registers[code]
+ # store the loaded value into fail_boxes_<type>
+ if kind == self.DESCR_FLOAT:
+ tgt = self.fail_boxes_float.get_addr_for_num(num)
+ rffi.cast(rffi.LONGLONGP, tgt)[0] = fvalue
+ else:
+ if kind == self.DESCR_INT:
+ tgt = self.fail_boxes_int.get_addr_for_num(num)
+ elif kind == self.DESCR_REF:
+ assert (value & 3) == 0, "misaligned pointer"
+ tgt = self.fail_boxes_ptr.get_addr_for_num(num)
+ else:
+ assert 0, "bogus kind"
+ rffi.cast(rffi.LONGP, tgt)[0] = value
+ num += 1
+ self.fail_boxes_count = num
+ fail_index = rffi.cast(rffi.INTP, bytecode)[0]
+ fail_index = rffi.cast(lltype.Signed, fail_index)
+ return fail_index
+
+ def decode_inputargs(self, code):
+ descr_to_box_type = [REF, INT, FLOAT]
+ bytecode = rffi.cast(rffi.UCHARP, code)
+ arglocs = []
+ code_inputarg = False
+ while 1:
+ # decode the next instruction from the bytecode
+ code = rffi.cast(lltype.Signed, bytecode[0])
+ bytecode = rffi.ptradd(bytecode, 1)
+ if code >= self.CODE_FROMSTACK:
+ # 'code' identifies a stack location
+ if code > 0x7F:
+ shift = 7
+ code &= 0x7F
+ while True:
+ nextcode = rffi.cast(lltype.Signed, bytecode[0])
+ bytecode = rffi.ptradd(bytecode, 1)
+ code |= (nextcode & 0x7F) << shift
+ shift += 7
+ if nextcode <= 0x7F:
+ break
+ kind = code & 3
+ code = (code - self.CODE_FROMSTACK) >> 2
+ if code_inputarg:
+ code = ~code
+ code_inputarg = False
+ loc = ARMFrameManager.frame_pos(code, descr_to_box_type[kind])
+ elif code == self.CODE_STOP:
+ break
+ elif code == self.CODE_HOLE:
+ continue
+ elif code == self.CODE_INPUTARG:
+ code_inputarg = True
+ continue
+ else:
+ # 'code' identifies a register
+ kind = code & 3
+ code >>= 2
+ if kind == self.DESCR_FLOAT:
+ loc = r.all_vfp_regs[code]
+ else:
+ loc = r.all_regs[code]
+ arglocs.append(loc)
+ return arglocs[:]
+
+ def _build_malloc_slowpath(self):
+ mc = ARMv7Builder()
+ if self.cpu.supports_floats:
+ vfp_regs = r.all_vfp_regs
+ else:
+ vfp_regs = []
+        # We need to push two registers here because we are going to make a
+        # call and therefore the stack needs to be 8-byte aligned
+ mc.PUSH([r.ip.value, r.lr.value])
+ with saved_registers(mc, [], vfp_regs):
+ # At this point we know that the values we need to compute the size
+ # are stored in r0 and r1.
+ mc.SUB_rr(r.r0.value, r.r1.value, r.r0.value)
+ addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
+ for reg, ofs in ARMv7RegisterManager.REGLOC_TO_COPY_AREA_OFS.items():
+ mc.STR_ri(reg.value, r.fp.value, imm=ofs)
+ mc.BL(addr)
+ for reg, ofs in ARMv7RegisterManager.REGLOC_TO_COPY_AREA_OFS.items():
+ mc.LDR_ri(reg.value, r.fp.value, imm=ofs)
+
+ mc.CMP_ri(r.r0.value, 0)
+ mc.B(self.propagate_exception_path, c=c.EQ)
+ nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
+ mc.gen_load_int(r.r1.value, nursery_free_adr)
+ mc.LDR_ri(r.r1.value, r.r1.value)
+ # see above
+ mc.POP([r.ip.value, r.pc.value])
+
+ rawstart = mc.materialize(self.cpu.asmmemmgr, [])
+ self.malloc_slowpath = rawstart
+
+ def propagate_memoryerror_if_r0_is_null(self):
+ # see ../x86/assembler.py:propagate_memoryerror_if_eax_is_null
+ self.mc.CMP_ri(r.r0.value, 0)
+ self.mc.B(self.propagate_exception_path, c=c.EQ)
+
+ def _gen_exit_path(self):
+ mc = ARMv7Builder()
+ decode_registers_addr = llhelper(self.recovery_func_sign,
+ self.failure_recovery_func)
+ self._insert_checks(mc)
+ with saved_registers(mc, r.all_regs, r.all_vfp_regs):
+            # move the mem block address to r0 to pass as the first argument
+ mc.MOV_rr(r.r0.value, r.lr.value)
+ # pass the current frame pointer as second param
+ mc.MOV_rr(r.r1.value, r.fp.value)
+ # pass the current stack pointer as third param
+ mc.MOV_rr(r.r2.value, r.sp.value)
+ self._insert_checks(mc)
+ mc.BL(rffi.cast(lltype.Signed, decode_registers_addr))
+ mc.MOV_rr(r.ip.value, r.r0.value)
+ mc.MOV_rr(r.r0.value, r.ip.value)
+ self.gen_func_epilog(mc=mc)
+ return mc.materialize(self.cpu.asmmemmgr, [],
+ self.cpu.gc_ll_descr.gcrootmap)
+
+ DESCR_REF = 0x00
+ DESCR_INT = 0x01
+ DESCR_FLOAT = 0x02
+ DESCR_SPECIAL = 0x03
+ CODE_FROMSTACK = 64
+ CODE_STOP = 0 | DESCR_SPECIAL
+ CODE_HOLE = 4 | DESCR_SPECIAL
+ CODE_INPUTARG = 8 | DESCR_SPECIAL
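+    # The constants above define the encoding used by gen_descr_encoding()
+    # below and the corresponding decoders: the two low bits of each code
+    # give the kind, the remaining bits a register number or, for codes at
+    # or above CODE_FROMSTACK, a stack position.  Codes are written as 7-bit
+    # groups, with the high bit set on every byte except the last one of a
+    # value.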
+
+ def gen_descr_encoding(self, descr, failargs, locs):
+ assert self.mc is not None
+ for i in range(len(failargs)):
+ arg = failargs[i]
+ if arg is not None:
+ if arg.type == REF:
+ kind = self.DESCR_REF
+ elif arg.type == INT:
+ kind = self.DESCR_INT
+ elif arg.type == FLOAT:
+ kind = self.DESCR_FLOAT
+ else:
+ raise AssertionError("bogus kind")
+ loc = locs[i]
+ if loc.is_stack():
+ pos = loc.position
+ if pos < 0:
+ self.mc.writechar(chr(self.CODE_INPUTARG))
+ pos = ~pos
+ n = self.CODE_FROMSTACK // 4 + pos
+ else:
+ assert loc.is_reg() or loc.is_vfp_reg()
+ n = loc.value
+ n = kind + 4 * n
+ while n > 0x7F:
+ self.mc.writechar(chr((n & 0x7F) | 0x80))
+ n >>= 7
+ else:
+ n = self.CODE_HOLE
+ self.mc.writechar(chr(n))
+ self.mc.writechar(chr(self.CODE_STOP))
+
+ fdescr = self.cpu.get_fail_descr_number(descr)
+ self.mc.write32(fdescr)
+ self.align()
+
+ # assert that the fail_boxes lists are big enough
+ assert len(failargs) <= self.fail_boxes_int.SIZE
+
+ def _gen_path_to_exit_path(self, descr, args, arglocs,
+ save_exc, fcond=c.AL):
+ assert isinstance(save_exc, bool)
+ self.gen_exit_code(self.mc, save_exc, fcond)
+ self.gen_descr_encoding(descr, args, arglocs[1:])
+
+ def gen_exit_code(self, mc, save_exc, fcond=c.AL):
+ assert isinstance(save_exc, bool)
+ if save_exc:
+ path = self._leave_jitted_hook_save_exc
+ else:
+ path = self._leave_jitted_hook
+ mc.BL(path)
+
+ def align(self):
+ while(self.mc.currpos() % FUNC_ALIGN != 0):
+ self.mc.writechar(chr(0))
+
+ def gen_func_epilog(self, mc=None, cond=c.AL):
+ stack_size = self.STACK_FIXED_AREA
+ stack_size -= len(r.callee_saved_registers) * WORD
+ if self.cpu.supports_floats:
+ stack_size -= len(r.callee_saved_vfp_registers) * 2 * WORD
+
+ gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+ if mc is None:
+ mc = self.mc
+ if gcrootmap and gcrootmap.is_shadow_stack:
+ self.gen_footer_shadowstack(gcrootmap, mc)
+ mc.MOV_rr(r.sp.value, r.fp.value, cond=cond)
+ mc.ADD_ri(r.sp.value, r.sp.value, stack_size, cond=cond)
+ if self.cpu.supports_floats:
+ mc.VPOP([reg.value for reg in r.callee_saved_vfp_registers],
+ cond=cond)
+ mc.POP([reg.value for reg in r.callee_restored_registers], cond=cond)
+
+ def gen_func_prolog(self):
+ stack_size = self.STACK_FIXED_AREA
+ stack_size -= len(r.callee_saved_registers) * WORD
+ if self.cpu.supports_floats:
+ stack_size -= len(r.callee_saved_vfp_registers) * 2 * WORD
+
+ self.mc.PUSH([reg.value for reg in r.callee_saved_registers])
+ if self.cpu.supports_floats:
+ self.mc.VPUSH([reg.value for reg in r.callee_saved_vfp_registers])
+        # here we modify the stack pointer to leave room for the 9 registers
+        # that are going to be saved around malloc calls and one word to
+        # store the force index
+ self.mc.SUB_ri(r.sp.value, r.sp.value, stack_size)
+ self.mc.MOV_rr(r.fp.value, r.sp.value)
+ gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+ if gcrootmap and gcrootmap.is_shadow_stack:
+ self.gen_shadowstack_header(gcrootmap)
+
+ def gen_shadowstack_header(self, gcrootmap):
+ # we need to put two words into the shadowstack: the MARKER_FRAME
+ # and the address of the frame (fp, actually)
+ rst = gcrootmap.get_root_stack_top_addr()
+ self.mc.gen_load_int(r.ip.value, rst)
+ self.mc.LDR_ri(r.r4.value, r.ip.value) # LDR r4, [rootstacktop]
+ #
+ MARKER = gcrootmap.MARKER_FRAME
+ self.mc.ADD_ri(r.r5.value, r.r4.value,
+ imm=2 * WORD) # ADD r5, r4 [2*WORD]
+ self.mc.gen_load_int(r.r6.value, MARKER)
+ self.mc.STR_ri(r.r6.value, r.r4.value, WORD) # STR MARKER, r4 [WORD]
+ self.mc.STR_ri(r.fp.value, r.r4.value) # STR fp, r4
+ #
+ self.mc.STR_ri(r.r5.value, r.ip.value) # STR r5 [rootstacktop]
+
+ def gen_footer_shadowstack(self, gcrootmap, mc):
+ rst = gcrootmap.get_root_stack_top_addr()
+ mc.gen_load_int(r.ip.value, rst)
+ mc.LDR_ri(r.r4.value, r.ip.value) # LDR r4, [rootstacktop]
+        mc.SUB_ri(r.r5.value, r.r4.value, imm=2 * WORD)  # SUB r5, r4 [2*WORD]
+ mc.STR_ri(r.r5.value, r.ip.value)
+
+ def _dump(self, ops, type='loop'):
+ debug_start('jit-backend-ops')
+ debug_print(type)
+ for op in ops:
+ debug_print(op.repr())
+ debug_stop('jit-backend-ops')
+
+ def _call_header(self):
+ self.align()
+ self.gen_func_prolog()
+
+ def _call_header_with_stack_check(self):
+ if self.stack_check_slowpath == 0:
+ pass # no stack check (e.g. not translated)
+ else:
+ endaddr, lengthaddr, _ = self.cpu.insert_stack_check()
+ self.mc.PUSH([r.lr.value])
+ # load stack end
+ self.mc.gen_load_int(r.ip.value, endaddr) # load ip, [end]
+ self.mc.LDR_ri(r.ip.value, r.ip.value) # LDR ip, ip
+ # load stack length
+            self.mc.gen_load_int(r.lr.value, lengthaddr)    # load lr, length
+            self.mc.LDR_ri(r.lr.value, r.lr.value)          # ldr lr, *length
+ # calculate ofs
+ self.mc.SUB_rr(r.ip.value, r.ip.value, r.sp.value) # SUB ip, current
+            # if ofs > length, the stack check fails: call the slowpath
+ self.mc.CMP_rr(r.ip.value, r.lr.value) # CMP ip, lr
+ self.mc.BL(self.stack_check_slowpath, c=c.HI) # call if ip > lr
+ #
+ self.mc.POP([r.lr.value])
+ self._call_header()
+
+ # cpu interface
+ def assemble_loop(self, loopname, inputargs, operations, looptoken, log):
+ clt = CompiledLoopToken(self.cpu, looptoken.number)
+ clt.allgcrefs = []
+ looptoken.compiled_loop_token = clt
+ clt._debug_nbargs = len(inputargs)
+
+ if not we_are_translated():
+ # Arguments should be unique
+ assert len(set(inputargs)) == len(inputargs)
+
+ operations = self.setup(looptoken, operations)
+ if log:
+ operations = self._inject_debugging_code(looptoken, operations,
+ 'e', looptoken.number)
+
+ self._call_header_with_stack_check()
+ sp_patch_location = self._prepare_sp_patch_position()
+
+ regalloc = Regalloc(assembler=self, frame_manager=ARMFrameManager())
+ regalloc.prepare_loop(inputargs, operations)
+
+ loop_head = self.mc.get_relative_pos()
+ looptoken._arm_loop_code = loop_head
+ #
+ clt.frame_depth = -1
+ frame_depth = self._assemble(operations, regalloc)
+ clt.frame_depth = frame_depth
+ #
+ size_excluding_failure_stuff = self.mc.get_relative_pos()
+
+ self._patch_sp_offset(sp_patch_location, frame_depth)
+ self.write_pending_failure_recoveries()
+
+ rawstart = self.materialize_loop(looptoken)
+ looptoken._arm_func_addr = rawstart
+
+ self.process_pending_guards(rawstart)
+ self.fixup_target_tokens(rawstart)
+
+ if log and not we_are_translated():
+ self.mc._dump_trace(rawstart,
+ 'loop_%s.asm' % self.cpu.total_compiled_loops)
+
+ ops_offset = self.mc.ops_offset
+ self.teardown()
+
+ debug_start("jit-backend-addr")
+ debug_print("Loop %d (%s) has address %x to %x (bootstrap %x)" % (
+ looptoken.number, loopname,
+ rawstart + loop_head,
+ rawstart + size_excluding_failure_stuff,
+ rawstart))
+ debug_stop("jit-backend-addr")
+
+ return AsmInfo(ops_offset, rawstart + loop_head,
+ size_excluding_failure_stuff - loop_head)
+
+ def _assemble(self, operations, regalloc):
+ regalloc.compute_hint_frame_locations(operations)
+ self._walk_operations(operations, regalloc)
+ frame_depth = regalloc.frame_manager.get_frame_depth()
+ jump_target_descr = regalloc.jump_target_descr
+ if jump_target_descr is not None:
+ frame_depth = max(frame_depth,
+ jump_target_descr._arm_clt.frame_depth)
+ return frame_depth
+
+ def assemble_bridge(self, faildescr, inputargs, operations,
+ original_loop_token, log):
+ operations = self.setup(original_loop_token, operations)
+ descr_number = self.cpu.get_fail_descr_number(faildescr)
+ if log:
+ operations = self._inject_debugging_code(faildescr, operations,
+ 'b', descr_number)
+ assert isinstance(faildescr, AbstractFailDescr)
+ code = self._find_failure_recovery_bytecode(faildescr)
+ frame_depth = faildescr._arm_current_frame_depth
+ arglocs = self.decode_inputargs(code)
+ if not we_are_translated():
+ assert len(inputargs) == len(arglocs)
+
+ regalloc = Regalloc(assembler=self, frame_manager=ARMFrameManager())
+ regalloc.prepare_bridge(inputargs, arglocs, operations)
+
+ sp_patch_location = self._prepare_sp_patch_position()
+
+ startpos = self.mc.get_relative_pos()
+
+ frame_depth = self._assemble(operations, regalloc)
+
+ codeendpos = self.mc.get_relative_pos()
+
+ self._patch_sp_offset(sp_patch_location, frame_depth)
+
+ self.write_pending_failure_recoveries()
+
+ rawstart = self.materialize_loop(original_loop_token)
+
+ self.process_pending_guards(rawstart)
+ self.fixup_target_tokens(rawstart)
+
+ self.patch_trace(faildescr, original_loop_token,
+ rawstart, regalloc)
+
+ if not we_are_translated():
+ # for the benefit of tests
+ faildescr._arm_bridge_frame_depth = frame_depth
+ if log:
+ self.mc._dump_trace(rawstart, 'bridge_%d.asm' %
+ self.cpu.total_compiled_bridges)
+ self.current_clt.frame_depth = max(self.current_clt.frame_depth,
+ frame_depth)
+ ops_offset = self.mc.ops_offset
+ self.teardown()
+
+ debug_start("jit-backend-addr")
+ debug_print("bridge out of Guard %d has address %x to %x" %
+ (descr_number, rawstart, rawstart + codeendpos))
+ debug_stop("jit-backend-addr")
+
+ return AsmInfo(ops_offset, startpos + rawstart, codeendpos - startpos)
+
+ def _find_failure_recovery_bytecode(self, faildescr):
+ guard_stub_addr = faildescr._arm_failure_recovery_block
+ if guard_stub_addr == 0:
+ # This case should be prevented by the logic in compile.py:
+ # look for CNT_BUSY_FLAG, which disables tracing from a guard
+ # when another tracing from the same guard is already in progress.
+ raise BridgeAlreadyCompiled
+ # a guard requires 3 words to encode the jump to the exit code.
+ return guard_stub_addr + 3 * WORD
+
+ def fixup_target_tokens(self, rawstart):
+ for targettoken in self.target_tokens_currently_compiling:
+ targettoken._arm_loop_code += rawstart
+ self.target_tokens_currently_compiling = None
+
+ def target_arglocs(self, loop_token):
+ return loop_token._arm_arglocs
+
+ def materialize_loop(self, looptoken):
+ self.datablockwrapper.done() # finish using cpu.asmmemmgr
+ self.datablockwrapper = None
+ allblocks = self.get_asmmemmgr_blocks(looptoken)
+ return self.mc.materialize(self.cpu.asmmemmgr, allblocks,
+ self.cpu.gc_ll_descr.gcrootmap)
+
+ def write_pending_failure_recoveries(self):
+ for tok in self.pending_guards:
+ descr = tok.descr
+            # generate the exit stub and the encoded representation
+ pos = self.mc.currpos()
+ tok.pos_recovery_stub = pos
+
+ self._gen_path_to_exit_path(descr, tok.failargs,
+ tok.faillocs, save_exc=tok.save_exc)
+ # store info on the descr
+ descr._arm_current_frame_depth = tok.faillocs[0].getint()
+
+ def process_pending_guards(self, block_start):
+ clt = self.current_clt
+ for tok in self.pending_guards:
+ descr = tok.descr
+ assert isinstance(descr, AbstractFailDescr)
+ failure_recovery_pos = block_start + tok.pos_recovery_stub
+ descr._arm_failure_recovery_block = failure_recovery_pos
+ relative_offset = tok.pos_recovery_stub - tok.offset
+ guard_pos = block_start + tok.offset
+ if not tok.is_invalidate:
+                # patch the guard jump to the stub:
+                # overwrite the generated NOP with a B_offs to the position
+                # of the stub
+ mc = ARMv7Builder()
+ mc.B_offs(relative_offset, c.get_opposite_of(tok.fcond))
+ mc.copy_to_raw_memory(guard_pos)
+ else:
+ clt.invalidate_positions.append((guard_pos, relative_offset))
+
+ def get_asmmemmgr_blocks(self, looptoken):
+ clt = looptoken.compiled_loop_token
+ if clt.asmmemmgr_blocks is None:
+ clt.asmmemmgr_blocks = []
+ return clt.asmmemmgr_blocks
+
+ def _prepare_sp_patch_position(self):
+        """Generate NOPs as placeholders; they are later patched with the
+        instruction(s) that update the sp according to the number of spilled
+        variables"""
+ size = (self.mc.size_of_gen_load_int + WORD)
+ l = self.mc.currpos()
+ for _ in range(size // WORD):
+ self.mc.NOP()
+ return l
+
+ def _patch_sp_offset(self, pos, frame_depth):
+ cb = OverwritingBuilder(self.mc, pos,
+ OverwritingBuilder.size_of_gen_load_int + WORD)
+ n = frame_depth * WORD
+
+ # ensure the sp is 8 byte aligned when patching it
+ if n % 8 != 0:
+ n += WORD
+ assert n % 8 == 0
+
+ self._adjust_sp(n, cb, base_reg=r.fp)
+
+ def _adjust_sp(self, n, cb=None, fcond=c.AL, base_reg=r.sp):
+ if cb is None:
+ cb = self.mc
+ if n < 0:
+ n = -n
+ rev = True
+ else:
+ rev = False
+ if n <= 0xFF and fcond == c.AL:
+ if rev:
+ cb.ADD_ri(r.sp.value, base_reg.value, n)
+ else:
+ cb.SUB_ri(r.sp.value, base_reg.value, n)
+ else:
+ cb.gen_load_int(r.ip.value, n, cond=fcond)
+ if rev:
+ cb.ADD_rr(r.sp.value, base_reg.value, r.ip.value, cond=fcond)
+ else:
+ cb.SUB_rr(r.sp.value, base_reg.value, r.ip.value, cond=fcond)
+
+ def _walk_operations(self, operations, regalloc):
+ fcond = c.AL
+ self._regalloc = regalloc
+ while regalloc.position() < len(operations) - 1:
+ regalloc.next_instruction()
+ i = regalloc.position()
+ op = operations[i]
+ self.mc.mark_op(op)
+ opnum = op.getopnum()
+ if op.has_no_side_effect() and op.result not in regalloc.longevity:
+ regalloc.possibly_free_vars_for_op(op)
+ elif self.can_merge_with_next_guard(op, i, operations):
+ guard = operations[i + 1]
+ assert guard.is_guard()
+ arglocs = regalloc_operations_with_guard[opnum](regalloc, op,
+ guard, fcond)
+ fcond = asm_operations_with_guard[opnum](self, op,
+ guard, arglocs, regalloc, fcond)
+ regalloc.next_instruction()
+ regalloc.possibly_free_vars_for_op(guard)
+ regalloc.possibly_free_vars(guard.getfailargs())
+ elif not we_are_translated() and op.getopnum() == -124:
+ regalloc.prepare_force_spill(op, fcond)
+ else:
+ arglocs = regalloc_operations[opnum](regalloc, op, fcond)
+ if arglocs is not None:
+ fcond = asm_operations[opnum](self, op, arglocs,
+ regalloc, fcond)
+ if op.is_guard():
+ regalloc.possibly_free_vars(op.getfailargs())
+ if op.result:
+ regalloc.possibly_free_var(op.result)
+ regalloc.possibly_free_vars_for_op(op)
+ regalloc.free_temp_vars()
+ regalloc._check_invariants()
+ self.mc.mark_op(None) # end of the loop
+
+ # from ../x86/regalloc.py
+ def can_merge_with_next_guard(self, op, i, operations):
+ if (op.getopnum() == rop.CALL_MAY_FORCE or
+ op.getopnum() == rop.CALL_ASSEMBLER or
+ op.getopnum() == rop.CALL_RELEASE_GIL):
+ assert operations[i + 1].getopnum() == rop.GUARD_NOT_FORCED
+ return True
+ if not op.is_comparison():
+ if op.is_ovf():
+ if (operations[i + 1].getopnum() != rop.GUARD_NO_OVERFLOW and
+ operations[i + 1].getopnum() != rop.GUARD_OVERFLOW):
+ not_implemented("int_xxx_ovf not followed by "
+ "guard_(no)_overflow")
+ return True
+ return False
+ if (operations[i + 1].getopnum() != rop.GUARD_TRUE and
+ operations[i + 1].getopnum() != rop.GUARD_FALSE):
+ return False
+ if operations[i + 1].getarg(0) is not op.result:
+ return False
+ if (self._regalloc.longevity[op.result][1] > i + 1 or
+ op.result in operations[i + 1].getfailargs()):
+ return False
+ return True
+
+ def regalloc_emit_llong(self, op, arglocs, fcond, regalloc):
+ effectinfo = op.getdescr().get_extra_info()
+ oopspecindex = effectinfo.oopspecindex
+ asm_llong_operations[oopspecindex](self, op, arglocs, regalloc, fcond)
+ return fcond
+
+ def regalloc_emit_math(self, op, arglocs, fcond, regalloc):
+ effectinfo = op.getdescr().get_extra_info()
+ oopspecindex = effectinfo.oopspecindex
+ asm_math_operations[oopspecindex](self, op, arglocs, regalloc, fcond)
+ return fcond
+
+
+ def _insert_checks(self, mc=None):
+ if not we_are_translated() and self._debug:
+ if mc is None:
+ mc = self.mc
+ mc.CMP_rr(r.fp.value, r.sp.value)
+ mc.MOV_rr(r.pc.value, r.pc.value, cond=c.GE)
+ mc.BKPT()
+
+ def _ensure_result_bit_extension(self, resloc, size, signed):
+ if size == 4:
+ return
+ if size == 1:
+ if not signed: # unsigned char
+ self.mc.AND_ri(resloc.value, resloc.value, 0xFF)
+ else:
+ self.mc.LSL_ri(resloc.value, resloc.value, 24)
+ self.mc.ASR_ri(resloc.value, resloc.value, 24)
+ elif size == 2:
+ if not signed:
+ self.mc.LSL_ri(resloc.value, resloc.value, 16)
+ self.mc.LSR_ri(resloc.value, resloc.value, 16)
+ else:
+ self.mc.LSL_ri(resloc.value, resloc.value, 16)
+ self.mc.ASR_ri(resloc.value, resloc.value, 16)
+
+ def patch_trace(self, faildescr, looptoken, bridge_addr, regalloc):
+ b = ARMv7Builder()
+ patch_addr = faildescr._arm_failure_recovery_block
+ assert patch_addr != 0
+ b.B(bridge_addr)
+ b.copy_to_raw_memory(patch_addr)
+ faildescr._arm_failure_recovery_block = 0
+
+ # regalloc support
+ def load(self, loc, value):
+ assert (loc.is_reg() and value.is_imm()
+ or loc.is_vfp_reg() and value.is_imm_float())
+ if value.is_imm():
+ self.mc.gen_load_int(loc.value, value.getint())
+ elif value.is_imm_float():
+ self.mc.gen_load_int(r.ip.value, value.getint())
+ self.mc.VLDR(loc.value, r.ip.value)
+
+ def _mov_imm_to_loc(self, prev_loc, loc, cond=c.AL):
+ if not loc.is_reg() and not (loc.is_stack() and loc.type != FLOAT):
+ raise AssertionError("invalid target for move from imm value")
+ if loc.is_reg():
+ new_loc = loc
+ elif loc.is_stack():
+ self.mc.PUSH([r.lr.value], cond=cond)
+ new_loc = r.lr
+ else:
+ raise AssertionError("invalid target for move from imm value")
+ self.mc.gen_load_int(new_loc.value, prev_loc.value, cond=cond)
+ if loc.is_stack():
+ self.regalloc_mov(new_loc, loc)
+ self.mc.POP([r.lr.value], cond=cond)
+
+ def _mov_reg_to_loc(self, prev_loc, loc, cond=c.AL):
+ if loc.is_imm():
+ raise AssertionError("mov reg to imm doesn't make sense")
+ if loc.is_reg():
+ self.mc.MOV_rr(loc.value, prev_loc.value, cond=cond)
+ elif loc.is_stack() and loc.type != FLOAT:
+ # spill a core register
+ if prev_loc is r.ip:
+ temp = r.lr
+ else:
+ temp = r.ip
+ offset = loc.value
+ if not check_imm_arg(offset, size=0xFFF):
+ self.mc.PUSH([temp.value], cond=cond)
+ self.mc.gen_load_int(temp.value, -offset, cond=cond)
+ self.mc.STR_rr(prev_loc.value, r.fp.value,
+ temp.value, cond=cond)
+ self.mc.POP([temp.value], cond=cond)
+ else:
+ self.mc.STR_ri(prev_loc.value, r.fp.value,
+ imm=-offset, cond=cond)
+ else:
+ assert 0, 'unsupported case'
+
+ def _mov_stack_to_loc(self, prev_loc, loc, cond=c.AL):
+ pushed = False
+ if loc.is_reg():
+ assert prev_loc.type != FLOAT, 'trying to load from an \
+ incompatible location into a core register'
+ assert loc is not r.lr, 'lr is not supported as a target \
+ when moving from the stack'
+ # unspill a core register
+ offset = prev_loc.value
+ if not check_imm_arg(offset, size=0xFFF):
+ self.mc.PUSH([r.lr.value], cond=cond)
+ pushed = True
+ self.mc.gen_load_int(r.lr.value, -offset, cond=cond)
+ self.mc.LDR_rr(loc.value, r.fp.value, r.lr.value, cond=cond)
+ else:
+ self.mc.LDR_ri(loc.value, r.fp.value, imm=-offset, cond=cond)
+ if pushed:
+ self.mc.POP([r.lr.value], cond=cond)
+ elif loc.is_vfp_reg():
+ assert prev_loc.type == FLOAT, 'trying to load from an \
+ incompatible location into a float register'
+ # load spilled value into vfp reg
+ offset = prev_loc.value
+ self.mc.PUSH([r.ip.value], cond=cond)
+ pushed = True
+ if not check_imm_arg(offset):
+ self.mc.gen_load_int(r.ip.value, offset, cond=cond)
+ self.mc.SUB_rr(r.ip.value, r.fp.value, r.ip.value, cond=cond)
+ else:
+ self.mc.SUB_ri(r.ip.value, r.fp.value, offset, cond=cond)
+ self.mc.VLDR(loc.value, r.ip.value, cond=cond)
+ if pushed:
+ self.mc.POP([r.ip.value], cond=cond)
+ else:
+ assert 0, 'unsupported case'
+
+ def _mov_imm_float_to_loc(self, prev_loc, loc, cond=c.AL):
+ if loc.is_vfp_reg():
+ self.mc.PUSH([r.ip.value], cond=cond)
+ self.mc.gen_load_int(r.ip.value, prev_loc.getint(), cond=cond)
+ self.mc.VLDR(loc.value, r.ip.value, cond=cond)
+ self.mc.POP([r.ip.value], cond=cond)
+ elif loc.is_stack():
+ self.regalloc_push(r.vfp_ip)
+ self.regalloc_mov(prev_loc, r.vfp_ip, cond)
+ self.regalloc_mov(r.vfp_ip, loc, cond)
+ self.regalloc_pop(r.vfp_ip)
+ else:
+ assert 0, 'unsupported case'
+
+ def _mov_vfp_reg_to_loc(self, prev_loc, loc, cond=c.AL):
+ if loc.is_vfp_reg():
+ self.mc.VMOV_cc(loc.value, prev_loc.value, cond=cond)
+ elif loc.is_stack():
+ assert loc.type == FLOAT, 'trying to store to an \
+ incompatible location from a float register'
+ # spill vfp register
+ self.mc.PUSH([r.ip.value], cond=cond)
+ offset = loc.value
+ if not check_imm_arg(offset):
+ self.mc.gen_load_int(r.ip.value, offset, cond=cond)
+ self.mc.SUB_rr(r.ip.value, r.fp.value, r.ip.value, cond=cond)
+ else:
+ self.mc.SUB_ri(r.ip.value, r.fp.value, offset, cond=cond)
+ self.mc.VSTR(prev_loc.value, r.ip.value, cond=cond)
+ self.mc.POP([r.ip.value], cond=cond)
+ else:
+ assert 0, 'unsupported case'
+
+ def regalloc_mov(self, prev_loc, loc, cond=c.AL):
+ """Moves a value from a previous location to some other location"""
+ if prev_loc.is_imm():
+ return self._mov_imm_to_loc(prev_loc, loc, cond)
+ elif prev_loc.is_reg():
+ self._mov_reg_to_loc(prev_loc, loc, cond)
+ elif prev_loc.is_stack():
+ self._mov_stack_to_loc(prev_loc, loc, cond)
+ elif prev_loc.is_imm_float():
+ self._mov_imm_float_to_loc(prev_loc, loc, cond)
+ elif prev_loc.is_vfp_reg():
+ self._mov_vfp_reg_to_loc(prev_loc, loc, cond)
+ else:
+ assert 0, 'unsupported case'
+ mov_loc_loc = regalloc_mov
+
+ def mov_from_vfp_loc(self, vfp_loc, reg1, reg2, cond=c.AL):
+        """Moves a floating point value (an immediate, a vfp register or a
+        stack location) to a pair of core registers"""
+ assert reg1.value + 1 == reg2.value
+ if vfp_loc.is_vfp_reg():
+ self.mc.VMOV_rc(reg1.value, reg2.value, vfp_loc.value, cond=cond)
+ elif vfp_loc.is_imm_float():
+ self.mc.PUSH([r.ip.value], cond=cond)
+ self.mc.gen_load_int(r.ip.value, vfp_loc.getint(), cond=cond)
+ # we need to load one word to loc and one to loc+1 which are
+ # two 32-bit core registers
+ self.mc.LDR_ri(reg1.value, r.ip.value, cond=cond)
+ self.mc.LDR_ri(reg2.value, r.ip.value, imm=WORD, cond=cond)
+ self.mc.POP([r.ip.value], cond=cond)
+ elif vfp_loc.is_stack() and vfp_loc.type == FLOAT:
+ # load spilled vfp value into two core registers
+ offset = vfp_loc.value
+ if not check_imm_arg(offset, size=0xFFF):
+ self.mc.PUSH([r.ip.value], cond=cond)
+ self.mc.gen_load_int(r.ip.value, -offset, cond=cond)
+ self.mc.LDR_rr(reg1.value, r.fp.value, r.ip.value, cond=cond)
+ self.mc.ADD_ri(r.ip.value, r.ip.value, imm=WORD, cond=cond)
+ self.mc.LDR_rr(reg2.value, r.fp.value, r.ip.value, cond=cond)
+ self.mc.POP([r.ip.value], cond=cond)
+ else:
+ self.mc.LDR_ri(reg1.value, r.fp.value, imm=-offset, cond=cond)
+ self.mc.LDR_ri(reg2.value, r.fp.value,
+ imm=-offset + WORD, cond=cond)
+ else:
+ assert 0, 'unsupported case'
+
+ def mov_to_vfp_loc(self, reg1, reg2, vfp_loc, cond=c.AL):
+        """Moves a floating point value from two consecutive core registers
+        to a vfp location, either a vfp register or a stack location"""
+ assert reg1.value + 1 == reg2.value
+ if vfp_loc.is_vfp_reg():
+ self.mc.VMOV_cr(vfp_loc.value, reg1.value, reg2.value, cond=cond)
+ elif vfp_loc.is_stack():
+ # move from two core registers to a float stack location
+ offset = vfp_loc.value
+ if not check_imm_arg(offset, size=0xFFF):
+ self.mc.PUSH([r.ip.value], cond=cond)
+ self.mc.gen_load_int(r.ip.value, -offset, cond=cond)
+ self.mc.STR_rr(reg1.value, r.fp.value, r.ip.value, cond=cond)
+ self.mc.ADD_ri(r.ip.value, r.ip.value, imm=WORD, cond=cond)
+ self.mc.STR_rr(reg2.value, r.fp.value, r.ip.value, cond=cond)
+ self.mc.POP([r.ip.value], cond=cond)
+ else:
+ self.mc.STR_ri(reg1.value, r.fp.value, imm=-offset, cond=cond)
+ self.mc.STR_ri(reg2.value, r.fp.value,
+ imm=-offset + WORD, cond=cond)
+ else:
+ assert 0, 'unsupported case'
+
+ def regalloc_push(self, loc, cond=c.AL):
+        """Pushes the value stored in loc to the stack.
+        Can trash the current value of the IP register when pushing a stack
+        loc"""
+
+ if loc.is_stack():
+ if loc.type != FLOAT:
+ scratch_reg = r.ip
+ else:
+ scratch_reg = r.vfp_ip
+ self.regalloc_mov(loc, scratch_reg, cond)
+ self.regalloc_push(scratch_reg, cond)
+ elif loc.is_reg():
+ self.mc.PUSH([loc.value], cond=cond)
+ elif loc.is_vfp_reg():
+ self.mc.VPUSH([loc.value], cond=cond)
+ elif loc.is_imm():
+ self.regalloc_mov(loc, r.ip)
+ self.mc.PUSH([r.ip.value], cond=cond)
+ elif loc.is_imm_float():
+ self.regalloc_mov(loc, r.vfp_ip)
+ self.mc.VPUSH([r.vfp_ip.value], cond=cond)
+ else:
+ raise AssertionError('Trying to push an invalid location')
+
+ def regalloc_pop(self, loc, cond=c.AL):
+        """Pops the value on top of the stack to loc. Can trash the current
+        value of the IP register when popping to a stack loc"""
+ if loc.is_stack():
+ if loc.type != FLOAT:
+ scratch_reg = r.ip
+ else:
+ scratch_reg = r.vfp_ip
+ self.regalloc_pop(scratch_reg)
+ self.regalloc_mov(scratch_reg, loc)
+ elif loc.is_reg():
+ self.mc.POP([loc.value], cond=cond)
+ elif loc.is_vfp_reg():
+ self.mc.VPOP([loc.value], cond=cond)
+ else:
+ raise AssertionError('Trying to pop to an invalid location')
+
+ def leave_jitted_hook(self):
+ ptrs = self.fail_boxes_ptr.ar
+ llop.gc_assume_young_pointers(lltype.Void,
+ llmemory.cast_ptr_to_adr(ptrs))
+
+ def malloc_cond(self, nursery_free_adr, nursery_top_adr, size):
+ assert size & (WORD-1) == 0 # must be correctly aligned
+
+ self.mc.gen_load_int(r.r0.value, nursery_free_adr)
+ self.mc.LDR_ri(r.r0.value, r.r0.value)
+
+ if check_imm_arg(size):
+ self.mc.ADD_ri(r.r1.value, r.r0.value, size)
+ else:
+ self.mc.gen_load_int(r.r1.value, size)
+ self.mc.ADD_rr(r.r1.value, r.r0.value, r.r1.value)
+
+ self.mc.gen_load_int(r.ip.value, nursery_top_adr)
+ self.mc.LDR_ri(r.ip.value, r.ip.value)
+
+ self.mc.CMP_rr(r.r1.value, r.ip.value)
+
+        # We load into r0 the address stored at nursery_free_adr. We calculate
+        # the new value for nursery_free_adr and store it in r1. Then we load
+        # the address stored in nursery_top_adr into IP. If the value in r1
+        # is (unsigned) bigger than the one in ip, we conditionally call
+        # malloc_slowpath. malloc_slowpath returns the new value of
+        # nursery_free_adr in r1 and the address of the new object in r0.
+ self.mark_gc_roots(self.write_new_force_index(),
+ use_copy_area=True)
+ self.mc.BL(self.malloc_slowpath, c=c.HI)
+
+ self.mc.gen_load_int(r.ip.value, nursery_free_adr)
+ self.mc.STR_ri(r.r1.value, r.ip.value)
+
+ def mark_gc_roots(self, force_index, use_copy_area=False):
+ if force_index < 0:
+ return # not needed
+ gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+ if gcrootmap:
+ mark = self._regalloc.get_mark_gc_roots(gcrootmap, use_copy_area)
+ assert gcrootmap.is_shadow_stack
+ gcrootmap.write_callshape(mark, force_index)
+
+ def write_new_force_index(self):
+ # for shadowstack only: get a new, unused force_index number and
+ # write it to FORCE_INDEX_OFS. Used to record the call shape
+ # (i.e. where the GC pointers are in the stack) around a CALL
+ # instruction that doesn't already have a force_index.
+ gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+ if gcrootmap and gcrootmap.is_shadow_stack:
+ clt = self.current_clt
+ force_index = clt.reserve_and_record_some_faildescr_index()
+ self._write_fail_index(force_index)
+ return force_index
+ else:
+ return 0
+
+
+def not_implemented(msg):
+ os.write(2, '[ARM/asm] %s\n' % msg)
+ raise NotImplementedError(msg)
+
+
+def notimplemented_op(self, op, arglocs, regalloc, fcond):
+ print "[ARM/asm] %s not implemented" % op.getopname()
+ raise NotImplementedError(op)
+
+
+def notimplemented_op_with_guard(self, op, guard_op, arglocs, regalloc, fcond):
+ print "[ARM/asm] %s with guard %s not implemented" % \
+ (op.getopname(), guard_op.getopname())
+ raise NotImplementedError(op)
+
+asm_operations = [notimplemented_op] * (rop._LAST + 1)
+asm_operations_with_guard = [notimplemented_op_with_guard] * (rop._LAST + 1)
+asm_llong_operations = {}
+asm_math_operations = {}
+
+for name, value in ResOpAssembler.__dict__.iteritems():
+ if name.startswith('emit_guard_'):
+ opname = name[len('emit_guard_'):]
+ num = getattr(rop, opname.upper())
+ asm_operations_with_guard[num] = value
+ elif name.startswith('emit_op_llong_'):
+ opname = name[len('emit_op_llong_'):]
+ num = getattr(EffectInfo, 'OS_LLONG_' + opname.upper())
+ asm_llong_operations[num] = value
+ elif name.startswith('emit_op_math_'):
+ opname = name[len('emit_op_math_'):]
+ num = getattr(EffectInfo, 'OS_MATH_' + opname.upper())
+ asm_math_operations[num] = value
+ elif name.startswith('emit_op_'):
+ opname = name[len('emit_op_'):]
+ num = getattr(rop, opname.upper())
+ asm_operations[num] = value
+
+
+class BridgeAlreadyCompiled(Exception):
+ pass
diff --git a/pypy/jit/backend/arm/codebuilder.py b/pypy/jit/backend/arm/codebuilder.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/codebuilder.py
@@ -0,0 +1,311 @@
+from pypy.jit.backend.arm import arch
+from pypy.jit.backend.arm import conditions as cond
+from pypy.jit.backend.arm import registers as reg
+from pypy.jit.backend.arm.arch import (WORD, FUNC_ALIGN)
+from pypy.jit.backend.arm.instruction_builder import define_instructions
+from pypy.jit.backend.llsupport.asmmemmgr import BlockBuilderMixin
+from pypy.rlib.objectmodel import we_are_translated
+from pypy.rpython.lltypesystem import lltype, rffi, llmemory
+from pypy.tool.udir import udir
+
+clear_cache = rffi.llexternal(
+ "__clear_cache",
+ [llmemory.Address, llmemory.Address],
+ lltype.Void,
+ _nowrapper=True,
+ sandboxsafe=True)
+
+
+def binary_helper_call(name):
+ function = getattr(arch, 'arm_%s' % name)
+
+ def f(self, c=cond.AL):
+ """Generates a call to a helper function, takes its
+ arguments in r0 and r1, result is placed in r0"""
+ addr = rffi.cast(lltype.Signed, function)
+ self.BL(addr, c)
+ return f
+
+
+class AbstractARMv7Builder(object):
+
+ def __init__(self):
+ pass
+
+ def align(self):
+ while self.currpos() % FUNC_ALIGN != 0:
+ self.writechar(chr(0))
+
+ def NOP(self):
+ self.MOV_rr(0, 0)
+
+ def PUSH(self, regs, cond=cond.AL):
+ assert reg.sp.value not in regs
+ instr = self._encode_reg_list(cond << 28 | 0x92D << 16, regs)
+ self.write32(instr)
+
+ def VPUSH(self, regs, cond=cond.AL):
+ nregs = len(regs)
+ assert nregs > 0 and nregs <= 16
+ freg = regs[0]
+ D = (freg & 0x10) >> 4
+ Dd = (freg & 0xF)
+ nregs *= 2
+ instr = (cond << 28
+ | 0xD2D << 16
+ | D << 22
+ | Dd << 12
+ | 0xB << 8
+ | nregs)
+ self.write32(instr)
+
+ def VPOP(self, regs, cond=cond.AL):
+ nregs = len(regs)
+ assert nregs > 0 and nregs <= 16
+ freg = regs[0]
+ D = (freg & 0x10) >> 4
+ Dd = (freg & 0xF)
+ nregs *= 2
+ instr = (cond << 28
+ | 0xCBD << 16
+ | D << 22
+ | Dd << 12
+ | 0xB << 8
+ | nregs)
+ self.write32(instr)
+
+ def VMOV_rc(self, rt, rt2, dm, cond=cond.AL):
+ """This instruction copies two words from two ARM core registers into a
+ doubleword extension register, or from a doubleword extension register
+ to two ARM core registers.
+ """
+ op = 1
+ instr = (cond << 28
+ | 0xC << 24
+ | 0x4 << 20
+ | op << 20
+ | (rt2 & 0xF) << 16
+ | (rt & 0xF) << 12
+ | 0xB << 8
+ | 0x1 << 4
+ | (dm & 0xF))
+ self.write32(instr)
+
+ # VMOV<c> <Dm>, <Rt>, <Rt2>
+ def VMOV_cr(self, dm, rt, rt2, cond=cond.AL):
+ """This instruction copies two words from two ARM core registers into a
+ doubleword extension register, or from a doubleword extension register
+ to two ARM core registers.
+ """
+ op = 0
+ instr = (cond << 28
+ | 0xC << 24
+ | 0x4 << 20
+ | op << 20
+ | (rt2 & 0xF) << 16
+ | (rt & 0xF) << 12
+ | 0xB << 8
+ | 0x1 << 4
+ | (dm & 0xF))
+ self.write32(instr)
+
+ def VMOV_cc(self, dd, dm, cond=cond.AL):
+ sz = 1 # for 64-bit mode
+ instr = (cond << 28
+ | 0xEB << 20
+ | (dd & 0xF) << 12
+ | 0x5 << 9
+ | (sz & 0x1) << 8
+ | 0x1 << 6
+ | (dm & 0xF))
+ self.write32(instr)
+
+ def VCVT_float_to_int(self, target, source, cond=cond.AL):
+ opc2 = 0x5
+ sz = 1
+ self._VCVT(target, source, cond, opc2, sz)
+
+ def VCVT_int_to_float(self, target, source, cond=cond.AL):
+ self._VCVT(target, source, cond, 0, 1)
+
+ def _VCVT(self, target, source, cond, opc2, sz):
+ D = 0x0
+ M = 0
+ op = 1
+ instr = (cond << 28
+ | 0xEB8 << 16
+ | D << 22
+ | opc2 << 16
+ | (target & 0xF) << 12
+ | 0x5 << 9
+ | sz << 8
+ | op << 7
+ | 1 << 6
+ | M << 5
+ | (source & 0xF))
+ self.write32(instr)
+
+ def POP(self, regs, cond=cond.AL):
+ instr = self._encode_reg_list(cond << 28 | 0x8BD << 16, regs)
+ self.write32(instr)
+
+ def BKPT(self):
+ """Unconditional breakpoint"""
+ self.write32(cond.AL << 28 | 0x1200070)
+
+ # corresponds to the instruction vmrs APSR_nzcv, fpscr
+ def VMRS(self, cond=cond.AL):
+ self.write32(cond << 28 | 0xEF1FA10)
+
+ def B(self, target, c=cond.AL):
+ self.gen_load_int(reg.ip.value, target, cond=c)
+ self.BX(reg.ip.value, c=c)
+
+ def BX(self, reg, c=cond.AL):
+ self.write32(c << 28 | 0x12FFF1 << 4 | (reg & 0xF))
+
+ def B_offs(self, target_ofs, c=cond.AL):
+ pos = self.currpos()
+ target_ofs = target_ofs - (pos + arch.PC_OFFSET)
+ assert target_ofs & 0x3 == 0
+ self.write32(c << 28 | 0xA << 24 | (target_ofs >> 2) & 0xFFFFFF)
+
+ def BL(self, addr, c=cond.AL):
+ target = rffi.cast(rffi.INT, addr)
+ self.gen_load_int(reg.ip.value, target, cond=c)
+ self.BLX(reg.ip.value, c)
+
+ def BLX(self, reg, c=cond.AL):
+ self.write32(c << 28 | 0x12FFF3 << 4 | (reg & 0xF))
+
+ def MOVT_ri(self, rd, imm16, c=cond.AL):
+ """Move Top writes an immediate value to the top halfword of the
+ destination register. It does not affect the contents of the bottom
+ halfword."""
+ self.write32(c << 28
+ | 0x3 << 24
+ | (1 << 22)
+ | ((imm16 >> 12) & 0xF) << 16
+ | (rd & 0xF) << 12
+ | imm16 & 0xFFF)
+
+ def MOVW_ri(self, rd, imm16, c=cond.AL):
+ """Encoding A2 of MOV, that allow to load a 16 bit constant"""
+ self.write32(c << 28
+ | 0x3 << 24
+ | ((imm16 >> 12) & 0xF) << 16
+ | (rd & 0xF) << 12
+ | imm16 & 0xFFF)
+
+ DIV = binary_helper_call('int_div')
+ MOD = binary_helper_call('int_mod')
+ UDIV = binary_helper_call('uint_div')
+
+ def _encode_reg_list(self, instr, regs):
+ for reg in regs:
+ instr |= 0x1 << reg
+ return instr
+
+ def _encode_imm(self, imm):
+ u = 1
+ if imm < 0:
+ u = 0
+ imm = -imm
+ return u, imm
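+ # e.g. _encode_imm(12) == (1, 12) and _encode_imm(-12) == (0, 12);
+ # the u bit then selects add vs. subtract of the (now positive)
+ # offset in the encodings built from it.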
+
+ def write32(self, word):
+ self.writechar(chr(word & 0xFF))
+ self.writechar(chr((word >> 8) & 0xFF))
+ self.writechar(chr((word >> 16) & 0xFF))
+ self.writechar(chr((word >> 24) & 0xFF))
+
+ def writechar(self, char):
+ raise NotImplementedError
+
+ def currpos(self):
+ raise NotImplementedError
+
+ def gen_load_int(self, r, value, cond=cond.AL):
+ """r is the register number, value is the value to be loaded to the
+ register"""
+ bottom = value & 0xFFFF
+ top = value >> 16
+ self.MOVW_ri(r, bottom, cond)
+ if top:
+ self.MOVT_ri(r, top, cond)
+ size_of_gen_load_int = 2 * WORD
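+ # For example, gen_load_int(r.r0.value, 0x12345678) emits
+ # movw r0, #0x5678
+ # movt r0, #0x1234
+ # i.e. at most two instructions, which is what size_of_gen_load_int
+ # accounts for.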
+
+
+class OverwritingBuilder(AbstractARMv7Builder):
+ def __init__(self, cb, start, size):
+ AbstractARMv7Builder.__init__(self)
+ self.cb = cb
+ self.index = start
+ self.end = start + size
+
+ def currpos(self):
+ return self.index
+
+ def writechar(self, char):
+ assert self.index <= self.end
+ self.cb.overwrite(self.index, char)
+ self.index += 1
+
+
+class ARMv7Builder(BlockBuilderMixin, AbstractARMv7Builder):
+ def __init__(self):
+ AbstractARMv7Builder.__init__(self)
+ self.init_block_builder()
+ #
+ # ResOperation --> offset in the assembly.
+ # ops_offset[None] represents the beginning of the code after the last op
+ # (i.e., the tail of the loop)
+ self.ops_offset = {}
+
+ def mark_op(self, op):
+ pos = self.get_relative_pos()
+ self.ops_offset[op] = pos
+
+ def _dump_trace(self, addr, name, formatter=-1):
+ if not we_are_translated():
+ if formatter != -1:
+ name = name % formatter
+ dir = udir.ensure('asm', dir=True)
+ f = dir.join(name).open('wb')
+ data = rffi.cast(rffi.CCHARP, addr)
+ for i in range(self.currpos()):
+ f.write(data[i])
+ f.close()
+
+ # XXX remove and setup aligning in llsupport
+ def materialize(self, asmmemmgr, allblocks, gcrootmap=None):
+ size = self.get_relative_pos()
+ malloced = asmmemmgr.malloc(size, size + 7)
+ allblocks.append(malloced)
+ rawstart = malloced[0]
+ while rawstart % FUNC_ALIGN != 0:
+ rawstart += 1
+ self.copy_to_raw_memory(rawstart)
+ if self.gcroot_markers is not None:
+ assert gcrootmap is not None
+ for pos, mark in self.gcroot_markers:
+ gcrootmap.put(rawstart + pos, mark)
+ return rawstart
+
+ def clear_cache(self, addr):
+ if we_are_translated():
+ startaddr = rffi.cast(llmemory.Address, addr)
+ endaddr = rffi.cast(llmemory.Address,
+ addr + self.get_relative_pos())
+ clear_cache(startaddr, endaddr)
+
+ def copy_to_raw_memory(self, addr):
+ self._copy_to_raw_memory(addr)
+ self.clear_cache(addr)
+ self._dump(addr, "jit-backend-dump", 'arm')
+
+ def currpos(self):
+ return self.get_relative_pos()
+
+
+define_instructions(AbstractARMv7Builder)
diff --git a/pypy/jit/backend/arm/conditions.py b/pypy/jit/backend/arm/conditions.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/conditions.py
@@ -0,0 +1,26 @@
+EQ = 0x0
+NE = 0x1
+HS = CS = 0x2
+LO = CC = 0x3
+MI = 0x4
+PL = 0x5
+VS = 0x6
+VC = 0x7
+HI = 0x8
+LS = 0x9
+GE = 0xA
+LT = 0xB
+GT = 0xC
+LE = 0xD
+AL = 0xE
+
+opposites = [NE, EQ, CC, CS, PL, MI, VC, VS, LS, HI, LT, GE, LE, GT, AL]
+
+
+def get_opposite_of(operation):
+ return opposites[operation]
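+
+# e.g. get_opposite_of(EQ) == NE and get_opposite_of(HI) == LS, which
+# lets the backend invert a guard's condition with one table lookup.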
+
+ # see mapping for floating point comparisons according to
+ # http://blogs.arm.com/software-enablement/405-condition-codes-4-floating-point-comparisons-using-vfp/
+VFP_LT = CC
+VFP_LE = LS
diff --git a/pypy/jit/backend/arm/detect.py b/pypy/jit/backend/arm/detect.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/detect.py
@@ -0,0 +1,31 @@
+from pypy.translator.tool.cbuild import ExternalCompilationInfo
+from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.rpython.tool import rffi_platform
+from pypy.translator.platform import CompilationError
+
+eci = ExternalCompilationInfo(
+ post_include_bits=["""
+// we need to disable optimizations so the compiler does not remove this
+// function when checking if the file compiles
+static void __attribute__((optimize("O0"))) pypy__arm_has_vfp()
+{
+ asm volatile("VMOV s0, s1");
+}
+ """])
+
+def detect_hardfloat():
+ # http://gcc.gnu.org/ml/gcc-patches/2010-10/msg02419.html
+ if rffi_platform.getdefined('__ARM_PCS_VFP', ''):
+ return rffi_platform.getconstantinteger('__ARM_PCS_VFP', '')
+ return False
+
+def detect_float():
+ """Check for hardware float support
+ we try to compile a function containing a VFP instruction, and if the
+ compiler accepts it we assume we are fine
+ """
+ try:
+ rffi_platform.verify_eci(eci)
+ return True
+ except CompilationError:
+ return False
diff --git a/pypy/jit/backend/arm/helper/__init__.py b/pypy/jit/backend/arm/helper/__init__.py
new file mode 100644
diff --git a/pypy/jit/backend/arm/helper/assembler.py b/pypy/jit/backend/arm/helper/assembler.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/helper/assembler.py
@@ -0,0 +1,182 @@
+from __future__ import with_statement
+from pypy.jit.backend.arm import conditions as c
+from pypy.jit.backend.arm import registers as r
+from pypy.jit.backend.arm.codebuilder import AbstractARMv7Builder
+from pypy.jit.metainterp.history import ConstInt, BoxInt, FLOAT
+from pypy.rlib.rarithmetic import r_uint, r_longlong, intmask
+from pypy.jit.metainterp.resoperation import rop
+
+def gen_emit_op_unary_cmp(name, true_cond):
+ false_cond = c.get_opposite_of(true_cond)
+ def f(self, op, arglocs, regalloc, fcond):
+ assert fcond is not None
+ reg, res = arglocs
+ self.mc.CMP_ri(reg.value, 0)
+ self.mc.MOV_ri(res.value, 1, true_cond)
+ self.mc.MOV_ri(res.value, 0, false_cond)
+ return fcond
+ f.__name__ = 'emit_op_%s' % name
+ return f
+
+def gen_emit_guard_unary_cmp(name, true_cond):
+ false_cond = c.get_opposite_of(true_cond)
+ def f(self, op, guard, arglocs, regalloc, fcond):
+ assert fcond is not None
+ assert guard is not None
+ reg = arglocs[0]
+ self.mc.CMP_ri(reg.value, 0)
+ cond = true_cond
+ guard_opnum = guard.getopnum()
+ if guard_opnum == rop.GUARD_FALSE:
+ cond = false_cond
+ return self._emit_guard(guard, arglocs[1:], cond, save_exc=False)
+ f.__name__ = 'emit_guard_%s' % name
+ return f
+
+def gen_emit_op_ri(name, opname):
+ ri_op = getattr(AbstractARMv7Builder, '%s_ri' % opname)
+ rr_op = getattr(AbstractARMv7Builder, '%s_rr' % opname)
+ def f(self, op, arglocs, regalloc, fcond):
+ assert fcond is not None
+ l0, l1, res = arglocs
+ if l1.is_imm():
+ ri_op(self.mc, res.value, l0.value, imm=l1.value, cond=fcond)
+ else:
+ rr_op(self.mc, res.value, l0.value, l1.value)
+ return fcond
+ f.__name__ = 'emit_op_%s' % name
+ return f
+
+def gen_emit_op_by_helper_call(name, opname):
+ helper = getattr(AbstractARMv7Builder, opname)
+ def f(self, op, arglocs, regalloc, fcond):
+ assert fcond is not None
+ if op.result:
+ regs = r.caller_resp[1:] + [r.ip]
+ else:
+ regs = r.caller_resp
+ with saved_registers(self.mc, regs, r.caller_vfp_resp):
+ helper(self.mc, fcond)
+ return fcond
+ f.__name__ = 'emit_op_%s' % name
+ return f
+
+def gen_emit_cmp_op(name, condition):
+ inv = c.get_opposite_of(condition)
+ def f(self, op, arglocs, regalloc, fcond):
+ l0, l1, res = arglocs
+
+ if l1.is_imm():
+ self.mc.CMP_ri(l0.value, imm=l1.getint(), cond=fcond)
+ else:
+ self.mc.CMP_rr(l0.value, l1.value, cond=fcond)
+ self.mc.MOV_ri(res.value, 1, cond=condition)
+ self.mc.MOV_ri(res.value, 0, cond=inv)
+ return fcond
+ f.__name__ = 'emit_op_%s' % name
+ return f
+
+def gen_emit_cmp_op_guard(name, true_cond):
+ false_cond = c.get_opposite_of(true_cond)
+ def f(self, op, guard, arglocs, regalloc, fcond):
+ assert guard is not None
+ l0 = arglocs[0]
+ l1 = arglocs[1]
+ assert l0.is_reg()
+
+ if l1.is_imm():
+ self.mc.CMP_ri(l0.value, imm=l1.getint(), cond=fcond)
+ else:
+ self.mc.CMP_rr(l0.value, l1.value, cond=fcond)
+ guard_opnum = guard.getopnum()
+ cond = true_cond
+ if guard_opnum == rop.GUARD_FALSE:
+ cond = false_cond
+ return self._emit_guard(guard, arglocs[2:], cond, save_exc=False)
+ f.__name__ = 'emit_guard_%s' % name
+ return f
+
+def gen_emit_float_op(name, opname):
+ op_rr = getattr(AbstractARMv7Builder, opname)
+ def f(self, op, arglocs, regalloc, fcond):
+ arg1, arg2, result = arglocs
+ op_rr(self.mc, result.value, arg1.value, arg2.value)
+ return fcond
+ f.__name__ = 'emit_op_%s' % name
+ return f
+
+def gen_emit_unary_float_op(name, opname):
+ op_rr = getattr(AbstractARMv7Builder, opname)
+ def f(self, op, arglocs, regalloc, fcond):
+ arg1, result = arglocs
+ op_rr(self.mc, result.value, arg1.value)
+ return fcond
+ f.__name__ = 'emit_op_%s' % name
+ return f
+
+def gen_emit_float_cmp_op(name, cond):
+ inv = c.get_opposite_of(cond)
+ def f(self, op, arglocs, regalloc, fcond):
+ arg1, arg2, res = arglocs
+ self.mc.VCMP(arg1.value, arg2.value)
+ self.mc.VMRS(cond=fcond)
+ self.mc.MOV_ri(res.value, 1, cond=cond)
+ self.mc.MOV_ri(res.value, 0, cond=inv)
+ return fcond
+ f.__name__ = 'emit_op_%s' % name
+ return f
+
+def gen_emit_float_cmp_op_guard(name, true_cond):
+ false_cond = c.get_opposite_of(true_cond)
+ def f(self, op, guard, arglocs, regalloc, fcond):
+ assert guard is not None
+ arg1 = arglocs[0]
+ arg2 = arglocs[1]
+ self.mc.VCMP(arg1.value, arg2.value)
+ self.mc.VMRS(cond=fcond)
+ cond = true_cond
+ guard_opnum = guard.getopnum()
+ if guard_opnum == rop.GUARD_FALSE:
+ cond = false_cond
+ return self._emit_guard(guard, arglocs[2:], cond, save_exc=False)
+ f.__name__ = 'emit_guard_%s' % name
+ return f
+
+
+class saved_registers(object):
+ def __init__(self, cb, regs_to_save, vfp_regs_to_save=None):
+ self.cb = cb
+ if vfp_regs_to_save is None:
+ vfp_regs_to_save = []
+ self.regs = regs_to_save
+ self.vfp_regs = vfp_regs_to_save
+
+ def __enter__(self):
+ if len(self.regs) > 0:
+ self.cb.PUSH([r.value for r in self.regs])
+ if len(self.vfp_regs) > 0:
+ self.cb.VPUSH([r.value for r in self.vfp_regs])
+
+ def __exit__(self, *args):
+ if len(self.vfp_regs) > 0:
+ self.cb.VPOP([r.value for r in self.vfp_regs])
+ if len(self.regs) > 0:
+ self.cb.POP([r.value for r in self.regs])
+
+def count_reg_args(args):
+ reg_args = 0
+ words = 0
+ count = 0
+ for x in range(min(len(args), 4)):
+ if args[x].type == FLOAT:
+ words += 2
+ if count % 2 != 0:
+ words += 1
+ count = 0
+ else:
+ count += 1
+ words += 1
+ reg_args += 1
+ if words > 4:
+ reg_args = x
+ break
+ return reg_args
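+
+# Worked examples (a sketch; INT/FLOAT stand for the argument types):
+# count_reg_args([INT, INT, FLOAT]) == 3
+# (ints in r0/r1, the float exactly fills the r2/r3 pair)
+# count_reg_args([INT, FLOAT, INT]) == 2
+# (int in r0, r1 skipped to even-align the float into r2/r3, so the
+# third argument no longer fits in registers)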
diff --git a/pypy/jit/backend/arm/helper/regalloc.py b/pypy/jit/backend/arm/helper/regalloc.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/helper/regalloc.py
@@ -0,0 +1,146 @@
+from pypy.jit.backend.arm import conditions as c
+from pypy.jit.backend.arm import registers as r
+from pypy.jit.backend.arm.codebuilder import AbstractARMv7Builder
+from pypy.jit.metainterp.history import ConstInt, BoxInt, Box, FLOAT
+from pypy.rlib.objectmodel import we_are_translated
+
+def check_imm_arg(arg, size=0xFF, allow_zero=True):
+ assert not isinstance(arg, ConstInt)
+ if not we_are_translated():
+ if not isinstance(arg, int):
+ import pdb; pdb.set_trace()
+ i = arg
+ if allow_zero:
+ lower_bound = i >= 0
+ else:
+ lower_bound = i > 0
+ return i <= size and lower_bound
+
+def check_imm_box(arg, size=0xFF, allow_zero=True):
+ if isinstance(arg, ConstInt):
+ return check_imm_arg(arg.getint(), size, allow_zero)
+ return False
+
+
+def prepare_op_ri(name=None, imm_size=0xFF, commutative=True, allow_zero=True):
+ def f(self, op, fcond):
+ assert fcond is not None
+ a0 = op.getarg(0)
+ a1 = op.getarg(1)
+ boxes = list(op.getarglist())
+ imm_a0 = check_imm_box(a0, imm_size, allow_zero=allow_zero)
+ imm_a1 = check_imm_box(a1, imm_size, allow_zero=allow_zero)
+ if not imm_a0 and imm_a1:
+ l0 = self._ensure_value_is_boxed(a0)
+ l1 = self.convert_to_imm(a1)
+ elif commutative and imm_a0 and not imm_a1:
+ l1 = self.convert_to_imm(a0)
+ l0 = self._ensure_value_is_boxed(a1, boxes)
+ else:
+ l0 = self._ensure_value_is_boxed(a0, boxes)
+ l1 = self._ensure_value_is_boxed(a1, boxes)
+ self.possibly_free_vars_for_op(op)
+ self.free_temp_vars()
+ res = self.force_allocate_reg(op.result, boxes)
+ return [l0, l1, res]
+ if name:
+ f.__name__ = name
+ return f
+
+def prepare_float_op(name=None, base=True, float_result=True, guard=False):
+ if guard:
+ def f(self, op, guard_op, fcond):
+ locs = []
+ loc1 = self._ensure_value_is_boxed(op.getarg(0))
+ locs.append(loc1)
+ if base:
+ loc2 = self._ensure_value_is_boxed(op.getarg(1))
+ locs.append(loc2)
+ self.possibly_free_vars_for_op(op)
+ self.free_temp_vars()
+ if guard_op is None:
+ res = self.force_allocate_reg(op.result)
+ assert float_result == (op.result.type == FLOAT)
+ locs.append(res)
+ return locs
+ else:
+ args = self._prepare_guard(guard_op, locs)
+ return args
+ else:
+ def f(self, op, fcond):
+ locs = []
+ loc1 = self._ensure_value_is_boxed(op.getarg(0))
+ locs.append(loc1)
+ if base:
+ loc2 = self._ensure_value_is_boxed(op.getarg(1))
+ locs.append(loc2)
+ self.possibly_free_vars_for_op(op)
+ self.free_temp_vars()
+ res = self.force_allocate_reg(op.result)
+ assert float_result == (op.result.type == FLOAT)
+ locs.append(res)
+ return locs
+ if name:
+ f.__name__ = name
+ return f
+
+def prepare_op_by_helper_call(name):
+ def f(self, op, fcond):
+ assert fcond is not None
+ a0 = op.getarg(0)
+ a1 = op.getarg(1)
+ arg1 = self.rm.make_sure_var_in_reg(a0, selected_reg=r.r0)
+ arg2 = self.rm.make_sure_var_in_reg(a1, selected_reg=r.r1)
+ assert arg1 == r.r0
+ assert arg2 == r.r1
+ if isinstance(a0, Box) and self.stays_alive(a0):
+ self.force_spill_var(a0)
+ self.possibly_free_vars_for_op(op)
+ self.free_temp_vars()
+ self.after_call(op.result)
+ self.possibly_free_var(op.result)
+ return []
+ f.__name__ = name
+ return f
+
+def prepare_cmp_op(name=None):
+ def f(self, op, guard_op, fcond):
+ assert fcond is not None
+ boxes = list(op.getarglist())
+ arg0, arg1 = boxes
+ imm_a1 = check_imm_box(arg1)
+
+ l0 = self._ensure_value_is_boxed(arg0, forbidden_vars=boxes)
+ if imm_a1:
+ l1 = self.convert_to_imm(arg1)
+ else:
+ l1 = self._ensure_value_is_boxed(arg1, forbidden_vars=boxes)
+
+ self.possibly_free_vars_for_op(op)
+ self.free_temp_vars()
+ if guard_op is None:
+ res = self.force_allocate_reg(op.result)
+ return [l0, l1, res]
+ else:
+ args = self._prepare_guard(guard_op, [l0, l1])
+ return args
+ if name:
+ f.__name__ = name
+ return f
+
+def prepare_op_unary_cmp(name=None):
+ def f(self, op, guard_op, fcond):
+ assert fcond is not None
+ a0 = op.getarg(0)
+ assert isinstance(a0, Box)
+ reg = self._ensure_value_is_boxed(a0)
+ self.possibly_free_vars_for_op(op)
+ if guard_op is None:
+ res = self.force_allocate_reg(op.result, [a0])
+ return [reg, res]
+ else:
+ return self._prepare_guard(guard_op, [reg])
+ if name:
+ f.__name__ = name
+ return f
diff --git a/pypy/jit/backend/arm/instruction_builder.py b/pypy/jit/backend/arm/instruction_builder.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/instruction_builder.py
@@ -0,0 +1,419 @@
+from pypy.jit.backend.arm import conditions as cond
+from pypy.jit.backend.arm import instructions
+
+
+# move table lookup out of generated functions
+def define_load_store_func(name, table):
+ n = (0x1 << 26
+ | (table['A'] & 0x1) << 25
+ | (table['op1'] & 0x1F) << 20)
+ if 'B' in table:
+ b_zero = True
+ else:
+ b_zero = False
+ op1cond = table['op1not']
+ rncond = ('rn' in table and table['rn'] == '!0xF')
+ if table['imm']:
+ assert not b_zero
+
+ def f(self, rt, rn, imm=0, cond=cond.AL):
+ assert not (rncond and rn == 0xF)
+ p = 1
+ w = 0
+ u, imm = self._encode_imm(imm)
+ instr = (n
+ | cond << 28
+ | (p & 0x1) << 24
+ | (u & 0x1) << 23
+ | (w & 0x1) << 21
+ | imm_operation(rt, rn, imm))
+ assert instr & 0x1F00000 != op1cond
+ self.write32(instr)
+ else:
+ def f(self, rt, rn, rm, imm=0, cond=cond.AL, s=0, shifttype=0):
+ assert not (rncond and rn == 0xF)
+ p = 1
+ w = 0
+ u, imm = self._encode_imm(imm)
+ instr = (n
+ | cond << 28
+ | (p & 0x1) << 24
+ | (u & 0x1) << 23
+ | (w & 0x1) << 21
+ | reg_operation(rt, rn, rm, imm, s, shifttype))
+ if b_zero:
+ assert instr & 0x10 == 0, 'bit 4 should be zero'
+ assert instr & 0x1F00000 != op1cond
+ self.write32(instr)
+ return f
+
+
+def define_extra_load_store_func(name, table):
+ def check_registers(r1, r2):
+ assert r1 % 2 == 0
+ assert r1 + 1 == r2
+ assert r1 != 14
+
+ n = ((table['op1'] & 0x1F) << 20
+ | 0x1 << 7
+ | (table['op2'] & 0x3) << 5
+ | 0x1 << 4)
+ p = 1
+ w = 0
+ rncond = ('rn' in table and table['rn'] == '!0xF')
+ dual = (name[-4] == 'D')
+
+ if dual:
+ if name[-2:] == 'rr':
+ def f(self, rt, rt2, rn, rm, cond=cond.AL):
+ check_registers(rt, rt2)
+ assert not (rncond and rn == 0xF)
+ self.write32(n
+ | cond << 28
+ | (p & 0x1) << 24
+ | (1 & 0x1) << 23
+ | (w & 0x1) << 21
+ | (rn & 0xF) << 16
+ | (rt & 0xF) << 12
+ | (rm & 0xF))
+ else:
+ def f(self, rt, rt2, rn, imm=0, cond=cond.AL):
+ check_registers(rt, rt2)
+ assert not (rncond and rn == 0xF)
+ u, imm = self._encode_imm(imm)
+ self.write32(n
+ | cond << 28
+ | (p & 0x1) << 24
+ | (u & 0x1) << 23
+ | (w & 0x1) << 21
+ | (rn & 0xF) << 16
+ | (rt & 0xF) << 12
+ | ((imm >> 0x4) & 0xF) << 8
+ | (imm & 0xF))
+
+ else:
+ if name[-2:] == 'rr':
+ def f(self, rt, rn, rm, cond=cond.AL):
+ assert not (rncond and rn == 0xF)
+ self.write32(n
+ | cond << 28
+ | (p & 0x1) << 24
+ | (1 & 0x1) << 23
+ | (w & 0x1) << 21
+ | (rn & 0xF) << 16
+ | (rt & 0xF) << 12
+ | (rm & 0xF))
+ else:
+ def f(self, rt, rn, imm=0, cond=cond.AL):
+ assert not (rncond and rn == 0xF)
+ u, imm = self._encode_imm(imm)
+ self.write32(n
+ | cond << 28
+ | (p & 0x1) << 24
+ | (u & 0x1) << 23
+ | (w & 0x1) << 21
+ | (rn & 0xF) << 16
+ | (rt & 0xF) << 12
+ | ((imm >> 0x4) & 0xF) << 8
+ | (imm & 0xF))
+ return f
+
+
+def define_data_proc_imm_func(name, table):
+ n = (0x1 << 25
+ | (table['op'] & 0x1F) << 20)
+ if table['result'] and table['base']:
+ def imm_func(self, rd, rn, imm=0, cond=cond.AL, s=0):
+ if imm < 0:
+ raise ValueError
+ self.write32(n
+ | cond << 28
+ | s << 20
+ | imm_operation(rd, rn, imm))
+ elif not table['base']:
+ def imm_func(self, rd, imm=0, cond=cond.AL, s=0):
+ self.write32(n
+ | cond << 28
+ | s << 20
+ | imm_operation(rd, 0, imm))
+ else:
+ def imm_func(self, rn, imm=0, cond=cond.AL, s=0):
+ self.write32(n
+ | cond << 28
+ | s << 20
+ | imm_operation(0, rn, imm))
+ return imm_func
+
+
+def define_data_proc_func(name, table):
+ n = ((table['op1'] & 0x1F) << 20
+ | (table['op2'] & 0x1F) << 7
+ | (table['op3'] & 0x3) << 5)
+ if name[-2:] == 'ri':
+ def f(self, rd, rm, imm=0, cond=cond.AL, s=0):
+ if table['op2cond'] == '!0':
+ assert imm != 0
+ elif table['op2cond'] == '0':
+ assert imm == 0
+ self.write32(n
+ | cond << 28
+ | (s & 0x1) << 20
+ | (rd & 0xFF) << 12
+ | (imm & 0x1F) << 7
+ | (rm & 0xFF))
+
+ elif not table['result']:
+ # ops without result
+ def f(self, rn, rm, imm=0, cond=cond.AL, s=0, shifttype=0):
+ self.write32(n
+ | cond << 28
+ | reg_operation(0, rn, rm, imm, s, shifttype))
+ elif not table['base']:
+ # ops without base register
+ def f(self, rd, rm, imm=0, cond=cond.AL, s=0, shifttype=0):
+ self.write32(n
+ | cond << 28
+ | reg_operation(rd, 0, rm, imm, s, shifttype))
+ else:
+ def f(self, rd, rn, rm, imm=0, cond=cond.AL, s=0, shifttype=0):
+ self.write32(n
+ | cond << 28
+ | reg_operation(rd, rn, rm, imm, s, shifttype))
+ return f
+
+
+def define_data_proc_reg_shift_reg_func(name, table):
+ n = ((0x1 << 4) | (table['op1'] & 0x1F) << 20 | (table['op2'] & 0x3) << 5)
+ if 'result' in table and not table['result']:
+ result = False
+ else:
+ result = True
+ if name[-2:] == 'sr':
+ if result:
+ def f(self, rd, rn, rm, rs, cond=cond.AL, s=0, shifttype=0):
+ self.write32(n
+ | cond << 28
+ | (s & 0x1) << 20
+ | (rn & 0xF) << 16
+ | (rd & 0xF) << 12
+ | (rs & 0xF) << 8
+ | (shifttype & 0x3) << 5
+ | (rm & 0xF))
+ else:
+ def f(self, rn, rm, rs, cond=cond.AL, s=0, shifttype=0):
+ self.write32(n
+ | cond << 28
+ | (s & 0x1) << 20
+ | (rn & 0xF) << 16
+ | (rs & 0xF) << 8
+ | (shifttype & 0x3) << 5
+ | (rm & 0xF))
+ else:
+ def f(self, rd, rn, rm, cond=cond.AL, s=0):
+ self.write32(n
+ | cond << 28
+ | (s & 0x1) << 20
+ | (rd & 0xF) << 12
+ | (rm & 0xF) << 8
+ | (rn & 0xF))
+ return f
+
+
+def define_supervisor_and_coproc_func(name, table):
+ n = (0x3 << 26 | (table['op1'] & 0x3F) << 20 | (table['op'] & 0x1) << 4)
+
+ def f(self, coproc, opc1, rt, crn, crm, opc2=0, cond=cond.AL):
+ assert coproc & 0xE != 0xA
+ self.write32(n
+ | cond << 28
+ | (opc1 & 0x7) << 21
+ | (crn & 0xF) << 16
+ | (rt & 0xF) << 12
+ | (coproc & 0xF) << 8
+ | (opc2 & 0x7) << 5
+ | (crm & 0xF))
+ return f
+
+
+def define_multiply_func(name, table):
+ n = (table['op'] & 0xF) << 20 | 0x9 << 4
+ if 'acc' in table and table['acc']:
+ if 'update_flags' in table and table['update_flags']:
+ def f(self, rd, rn, rm, ra, cond=cond.AL, s=0):
+ self.write32(n
+ | cond << 28
+ | (s & 0x1) << 20
+ | (rd & 0xF) << 16
+ | (ra & 0xF) << 12
+ | (rm & 0xF) << 8
+ | (rn & 0xF))
+ else:
+ def f(self, rd, rn, rm, ra, cond=cond.AL):
+ self.write32(n
+ | cond << 28
+ | (rd & 0xF) << 16
+ | (ra & 0xF) << 12
+ | (rm & 0xF) << 8
+ | (rn & 0xF))
+
+ elif 'long' in table and table['long']:
+ def f(self, rdlo, rdhi, rn, rm, cond=cond.AL):
+ assert rdhi != rdlo
+ self.write32(n
+ | cond << 28
+ | (rdhi & 0xF) << 16
+ | (rdlo & 0xF) << 12
+ | (rm & 0xF) << 8
+ | (rn & 0xF))
+ else:
+ def f(self, rd, rn, rm, cond=cond.AL, s=0):
+ self.write32(n
+ | cond << 28
+ | (s & 0x1) << 20
+ | (rd & 0xF) << 16
+ | (rm & 0xF) << 8
+ | (rn & 0xF))
+
+ return f
+
+
+def define_block_data_func(name, table):
+ n = (table['op'] & 0x3F) << 20
+
+ def f(self, rn, regs, w=0, cond=cond.AL):
+ # no R bit for now at bit 15
+ instr = (n
+ | cond << 28
+ | 0x1 << 27
+ | (w & 0x1) << 21
+ | (rn & 0xF) << 16)
+ instr = self._encode_reg_list(instr, regs)
+ self.write32(instr)
+
+ return f
+
+
+def define_float_load_store_func(name, table):
+ n = (0x3 << 26
+ | (table['opcode'] & 0x1F) << 20
+ | 0x5 << 0x9
+ | 0x1 << 0x8)
+
+ # The imm value for this function has to be a multiple of 4;
+ # the value actually encoded is imm / 4.
+ def f(self, dd, rn, imm=0, cond=cond.AL):
+ assert imm % 4 == 0
+ imm = imm / 4
+ u, imm = self._encode_imm(imm)
+ instr = (n
+ | (cond & 0xF) << 28
+ | (u & 0x1) << 23
+ | (rn & 0xF) << 16
+ | (dd & 0xF) << 12
+ | (imm & 0xFF))
+ self.write32(instr)
+ return f
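+
+# e.g. a generated VLDR with imm=8 encodes 8 / 4 == 2 in the low 8 bits
+# of the instruction word; offsets that are not multiples of 4 trip the
+# assert above.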
+
+
+def define_float64_data_proc_instructions_func(name, table):
+ n = (0xE << 24
+ | 0x5 << 9
+ | 0x1 << 8 # 64 bit flag
+ | (table['opc3'] & 0x3) << 6)
+
+ if 'opc1' in table:
+ n |= (table['opc1'] & 0xF) << 20
+ if 'opc2' in table:
+ n |= (table['opc2'] & 0xF) << 16
+
+ if 'result' in table and not table['result']:
+ def f(self, dd, dm, cond=cond.AL):
+ instr = (n
+ | (cond & 0xF) << 28
+ | 0x4 << 16
+ | (dd & 0xF) << 12
+ | (dm & 0xF))
+ self.write32(instr)
+ elif 'base' in table and not table['base']:
+ def f(self, dd, dm, cond=cond.AL):
+ instr = (n
+ | (cond & 0xF) << 28
+ | (dd & 0xF) << 12
+ | (dm & 0xF))
+ self.write32(instr)
+ else:
+ def f(self, dd, dn, dm, cond=cond.AL):
+ instr = (n
+ | (cond & 0xF) << 28
+ | (dn & 0xF) << 16
+ | (dd & 0xF) << 12
+ | (dm & 0xF))
+ self.write32(instr)
+ return f
+
+def define_simd_instructions_3regs_func(name, table):
+ n = 0
+ if 'A' in table:
+ n |= (table['A'] & 0xF) << 8
+ if 'B' in table:
+ n |= (table['B'] & 0x1) << 4
+ if 'U' in table:
+ n |= (table['U'] & 0x1) << 24
+ if 'C' in table:
+ n |= (table['C'] & 0x3) << 20
+ if name == 'VADD_i64' or name == 'VSUB_i64':
+ size = 0x3 << 20
+ n |= size
+ def f(self, dd, dn, dm):
+ base = 0x79
+ N = (dn >> 4) & 0x1
+ M = (dm >> 4) & 0x1
+ D = (dd >> 4) & 0x1
+ Q = 0 # we want doubleword regs
+ instr = (n
+ | base << 25
+ | D << 22
+ | (dn & 0xf) << 16
+ | (dd & 0xf) << 12
+ | N << 7
+ | Q << 6
+ | M << 5
+ | (dm & 0xf))
+
+ self.write32(instr)
+ return f
+
+
+def imm_operation(rt, rn, imm):
+ return ((rn & 0xFF) << 16
+ | (rt & 0xFF) << 12
+ | (imm & 0xFFF))
+
+
+def reg_operation(rt, rn, rm, imm, s, shifttype):
+ return ((s & 0x1) << 20
+ | (rn & 0xF) << 16
+ | (rt & 0xF) << 12
+ | (imm & 0x1F) << 7
+ | (shifttype & 0x3) << 5
+ | (rm & 0xF))
+
+
+def define_instruction(builder, key, val, target):
+ f = builder(key, val)
+ f.__name__ = key
+ setattr(target, key, f)
+
+
+def define_instructions(target):
+ inss = [k for k in instructions.__dict__.keys() if not k.startswith('__')]
+ for name in inss:
+ if name == 'branch':
+ continue
+ try:
+ func = globals()['define_%s_func' % name]
+ except KeyError:
+ print 'No instr generator for %s instructions' % name
+ continue
+ for key, value in getattr(instructions, name).iteritems():
+ define_instruction(func, key, value, target)
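+
+# A sketch of what define_instructions produces: for the 'LDR_ri' entry
+# of the load_store table (see instructions.py), define_load_store_func
+# returns an f(self, rt, rn, imm=0, cond=cond.AL) that write32()s the
+# "ldr rt, [rn, #imm]" encoding, and define_instruction attaches it to
+# the builder class under the name LDR_ri.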
diff --git a/pypy/jit/backend/arm/instructions.py b/pypy/jit/backend/arm/instructions.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/instructions.py
@@ -0,0 +1,151 @@
+load_store = {
+ 'STR_ri': {'A': 0, 'op1': 0x0, 'op1not': 0x2, 'imm': True},
+ 'STR_rr': {'A': 1, 'op1': 0x0, 'op1not': 0x2, 'B': 0, 'imm': False},
+ 'LDR_ri': {'A': 0, 'op1': 0x1, 'op1not': 0x3, 'imm': True},
+ 'LDR_rr': {'A': 1, 'op1': 0x1, 'op1not': 0x3, 'B': 0, 'imm': False},
+ 'STRB_ri': {'A': 0, 'op1': 0x4, 'op1not': 0x6, 'rn': '!0xF', 'imm': True},
+ 'STRB_rr': {'A': 1, 'op1': 0x4, 'op1not': 0x6, 'B': 0, 'imm': False},
+ 'LDRB_ri': {'A': 0, 'op1': 0x5, 'op1not': 0x7, 'rn': '!0xF', 'imm': True},
+ 'LDRB_rr': {'A': 1, 'op1': 0x5, 'op1not': 0x7, 'B': 0, 'imm': False},
+}
+extra_load_store = { # Section 5.2.8
+ 'STRH_rr': {'op2': 0x1, 'op1': 0x0},
+ 'LDRH_rr': {'op2': 0x1, 'op1': 0x1},
+ 'STRH_ri': {'op2': 0x1, 'op1': 0x4},
+ 'LDRH_ri': {'op2': 0x1, 'op1': 0x5, 'rn': '!0xF'},
+ 'LDRD_rr': {'op2': 0x2, 'op1': 0x0},
+ 'LDRSB_rr': {'op2': 0x2, 'op1': 0x1},
+ 'LDRD_ri': {'op2': 0x2, 'op1': 0x4},
+ 'LDRSB_ri': {'op2': 0x2, 'op1': 0x5, 'rn': '!0xF'},
+ 'STRD_rr': {'op2': 0x3, 'op1': 0x0},
+ 'LDRSH_rr': {'op2': 0x3, 'op1': 0x1},
+ 'STRD_ri': {'op2': 0x3, 'op1': 0x4},
+ 'LDRSH_ri': {'op2': 0x3, 'op1': 0x5, 'rn': '!0xF'},
+}
+
+
+data_proc = {
+ 'AND_rr': {'op1': 0x0, 'op2': 0, 'op3': 0, 'result': True, 'base': True},
+ 'EOR_rr': {'op1': 0x2, 'op2': 0, 'op3': 0, 'result': True, 'base': True},
+ 'SUB_rr': {'op1': 0x4, 'op2': 0, 'op3': 0, 'result': True, 'base': True},
+ 'RSB_rr': {'op1': 0x6, 'op2': 0, 'op3': 0, 'result': True, 'base': True},
+ 'ADD_rr': {'op1': 0x8, 'op2': 0, 'op3': 0, 'result': True, 'base': True},
+ 'ADC_rr': {'op1': 0xA, 'op2': 0, 'op3': 0, 'result': True, 'base': True},
+ 'SBC_rr': {'op1': 0xC, 'op2': 0, 'op3': 0, 'result': True, 'base': True},
+ 'RSC_rr': {'op1': 0xE, 'op2': 0, 'op3': 0, 'result': True, 'base': True},
+ 'TST_rr': {'op1': 0x11, 'op2': 0, 'op3': 0, 'result': False, 'base': True},
+ 'TEQ_rr': {'op1': 0x13, 'op2': 0, 'op3': 0, 'result': False, 'base': True},
+ 'CMP_rr': {'op1': 0x15, 'op2': 0, 'op3': 0, 'result': False, 'base': True},
+ 'CMN_rr': {'op1': 0x17, 'op2': 0, 'op3': 0, 'result': False, 'base': True},
+ 'ORR_rr': {'op1': 0x18, 'op2': 0, 'op3': 0, 'result': True, 'base': True},
+ 'MOV_rr': {'op1': 0x1A, 'op2': 0, 'op3': 0, 'result': True, 'base': False},
+ 'LSL_ri': {'op1': 0x1A, 'op2': 0x0, 'op3': 0, 'op2cond': '!0',
+ 'result': False, 'base': True},
+ 'LSR_ri': {'op1': 0x1A, 'op2': 0, 'op3': 0x1, 'op2cond': '',
+ 'result': False, 'base': True},
+ 'ASR_ri': {'op1': 0x1A, 'op2': 0, 'op3': 0x2, 'op2cond': '',
+ 'result': False, 'base': True},
+ 'ROR_ri': {'op1': 0x1A, 'op2': 0x0, 'op3': 0x3, 'op2cond': '!0',
+ 'result': True, 'base': False},
+ 'MVN_rr': {'op1': 0x1E, 'op2': 0x0, 'op3': 0x0, 'result': True,
+ 'base': False},
+
+}
+
+data_proc_reg_shift_reg = {
+ 'AND_rr_sr': {'op1': 0x0, 'op2': 0},
+ 'EOR_rr_sr': {'op1': 0x2, 'op2': 0},
+ 'SUB_rr_sr': {'op1': 0x4, 'op2': 0},
+ 'RSB_rr_sr': {'op1': 0x6, 'op2': 0},
+ 'ADD_rr_sr': {'op1': 0x8, 'op2': 0},
+ 'ADC_rr_sr': {'op1': 0xA, 'op2': 0},
+ 'SBC_rr_sr': {'op1': 0xC, 'op2': 0},
+ 'RSC_rr_sr': {'op1': 0xE, 'op2': 0},
+ 'TST_rr_sr': {'op1': 0x11, 'op2': 0, 'result': False},
+ 'TEQ_rr_sr': {'op1': 0x13, 'op2': 0, 'result': False},
+ 'CMP_rr_sr': {'op1': 0x15, 'op2': 0, 'result': False},
+ 'CMN_rr_sr': {'op1': 0x17, 'op2': 0, 'result': False},
+ 'ORR_rr_sr': {'op1': 0x18, 'op2': 0},
+ 'LSL_rr': {'op1': 0x1A, 'op2': 0, },
+ 'LSR_rr': {'op1': 0x1A, 'op2': 0x1},
+ 'ASR_rr': {'op1': 0x1A, 'op2': 0x2},
+ 'ROR_rr': {'op1': 0x1A, 'op2': 0x3},
+}
+
+data_proc_imm = {
+ 'AND_ri': {'op': 0, 'result': True, 'base': True},
+ 'EOR_ri': {'op': 0x2, 'result': True, 'base': True},
+ 'SUB_ri': {'op': 0x4, 'result': True, 'base': True},
+ 'RSB_ri': {'op': 0x6, 'result': True, 'base': True},
+ 'ADD_ri': {'op': 0x8, 'result': True, 'base': True},
+ 'ADC_ri': {'op': 0xA, 'result': True, 'base': True},
+ 'SBC_ri': {'op': 0xC, 'result': True, 'base': True},
+ 'RSC_ri': {'op': 0xE, 'result': True, 'base': True},
+ 'TST_ri': {'op': 0x11, 'result': False, 'base': True},
+ 'TEQ_ri': {'op': 0x13, 'result': False, 'base': True},
+ 'CMP_ri': {'op': 0x15, 'result': False, 'base': True},
+ 'CMN_ri': {'op': 0x17, 'result': False, 'base': True},
+ 'ORR_ri': {'op': 0x18, 'result': True, 'base': True},
+ 'MOV_ri': {'op': 0x1A, 'result': True, 'base': False},
+ 'BIC_ri': {'op': 0x1C, 'result': True, 'base': True},
+ 'MVN_ri': {'op': 0x1E, 'result': True, 'base': False},
+}
+
+supervisor_and_coproc = {
+ 'MCR': {'op1': 0x20, 'op': 1, 'rn':0, 'coproc':0},
+ 'MRC': {'op1': 0x21, 'op': 1, 'rn':0, 'coproc':0},
+}
+
+block_data = {
+ 'STMDA': {'op': 0x0},
+ 'LDMDA': {'op': 0x1},
+ 'STMIA': {'op': 0x8},
+ 'LDMDB': {'op': 0x11},
+ 'STMIB': {'op': 0x18},
+ 'LDMIB': {'op': 0x19},
+ #'STM': {'op': 0x4},
+ #'LDM': {'op': 0x5},
+}
+branch = {
+ 'B': {'op': 0x20},
+ 'BL': {'op': 0x30},
+}
+
+multiply = {
+ 'MUL': {'op':0x0},
+ 'MLA': {'op':0x2, 'acc': True, 'update_flags':True},
+ 'UMAAL': {'op':0x4, 'long': True},
+ 'MLS': {'op':0x6, 'acc': True},
+ 'UMULL': {'op':0x8, 'long': True},
+ 'UMLAL': {'op':0xA, 'long': True},
+ 'SMULL': {'op':0xC, 'long': True},
+ 'SMLAL': {'op':0xE, 'long': True},
+}
+
+float_load_store = {
+ 'VSTR': {'opcode': 0x10},
+ 'VLDR': {'opcode': 0x11},
+}
+
+
+# based on encoding from A7.5 VFP data-processing instructions
+# opc2 is one of the parameters and therefore ignored here
+float64_data_proc_instructions = {
+ 'VADD' : {'opc1':0x3, 'opc3':0x0},
+ 'VSUB' : {'opc1':0x3, 'opc3':0x1},
+ 'VMUL' : {'opc1':0x2, 'opc3':0x0},
+ 'VDIV' : {'opc1':0x8, 'opc3':0x0},
+ 'VCMP' : {'opc1':0xB, 'opc2':0x4, 'opc3':0x1, 'result': False},
+ 'VNEG' : {'opc1':0xB, 'opc2':0x1, 'opc3':0x1, 'base': False},
+ 'VABS' : {'opc1':0xB, 'opc2':0x0, 'opc3':0x3, 'base': False},
+ 'VSQRT' : {'opc1':0xB, 'opc2':0x1, 'opc3':0x3, 'base': False},
+ #'VCVT' : {'opc1':0xB, 'opc2':0xE, 'opc3':0x1, 'base': False},
+}
+
+simd_instructions_3regs = {
+ 'VADD_i64': {'A': 0x8, 'B': 0, 'U': 0},
+ 'VSUB_i64': {'A': 0x8, 'B': 0, 'U': 1},
+ 'VAND_i64': {'A': 0x1, 'B': 1, 'U': 0, 'C': 0},
+ 'VORR_i64': {'A': 0x1, 'B': 1, 'U': 0, 'C': 0x2},
+ 'VEOR_i64': {'A': 0x1, 'B': 1, 'U': 1, 'C': 0x0},
+}
diff --git a/pypy/jit/backend/arm/jump.py b/pypy/jit/backend/arm/jump.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/jump.py
@@ -0,0 +1,114 @@
+# ../x86/jump.py
+# XXX combine with ../x86/jump.py and move to llsupport
+
+
+def remap_frame_layout(assembler, src_locations, dst_locations, tmpreg):
+ pending_dests = len(dst_locations)
+ srccount = {} # maps dst_locations to how many times the same
+ # location appears in src_locations
+ for dst in dst_locations:
+ key = dst.as_key()
+ assert key not in srccount, "duplicate value in dst_locations!"
+ srccount[key] = 0
+ for i in range(len(dst_locations)):
+ src = src_locations[i]
+ if src.is_imm():
+ continue
+ key = src.as_key()
+ if key in srccount:
+ if key == dst_locations[i].as_key():
+ # ignore a move "x = x"
+ # setting any "large enough" negative value is ok, but
+ # be careful of overflows, don't use -sys.maxint
+ srccount[key] = -len(dst_locations) - 1
+ pending_dests -= 1
+ else:
+ srccount[key] += 1
+
+ while pending_dests > 0:
+ progress = False
+ for i in range(len(dst_locations)):
+ dst = dst_locations[i]
+ key = dst.as_key()
+ if srccount[key] == 0:
+ srccount[key] = -1 # means "it's done"
+ pending_dests -= 1
+ src = src_locations[i]
+ if not src.is_imm():
+ key = src.as_key()
+ if key in srccount:
+ srccount[key] -= 1
+ _move(assembler, src, dst, tmpreg)
+ progress = True
+ if not progress:
+ # we are left with only pure disjoint cycles
+ sources = {} # maps dst_locations to src_locations
+ for i in range(len(dst_locations)):
+ src = src_locations[i]
+ dst = dst_locations[i]
+ sources[dst.as_key()] = src
+ #
+ for i in range(len(dst_locations)):
+ dst = dst_locations[i]
+ originalkey = dst.as_key()
+ if srccount[originalkey] >= 0:
+ assembler.regalloc_push(dst)
+ while True:
+ key = dst.as_key()
+ assert srccount[key] == 1
+ # ^^^ because we are in a simple cycle
+ srccount[key] = -1
+ pending_dests -= 1
+ src = sources[key]
+ if src.as_key() == originalkey:
+ break
+ _move(assembler, src, dst, tmpreg)
+ dst = src
+ assembler.regalloc_pop(dst)
+ assert pending_dests == 0
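+
+# Example of the cycle-breaking branch above: with src_locations [r1, r0]
+# and dst_locations [r0, r1] neither move can go first, so the code emits
+# (via regalloc_push / _move / regalloc_pop):
+# push r0; mov r0, r1; pop r1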
+
+
+def _move(assembler, src, dst, tmpreg):
+ if dst.is_stack() and src.is_stack():
+ assembler.regalloc_mov(src, tmpreg)
+ src = tmpreg
+ assembler.regalloc_mov(src, dst)
+
+
+def remap_frame_layout_mixed(assembler,
+ src_locations1, dst_locations1, tmpreg1,
+ src_locations2, dst_locations2, tmpreg2):
+ # find and push the vfp stack locations from src_locations2 that
+ # are going to be overwritten by dst_locations1
+ from pypy.jit.backend.arm.arch import WORD
+ extrapushes = []
+ dst_keys = {}
+ for loc in dst_locations1:
+ dst_keys[loc.as_key()] = None
+ src_locations2red = []
+ dst_locations2red = []
+ for i in range(len(src_locations2)):
+ loc = src_locations2[i]
+ dstloc = dst_locations2[i]
+ if loc.is_stack():
+ key = loc.as_key()
+ if (key in dst_keys or (loc.width > WORD and
+ (key + 1) in dst_keys)):
+ assembler.regalloc_push(loc)
+ extrapushes.append(dstloc)
+ continue
+ src_locations2red.append(loc)
+ dst_locations2red.append(dstloc)
+ src_locations2 = src_locations2red
+ dst_locations2 = dst_locations2red
+ #
+ # remap the integer and pointer registers and stack locations
+ remap_frame_layout(assembler, src_locations1, dst_locations1, tmpreg1)
+ #
+ # remap the vfp registers and stack locations
+ remap_frame_layout(assembler, src_locations2, dst_locations2, tmpreg2)
+ #
+ # finally, pop the extra vfp stack locations
+ while len(extrapushes) > 0:
+ loc = extrapushes.pop()
+ assembler.regalloc_pop(loc)
diff --git a/pypy/jit/backend/arm/locations.py b/pypy/jit/backend/arm/locations.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/locations.py
@@ -0,0 +1,144 @@
+from pypy.jit.metainterp.history import INT, FLOAT
+from pypy.jit.backend.arm.arch import WORD, DOUBLE_WORD
+
+
+class AssemblerLocation(object):
+ _immutable_ = True
+ type = INT
+
+ def is_imm(self):
+ return False
+
+ def is_stack(self):
+ return False
+
+ def is_reg(self):
+ return False
+
+ def is_vfp_reg(self):
+ return False
+
+ def is_imm_float(self):
+ return False
+
+ def as_key(self):
+ raise NotImplementedError
+
+
+class RegisterLocation(AssemblerLocation):
+ _immutable_ = True
+ width = WORD
+
+ def __init__(self, value):
+ self.value = value
+
+ def __repr__(self):
+ return 'r%d' % self.value
+
+ def is_reg(self):
+ return True
+
+ def as_key(self):
+ return self.value
+
+
+class VFPRegisterLocation(RegisterLocation):
+ _immutable_ = True
+ type = FLOAT
+ width = 2 * WORD
+
+ def get_single_precision_regs(self):
+ return [VFPRegisterLocation(i) for i in
+ [self.value * 2, self.value * 2 + 1]]
+
+ def __repr__(self):
+ return 'vfp%d' % self.value
+
+ def is_reg(self):
+ return False
+
+ def is_vfp_reg(self):
+ return True
+
+ def as_key(self):
+ return self.value + 20
+
+
+class ImmLocation(AssemblerLocation):
+ _immutable_ = True
+ width = WORD
+
+ def __init__(self, value):
+ self.value = value
+
+ def getint(self):
+ return self.value
+
+ def __repr__(self):
+ return "imm(%d)" % (self.value)
+
+ def is_imm(self):
+ return True
+
+
+class ConstFloatLoc(AssemblerLocation):
+ """This class represents an imm float value which is stored in memory at
+ the address stored in the field value"""
+ _immutable_ = True
+ width = 2 * WORD
+ type = FLOAT
+
+ def __init__(self, value):
+ self.value = value
+
+ def getint(self):
+ return self.value
+
+ def __repr__(self):
+ return "imm_float(stored at %d)" % (self.value)
+
+ def is_imm_float(self):
+ return True
+
+ def as_key(self):
+ return self.value
+
+
+class StackLocation(AssemblerLocation):
+ _immutable_ = True
+
+ def __init__(self, position, fp_offset, type=INT):
+ if type == FLOAT:
+ self.width = DOUBLE_WORD
+ else:
+ self.width = WORD
+ self.position = position
+ self.value = fp_offset
+ self.type = type
+
+ def __repr__(self):
+ return 'FP(%s)+%d' % (self.type, self.position,)
+
+ def location_code(self):
+ return 'b'
+
+ def assembler(self):
+ return repr(self)
+
+ def is_stack(self):
+ return True
+
+ def as_key(self):
+ return self.position + 10000
+
+
+def imm(i):
+ return ImmLocation(i)
+
+
+def get_fp_offset(i):
+ if i >= 0:
+ # Take the FORCE_TOKEN into account
+ return (1 + i) * WORD
+ else:
+ return i * WORD
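+
+# e.g. get_fp_offset(0) == WORD, because slot 0 lives just above the
+# FORCE_TOKEN, while negative indices simply scale by WORD
+# (get_fp_offset(-1) == -WORD).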
diff --git a/pypy/jit/backend/arm/opassembler.py b/pypy/jit/backend/arm/opassembler.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/opassembler.py
@@ -0,0 +1,1448 @@
+from __future__ import with_statement
+from pypy.jit.backend.arm import conditions as c
+from pypy.jit.backend.arm import registers as r
+from pypy.jit.backend.arm import shift
+from pypy.jit.backend.arm.arch import WORD, DOUBLE_WORD
+
+from pypy.jit.backend.arm.helper.assembler import (gen_emit_op_by_helper_call,
+ gen_emit_op_unary_cmp,
+ gen_emit_guard_unary_cmp,
+ gen_emit_op_ri,
+ gen_emit_cmp_op,
+ gen_emit_cmp_op_guard,
+ gen_emit_float_op,
+ gen_emit_float_cmp_op,
+ gen_emit_float_cmp_op_guard,
+ gen_emit_unary_float_op,
+ saved_registers,
+ count_reg_args)
+from pypy.jit.backend.arm.codebuilder import ARMv7Builder, OverwritingBuilder
+from pypy.jit.backend.arm.jump import remap_frame_layout
+from pypy.jit.backend.arm.regalloc import TempInt, TempPtr
+from pypy.jit.backend.arm.locations import imm
+from pypy.jit.backend.llsupport import symbolic
+from pypy.jit.backend.llsupport.descr import InteriorFieldDescr
+from pypy.jit.metainterp.history import (Box, AbstractFailDescr,
+ INT, FLOAT, REF)
+from pypy.jit.metainterp.history import JitCellToken, TargetToken
+from pypy.jit.metainterp.resoperation import rop
+from pypy.rlib.objectmodel import we_are_translated
+from pypy.rpython.lltypesystem import rstr
+
+NO_FORCE_INDEX = -1
+
+
+class GuardToken(object):
+ def __init__(self, descr, failargs, faillocs, offset,
+ save_exc, fcond=c.AL, is_invalidate=False):
+ assert isinstance(save_exc, bool)
+ self.descr = descr
+ self.offset = offset
+ self.is_invalidate = is_invalidate
+ self.failargs = failargs
+ self.faillocs = faillocs
+ self.save_exc = save_exc
+ self.fcond = fcond
+
+
+class ResOpAssembler(object):
+
+ def emit_op_int_add(self, op, arglocs, regalloc, fcond):
+ return self.int_add_impl(op, arglocs, regalloc, fcond)
+
+ def int_add_impl(self, op, arglocs, regalloc, fcond, flags=False):
+ l0, l1, res = arglocs
+ if flags:
+ s = 1
+ else:
+ s = 0
+ if l0.is_imm():
+ self.mc.ADD_ri(res.value, l1.value, imm=l0.value, s=s)
+ elif l1.is_imm():
+ self.mc.ADD_ri(res.value, l0.value, imm=l1.value, s=s)
+ else:
+ self.mc.ADD_rr(res.value, l0.value, l1.value, s=s)
+
+ return fcond
+
+ def emit_op_int_sub(self, op, arglocs, regalloc, fcond, flags=False):
+ return self.int_sub_impl(op, arglocs, regalloc, fcond)
+
+ def int_sub_impl(self, op, arglocs, regalloc, fcond, flags=False):
+ l0, l1, res = arglocs
+ if flags:
+ s = 1
+ else:
+ s = 0
+ if l0.is_imm():
+ value = l0.getint()
+ assert value >= 0
+ # reverse subtract (RSB): res = value - l1
+ self.mc.RSB_ri(res.value, l1.value, value, s=s)
+ elif l1.is_imm():
+ value = l1.getint()
+ assert value >= 0
+ self.mc.SUB_ri(res.value, l0.value, value, s=s)
+ else:
+ self.mc.SUB_rr(res.value, l0.value, l1.value, s=s)
+
+ return fcond
+
+ def emit_op_int_mul(self, op, arglocs, regalloc, fcond):
+ reg1, reg2, res = arglocs
+ self.mc.MUL(res.value, reg1.value, reg2.value)
+ return fcond
+
+ def emit_op_int_force_ge_zero(self, op, arglocs, regalloc, fcond):
+ arg, res = arglocs
+ self.mc.CMP_ri(arg.value, 0)
+ self.mc.MOV_ri(res.value, 0, cond=c.LT)
+ self.mc.MOV_rr(res.value, arg.value, cond=c.GE)
+ return fcond
+
+ #ref: http://blogs.arm.com/software-enablement/detecting-overflow-from-mul/
+ def emit_guard_int_mul_ovf(self, op, guard, arglocs, regalloc, fcond):
+ reg1 = arglocs[0]
+ reg2 = arglocs[1]
+ res = arglocs[2]
+ failargs = arglocs[3:]
+ self.mc.SMULL(res.value, r.ip.value, reg1.value, reg2.value,
+ cond=fcond)
+ self.mc.CMP_rr(r.ip.value, res.value, shifttype=shift.ASR,
+ imm=31, cond=fcond)
+
+ if guard.getopnum() == rop.GUARD_OVERFLOW:
+ fcond = self._emit_guard(guard, failargs, c.NE, save_exc=False)
+ elif guard.getopnum() == rop.GUARD_NO_OVERFLOW:
+ fcond = self._emit_guard(guard, failargs, c.EQ, save_exc=False)
+ else:
+ assert 0
+ return fcond
+
+ def emit_guard_int_add_ovf(self, op, guard, arglocs, regalloc, fcond):
+ self.int_add_impl(op, arglocs[0:3], regalloc, fcond, flags=True)
+ self._emit_guard_overflow(guard, arglocs[3:], fcond)
+ return fcond
+
+ def emit_guard_int_sub_ovf(self, op, guard, arglocs, regalloc, fcond):
+ self.int_sub_impl(op, arglocs[0:3], regalloc, fcond, flags=True)
+ self._emit_guard_overflow(guard, arglocs[3:], fcond)
+ return fcond
+
+ emit_op_int_floordiv = gen_emit_op_by_helper_call('int_floordiv', 'DIV')
+ emit_op_int_mod = gen_emit_op_by_helper_call('int_mod', 'MOD')
+ emit_op_uint_floordiv = gen_emit_op_by_helper_call('uint_floordiv', 'UDIV')
+
+ emit_op_int_and = gen_emit_op_ri('int_and', 'AND')
+ emit_op_int_or = gen_emit_op_ri('int_or', 'ORR')
+ emit_op_int_xor = gen_emit_op_ri('int_xor', 'EOR')
+ emit_op_int_lshift = gen_emit_op_ri('int_lshift', 'LSL')
+ emit_op_int_rshift = gen_emit_op_ri('int_rshift', 'ASR')
+ emit_op_uint_rshift = gen_emit_op_ri('uint_rshift', 'LSR')
+
+ emit_op_int_lt = gen_emit_cmp_op('int_lt', c.LT)
+ emit_op_int_le = gen_emit_cmp_op('int_le', c.LE)
+ emit_op_int_eq = gen_emit_cmp_op('int_eq', c.EQ)
+ emit_op_int_ne = gen_emit_cmp_op('int_ne', c.NE)
+ emit_op_int_gt = gen_emit_cmp_op('int_gt', c.GT)
+ emit_op_int_ge = gen_emit_cmp_op('int_ge', c.GE)
+
+ emit_guard_int_lt = gen_emit_cmp_op_guard('int_lt', c.LT)
+ emit_guard_int_le = gen_emit_cmp_op_guard('int_le', c.LE)
+ emit_guard_int_eq = gen_emit_cmp_op_guard('int_eq', c.EQ)
+ emit_guard_int_ne = gen_emit_cmp_op_guard('int_ne', c.NE)
+ emit_guard_int_gt = gen_emit_cmp_op_guard('int_gt', c.GT)
+ emit_guard_int_ge = gen_emit_cmp_op_guard('int_ge', c.GE)
+
+ emit_op_uint_le = gen_emit_cmp_op('uint_le', c.LS)
+ emit_op_uint_gt = gen_emit_cmp_op('uint_gt', c.HI)
+ emit_op_uint_lt = gen_emit_cmp_op('uint_lt', c.LO)
+ emit_op_uint_ge = gen_emit_cmp_op('uint_ge', c.HS)
+
+ emit_guard_uint_le = gen_emit_cmp_op_guard('uint_le', c.LS)
+ emit_guard_uint_gt = gen_emit_cmp_op_guard('uint_gt', c.HI)
+ emit_guard_uint_lt = gen_emit_cmp_op_guard('uint_lt', c.LO)
+ emit_guard_uint_ge = gen_emit_cmp_op_guard('uint_ge', c.HS)
+
+ emit_op_ptr_eq = emit_op_instance_ptr_eq = emit_op_int_eq
+ emit_op_ptr_ne = emit_op_instance_ptr_ne = emit_op_int_ne
+ emit_guard_ptr_eq = emit_guard_instance_ptr_eq = emit_guard_int_eq
+ emit_guard_ptr_ne = emit_guard_instance_ptr_ne = emit_guard_int_ne
+
+ emit_op_int_add_ovf = emit_op_int_add
+ emit_op_int_sub_ovf = emit_op_int_sub
+
+ emit_op_int_is_true = gen_emit_op_unary_cmp('int_is_true', c.NE)
+ emit_op_int_is_zero = gen_emit_op_unary_cmp('int_is_zero', c.EQ)
+
+ emit_guard_int_is_true = gen_emit_guard_unary_cmp('int_is_true', c.NE)
+ emit_guard_int_is_zero = gen_emit_guard_unary_cmp('int_is_zero', c.EQ)
+
+ def emit_op_int_invert(self, op, arglocs, regalloc, fcond):
+ reg, res = arglocs
+
+ self.mc.MVN_rr(res.value, reg.value)
+ return fcond
+
+ def emit_op_int_neg(self, op, arglocs, regalloc, fcond):
+ l0, resloc = arglocs
+ self.mc.RSB_ri(resloc.value, l0.value, imm=0)
+ return fcond
+
+ def _emit_guard(self, op, arglocs, fcond, save_exc,
+ is_guard_not_invalidated=False):
+ assert isinstance(save_exc, bool)
+ assert isinstance(fcond, int)
+ descr = op.getdescr()
+ assert isinstance(descr, AbstractFailDescr)
+
+ pos = self.mc.currpos()
+ # For all guards that are not GUARD_NOT_INVALIDATED we emit a
+ # breakpoint to ensure the location is patched correctly. In the case
+ # of GUARD_NOT_INVALIDATED we use just a NOP, because it is only
+ # eventually patched at a later point.
+ if is_guard_not_invalidated:
+ self.mc.NOP()
+ else:
+ self.mc.BKPT()
+ self.pending_guards.append(GuardToken(descr,
+ failargs=op.getfailargs(),
+ faillocs=arglocs,
+ offset=pos,
+ save_exc=save_exc,
+ is_invalidate=is_guard_not_invalidated,
+ fcond=fcond))
+ return c.AL
+
+ def _emit_guard_overflow(self, guard, failargs, fcond):
+ if guard.getopnum() == rop.GUARD_OVERFLOW:
+ fcond = self._emit_guard(guard, failargs, c.VS, save_exc=False)
+ elif guard.getopnum() == rop.GUARD_NO_OVERFLOW:
+ fcond = self._emit_guard(guard, failargs, c.VC, save_exc=False)
+ else:
+ assert 0
+ return fcond
+
+ def emit_op_guard_true(self, op, arglocs, regalloc, fcond):
+ l0 = arglocs[0]
+ failargs = arglocs[1:]
+ self.mc.CMP_ri(l0.value, 0)
+ fcond = self._emit_guard(op, failargs, c.NE, save_exc=False)
+ return fcond
+
+ def emit_op_guard_false(self, op, arglocs, regalloc, fcond):
+ l0 = arglocs[0]
+ failargs = arglocs[1:]
+ self.mc.CMP_ri(l0.value, 0)
+ fcond = self._emit_guard(op, failargs, c.EQ, save_exc=False)
+ return fcond
+
+ def emit_op_guard_value(self, op, arglocs, regalloc, fcond):
+ l0 = arglocs[0]
+ l1 = arglocs[1]
+ failargs = arglocs[2:]
+
+ if l0.is_reg():
+ if l1.is_imm():
+ self.mc.CMP_ri(l0.value, l1.getint())
+ else:
+ self.mc.CMP_rr(l0.value, l1.value)
+ elif l0.is_vfp_reg():
+ assert l1.is_vfp_reg()
+ self.mc.VCMP(l0.value, l1.value)
+ self.mc.VMRS(cond=fcond)
+ fcond = self._emit_guard(op, failargs, c.EQ, save_exc=False)
+ return fcond
+
+ emit_op_guard_nonnull = emit_op_guard_true
+ emit_op_guard_isnull = emit_op_guard_false
+
+ def emit_op_guard_no_overflow(self, op, arglocs, regalloc, fcond):
+ return self._emit_guard(op, arglocs, c.VC, save_exc=False)
+
+ def emit_op_guard_overflow(self, op, arglocs, regalloc, fcond):
+ return self._emit_guard(op, arglocs, c.VS, save_exc=False)
+
+ def emit_op_guard_class(self, op, arglocs, regalloc, fcond):
+ self._cmp_guard_class(op, arglocs, regalloc, fcond)
+ self._emit_guard(op, arglocs[3:], c.EQ, save_exc=False)
+ return fcond
+
+ def emit_op_guard_nonnull_class(self, op, arglocs, regalloc, fcond):
+ self.mc.CMP_ri(arglocs[0].value, 1)
+ self._cmp_guard_class(op, arglocs, regalloc, c.HS)
+ self._emit_guard(op, arglocs[3:], c.EQ, save_exc=False)
+ return fcond
+
+ def _cmp_guard_class(self, op, locs, regalloc, fcond):
+ offset = locs[2]
+ if offset is not None:
+ self.mc.LDR_ri(r.ip.value, locs[0].value, offset.value, cond=fcond)
+ self.mc.CMP_rr(r.ip.value, locs[1].value, cond=fcond)
+ else:
+ typeid = locs[1]
+ self.mc.LDRH_ri(r.ip.value, locs[0].value, cond=fcond)
+ if typeid.is_imm():
+ self.mc.CMP_ri(r.ip.value, typeid.value, cond=fcond)
+ else:
+ self.mc.CMP_rr(r.ip.value, typeid.value, cond=fcond)
+
+ def emit_op_guard_not_invalidated(self, op, locs, regalloc, fcond):
+ return self._emit_guard(op, locs, fcond, save_exc=False,
+ is_guard_not_invalidated=True)
+
+ def emit_op_jump(self, op, arglocs, regalloc, fcond):
+ # The backend's logic assumes that the target code is in a piece of
+ # assembler that was also called with the same number of arguments,
+ # so that the locations [ebp+8..] of the input arguments are valid
+ # stack locations both before and after the jump.
+ #
+ descr = op.getdescr()
+ assert isinstance(descr, TargetToken)
+ assert fcond == c.AL
+ my_nbargs = self.current_clt._debug_nbargs
+ target_nbargs = descr._arm_clt._debug_nbargs
+ assert my_nbargs == target_nbargs
+
+ self._insert_checks()
+ if descr in self.target_tokens_currently_compiling:
+ self.mc.B_offs(descr._arm_loop_code, fcond)
+ else:
+ self.mc.B(descr._arm_loop_code, fcond)
+ return fcond
+
+ def emit_op_finish(self, op, arglocs, regalloc, fcond):
+ for i in range(len(arglocs) - 1):
+ loc = arglocs[i]
+ box = op.getarg(i)
+ if loc is None:
+ continue
+ if loc.is_reg():
+ if box.type == REF:
+ adr = self.fail_boxes_ptr.get_addr_for_num(i)
+ elif box.type == INT:
+ adr = self.fail_boxes_int.get_addr_for_num(i)
+ else:
+ assert 0
+ self.mc.gen_load_int(r.ip.value, adr)
+ self.mc.STR_ri(loc.value, r.ip.value)
+ elif loc.is_vfp_reg():
+ assert box.type == FLOAT
+ adr = self.fail_boxes_float.get_addr_for_num(i)
+ self.mc.gen_load_int(r.ip.value, adr)
+ self.mc.VSTR(loc.value, r.ip.value)
+ elif loc.is_stack() or loc.is_imm() or loc.is_imm_float():
+ if box.type == FLOAT:
+ adr = self.fail_boxes_float.get_addr_for_num(i)
+ self.mov_loc_loc(loc, r.vfp_ip)
+ self.mc.gen_load_int(r.ip.value, adr)
+ self.mc.VSTR(r.vfp_ip.value, r.ip.value)
+ elif box.type == REF or box.type == INT:
+ if box.type == REF:
+ adr = self.fail_boxes_ptr.get_addr_for_num(i)
+ elif box.type == INT:
+ adr = self.fail_boxes_int.get_addr_for_num(i)
+ else:
+ assert 0
+ self.mov_loc_loc(loc, r.ip)
+ self.mc.gen_load_int(r.lr.value, adr)
+ self.mc.STR_ri(r.ip.value, r.lr.value)
+ else:
+ assert 0
+ # note: no exception should currently be set in llop.get_exception_addr
+ # even if this finish may be an exit_frame_with_exception (in this case
+ # the exception instance is in arglocs[0]).
+ addr = self.cpu.get_on_leave_jitted_int(save_exception=False)
+ self.mc.BL(addr)
+ self.mc.gen_load_int(r.r0.value, arglocs[-1].value)
+ self.gen_func_epilog()
+ return fcond
+
+ def emit_op_call(self, op, arglocs, regalloc, fcond,
+ force_index=NO_FORCE_INDEX):
+ if force_index == NO_FORCE_INDEX:
+ force_index = self.write_new_force_index()
+ resloc = arglocs[0]
+ adr = arglocs[1]
+ arglist = arglocs[2:]
+ descr = op.getdescr()
+ size = descr.get_result_size()
+ signed = descr.is_result_signed()
+ cond = self._emit_call(force_index, adr, arglist,
+ fcond, resloc, (size, signed))
+ return cond
+
+ def _emit_call(self, force_index, adr, arglocs, fcond=c.AL,
+ resloc=None, result_info=(-1, -1)):
+ if self.cpu.use_hf_abi:
+ stack_args, adr = self._setup_call_hf(force_index, adr,
+ arglocs, fcond, resloc, result_info)
+ else:
+ stack_args, adr = self._setup_call_sf(force_index, adr,
+ arglocs, fcond, resloc, result_info)
+
+ # the actual call
+ #self.mc.BKPT()
+ if adr.is_imm():
+ self.mc.BL(adr.value)
+ elif adr.is_stack():
+ self.mov_loc_loc(adr, r.ip)
+ adr = r.ip
+ else:
+ assert adr.is_reg()
+ if adr.is_reg():
+ self.mc.BLX(adr.value)
+ self.mark_gc_roots(force_index)
+ self._restore_sp(stack_args, fcond)
+
+ # ensure the result is wellformed and stored in the correct location
+ if resloc is not None:
+ if resloc.is_vfp_reg() and not self.cpu.use_hf_abi:
+ # move result to the allocated register
+ self.mov_to_vfp_loc(r.r0, r.r1, resloc)
+ elif resloc.is_reg() and result_info != (-1, -1):
+ self._ensure_result_bit_extension(resloc, result_info[0],
+ result_info[1])
+ return fcond
+
+ def _restore_sp(self, stack_args, fcond):
+ # readjust the sp in case we passed some args on the stack
+ if len(stack_args) > 0:
+ n = 0
+ for arg in stack_args:
+ if arg is None or arg.type != FLOAT:
+ n += WORD
+ else:
+ n += DOUBLE_WORD
+ self._adjust_sp(-n, fcond=fcond)
+ assert n % 8 == 0 # sanity check
+
+ def _collect_stack_args_sf(self, arglocs):
+ n_args = len(arglocs)
+ reg_args = count_reg_args(arglocs)
+ # all arguments past the 4th go on the stack
+ # first we need to prepare the list so it stays aligned
+ stack_args = []
+ count = 0
+ if n_args > reg_args:
+ for i in range(reg_args, n_args):
+ arg = arglocs[i]
+ if arg.type != FLOAT:
+ count += 1
+ else:
+ if count % 2 != 0:
+ stack_args.append(None)
+ count = 0
+ stack_args.append(arg)
+ if count % 2 != 0:
+ stack_args.append(None)
+ return stack_args
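+ # Sketch: with five word-sized int arguments a0..a4, reg_args == 4,
+ # so this returns [a4, None]; the trailing None is pushed as a filler
+ # word by _push_stack_args below, keeping the stack 8-byte aligned.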
+
+ def _push_stack_args(self, stack_args):
+ # then we push everything on the stack
+ for i in range(len(stack_args) - 1, -1, -1):
+ arg = stack_args[i]
+ if arg is None:
+ self.mc.PUSH([r.ip.value])
+ else:
+ self.regalloc_push(arg)
+
+ def _setup_call_sf(self, force_index, adr, arglocs, fcond=c.AL,
+ resloc=None, result_info=(-1, -1)):
+ reg_args = count_reg_args(arglocs)
+ stack_args = self._collect_stack_args_sf(arglocs)
+ self._push_stack_args(stack_args)
+ # collect variables that need to go in registers and the registers they
+ # will be stored in
+ num = 0
+ count = 0
+ non_float_locs = []
+ non_float_regs = []
+ float_locs = []
+ for i in range(reg_args):
+ arg = arglocs[i]
+ if arg.type == FLOAT and count % 2 != 0:
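+                # a float consumes an aligned pair of core registers
+                # (r0/r1 or r2/r3), so skip one register if needed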
+ num += 1
+ count = 0
+ reg = r.caller_resp[num]
+
+ if arg.type == FLOAT:
+ float_locs.append((arg, reg))
+ else:
+ non_float_locs.append(arg)
+ non_float_regs.append(reg)
+
+ if arg.type == FLOAT:
+ num += 2
+ else:
+ num += 1
+ count += 1
+ # Check that the address of the function we want to call is not
+ # currently stored in one of the registers used to pass the arguments.
+ # If this happens to be the case we remap the register to r4 and use r4
+ # to call the function
+ if adr in non_float_regs:
+ non_float_locs.append(adr)
+ non_float_regs.append(r.r4)
+ adr = r.r4
+ # remap values stored in core registers
+ remap_frame_layout(self, non_float_locs, non_float_regs, r.ip)
+
+ for loc, reg in float_locs:
+ self.mov_from_vfp_loc(loc, reg, r.all_regs[reg.value + 1])
+ return stack_args, adr
+
+ def _setup_call_hf(self, force_index, adr, arglocs, fcond=c.AL,
+ resloc=None, result_info=(-1, -1)):
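+        # AAPCS hard-float: core arguments go to r0-r3 and float
+        # arguments to the VFP argument registers; anything that does
+        # not fit spills to the stack, padded to keep doubles aligned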
+ non_float_locs = []
+ non_float_regs = []
+ float_locs = []
+ float_regs = []
+ stack_args = []
+ count = 0 # stack alignment counter
+ for arg in arglocs:
+ if arg.type != FLOAT:
+ if len(non_float_regs) < len(r.argument_regs):
+ reg = r.argument_regs[len(non_float_regs)]
+ non_float_locs.append(arg)
+ non_float_regs.append(reg)
+ else: # non-float argument that needs to go on the stack
+ count += 1
+ stack_args.append(arg)
+ else:
+ if len(float_regs) < len(r.vfp_argument_regs):
+ reg = r.vfp_argument_regs[len(float_regs)]
+ float_locs.append(arg)
+ float_regs.append(reg)
+ else: # float argument that needs to go on the stack
+ if count % 2 != 0:
+ stack_args.append(None)
+ count = 0
+ stack_args.append(arg)
+ # align the stack
+ if count % 2 != 0:
+ stack_args.append(None)
+ self._push_stack_args(stack_args)
+ # Check that the address of the function we want to call is not
+ # currently stored in one of the registers used to pass the arguments.
+ # If this happens to be the case we remap the register to r4 and use r4
+ # to call the function
+ if adr in non_float_regs:
+ non_float_locs.append(adr)
+ non_float_regs.append(r.r4)
+ adr = r.r4
+ # remap values stored in core registers
+ remap_frame_layout(self, non_float_locs, non_float_regs, r.ip)
+ # remap values stored in vfp registers
+ remap_frame_layout(self, float_locs, float_regs, r.vfp_ip)
+
+ return stack_args, adr
+
+ def emit_op_same_as(self, op, arglocs, regalloc, fcond):
+ argloc, resloc = arglocs
+ self.mov_loc_loc(argloc, resloc)
+ return fcond
+
+ emit_op_cast_ptr_to_int = emit_op_same_as
+ emit_op_cast_int_to_ptr = emit_op_same_as
+
+ def emit_op_guard_no_exception(self, op, arglocs, regalloc, fcond):
+ loc = arglocs[0]
+ failargs = arglocs[1:]
+ self.mc.LDR_ri(loc.value, loc.value)
+ self.mc.CMP_ri(loc.value, 0)
+ cond = self._emit_guard(op, failargs, c.EQ, save_exc=True)
+ return cond
+
+ def emit_op_guard_exception(self, op, arglocs, regalloc, fcond):
+ loc, loc1, resloc, pos_exc_value, pos_exception = arglocs[:5]
+ failargs = arglocs[5:]
+ self.mc.gen_load_int(loc1.value, pos_exception.value)
+ self.mc.LDR_ri(r.ip.value, loc1.value)
+
+ self.mc.CMP_rr(r.ip.value, loc.value)
+ self._emit_guard(op, failargs, c.EQ, save_exc=True)
+ self.mc.gen_load_int(loc.value, pos_exc_value.value)
+ if resloc:
+ self.mc.LDR_ri(resloc.value, loc.value)
+ self.mc.MOV_ri(r.ip.value, 0)
+ self.mc.STR_ri(r.ip.value, loc.value)
+ self.mc.STR_ri(r.ip.value, loc1.value)
+ return fcond
+
+ def emit_op_debug_merge_point(self, op, arglocs, regalloc, fcond):
+ return fcond
+ emit_op_jit_debug = emit_op_debug_merge_point
+ emit_op_keepalive = emit_op_debug_merge_point
+
+ def emit_op_cond_call_gc_wb(self, op, arglocs, regalloc, fcond):
+ # Write code equivalent to write_barrier() in the GC: it checks
+ # a flag in the object at arglocs[0], and if set, it calls a
+ # helper piece of assembler. The latter saves registers as needed
+ # and call the function jit_remember_young_pointer() from the GC.
+ descr = op.getdescr()
+ if we_are_translated():
+ cls = self.cpu.gc_ll_descr.has_write_barrier_class()
+ assert cls is not None and isinstance(descr, cls)
+ #
+ opnum = op.getopnum()
+ card_marking = False
+ mask = descr.jit_wb_if_flag_singlebyte
+ if opnum == rop.COND_CALL_GC_WB_ARRAY and descr.jit_wb_cards_set != 0:
+ # assumptions the rest of the function depends on:
+ assert (descr.jit_wb_cards_set_byteofs ==
+ descr.jit_wb_if_flag_byteofs)
+ assert descr.jit_wb_cards_set_singlebyte == -0x80
+ card_marking = True
+ mask = descr.jit_wb_if_flag_singlebyte | -0x80
+ #
+ loc_base = arglocs[0]
+ self.mc.LDRB_ri(r.ip.value, loc_base.value,
+ imm=descr.jit_wb_if_flag_byteofs)
+ mask &= 0xFF
+ self.mc.TST_ri(r.ip.value, imm=mask)
+
+ jz_location = self.mc.currpos()
+ self.mc.BKPT()
+
+ # for cond_call_gc_wb_array, also add another fast path:
+ # if GCFLAG_CARDS_SET, then we can just set one bit and be done
+ if card_marking:
+ # GCFLAG_CARDS_SET is in this byte at 0x80
+ self.mc.TST_ri(r.ip.value, imm=0x80)
+
+ js_location = self.mc.currpos()
+ self.mc.BKPT()
+ else:
+ js_location = 0
+
+ # Write only a CALL to the helper prepared in advance, passing it as
+ # argument the address of the structure we are writing into
+ # (the first argument to COND_CALL_GC_WB).
+ helper_num = card_marking
+ if self._regalloc.vfprm.reg_bindings:
+ helper_num += 2
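+        # helper_num indexes wb_slowpath: bit 0 selects the card-marking
+        # variant, bit 1 the variant that saves the VFP registers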
+ if self.wb_slowpath[helper_num] == 0: # tests only
+ assert not we_are_translated()
+ self.cpu.gc_ll_descr.write_barrier_descr = descr
+ self._build_wb_slowpath(card_marking,
+ bool(self._regalloc.vfprm.reg_bindings))
+ assert self.wb_slowpath[helper_num] != 0
+ #
+ if loc_base is not r.r0:
+ # push two registers to keep stack aligned
+ self.mc.PUSH([r.r0.value, loc_base.value])
+ remap_frame_layout(self, [loc_base], [r.r0], r.ip)
+ self.mc.BL(self.wb_slowpath[helper_num])
+ if loc_base is not r.r0:
+ self.mc.POP([r.r0.value, loc_base.value])
+
+ if card_marking:
+ # The helper ends again with a check of the flag in the object. So
+ # here, we can simply write again a conditional jump, which will be
+ # taken if GCFLAG_CARDS_SET is still not set.
+ jns_location = self.mc.currpos()
+ self.mc.BKPT()
+ #
+ # patch the JS above
+ offset = self.mc.currpos()
+ pmc = OverwritingBuilder(self.mc, js_location, WORD)
+ pmc.B_offs(offset, c.NE) # We want to jump if the z flag isn't set
+ #
+ # case GCFLAG_CARDS_SET: emit a few instructions to do
+ # directly the card flag setting
+ loc_index = arglocs[1]
+ assert loc_index.is_reg()
+ # must save the register loc_index before it is mutated
+ self.mc.PUSH([loc_index.value])
+ tmp1 = loc_index
+ tmp2 = arglocs[2]
+ # lr = byteofs
+ s = 3 + descr.jit_wb_card_page_shift
+ self.mc.MVN_rr(r.lr.value, loc_index.value,
+ imm=s, shifttype=shift.LSR)
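+            # i.e. lr = ~(index >> (card_page_shift + 3)), the (negative)
+            # offset of the card byte relative to the object start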
+
+ # tmp1 = byte_index
+ self.mc.MOV_ri(r.ip.value, imm=7)
+ self.mc.AND_rr(tmp1.value, r.ip.value, loc_index.value,
+ imm=descr.jit_wb_card_page_shift, shifttype=shift.LSR)
+
+ # set the bit
+ self.mc.MOV_ri(tmp2.value, imm=1)
+ self.mc.LDRB_rr(r.ip.value, loc_base.value, r.lr.value)
+ self.mc.ORR_rr_sr(r.ip.value, r.ip.value, tmp2.value,
+ tmp1.value, shifttype=shift.LSL)
+ self.mc.STRB_rr(r.ip.value, loc_base.value, r.lr.value)
+ # done
+ self.mc.POP([loc_index.value])
+ #
+ #
+ # patch the JNS above
+ offset = self.mc.currpos()
+ pmc = OverwritingBuilder(self.mc, jns_location, WORD)
+ pmc.B_offs(offset, c.EQ) # We want to jump if the z flag is set
+
+ offset = self.mc.currpos()
+ pmc = OverwritingBuilder(self.mc, jz_location, WORD)
+ pmc.B_offs(offset, c.EQ)
+ return fcond
+
+ emit_op_cond_call_gc_wb_array = emit_op_cond_call_gc_wb
+
+ def emit_op_setfield_gc(self, op, arglocs, regalloc, fcond):
+ value_loc, base_loc, ofs, size = arglocs
+ if size.value == 8:
+ assert value_loc.is_vfp_reg()
+            # VSTR only supports immediate offsets, so if the offset is
+            # too large (i.e. it lives in a register) we add it to the
+            # base and use an immediate offset of 0
+ if ofs.is_reg():
+ self.mc.ADD_rr(r.ip.value, base_loc.value, ofs.value)
+ base_loc = r.ip
+ ofs = imm(0)
+ else:
+ assert ofs.value % 4 == 0
+ self.mc.VSTR(value_loc.value, base_loc.value, ofs.value)
+ elif size.value == 4:
+ if ofs.is_imm():
+ self.mc.STR_ri(value_loc.value, base_loc.value, ofs.value)
+ else:
+ self.mc.STR_rr(value_loc.value, base_loc.value, ofs.value)
+ elif size.value == 2:
+ if ofs.is_imm():
+ self.mc.STRH_ri(value_loc.value, base_loc.value, ofs.value)
+ else:
+ self.mc.STRH_rr(value_loc.value, base_loc.value, ofs.value)
+ elif size.value == 1:
+ if ofs.is_imm():
+ self.mc.STRB_ri(value_loc.value, base_loc.value, ofs.value)
+ else:
+ self.mc.STRB_rr(value_loc.value, base_loc.value, ofs.value)
+ else:
+ assert 0
+ return fcond
+
+ emit_op_setfield_raw = emit_op_setfield_gc
+
+ def emit_op_getfield_gc(self, op, arglocs, regalloc, fcond):
+ base_loc, ofs, res, size = arglocs
+ signed = op.getdescr().is_field_signed()
+ if size.value == 8:
+ assert res.is_vfp_reg()
+            # VLDR only supports immediate offsets, so if the offset is
+            # too large (i.e. it lives in a register) we add it to the
+            # base and use an immediate offset of 0
+ if ofs.is_reg():
+ self.mc.ADD_rr(r.ip.value, base_loc.value, ofs.value)
+ base_loc = r.ip
+ ofs = imm(0)
+ else:
+ assert ofs.value % 4 == 0
+ self.mc.VLDR(res.value, base_loc.value, ofs.value)
+ elif size.value == 4:
+ if ofs.is_imm():
+ self.mc.LDR_ri(res.value, base_loc.value, ofs.value)
+ else:
+ self.mc.LDR_rr(res.value, base_loc.value, ofs.value)
+ elif size.value == 2:
+ if ofs.is_imm():
+ if signed:
+ self.mc.LDRSH_ri(res.value, base_loc.value, ofs.value)
+ else:
+ self.mc.LDRH_ri(res.value, base_loc.value, ofs.value)
+ else:
+ if signed:
+ self.mc.LDRSH_rr(res.value, base_loc.value, ofs.value)
+ else:
+ self.mc.LDRH_rr(res.value, base_loc.value, ofs.value)
+ elif size.value == 1:
+ if ofs.is_imm():
+ if signed:
+ self.mc.LDRSB_ri(res.value, base_loc.value, ofs.value)
+ else:
+ self.mc.LDRB_ri(res.value, base_loc.value, ofs.value)
+ else:
+ if signed:
+ self.mc.LDRSB_rr(res.value, base_loc.value, ofs.value)
+ else:
+ self.mc.LDRB_rr(res.value, base_loc.value, ofs.value)
+ else:
+ assert 0
+ return fcond
+
+ emit_op_getfield_raw = emit_op_getfield_gc
+ emit_op_getfield_raw_pure = emit_op_getfield_gc
+ emit_op_getfield_gc_pure = emit_op_getfield_gc
+
+ def emit_op_getinteriorfield_gc(self, op, arglocs, regalloc, fcond):
+ (base_loc, index_loc, res_loc,
+ ofs_loc, ofs, itemsize, fieldsize) = arglocs
+ self.mc.gen_load_int(r.ip.value, itemsize.value)
+ self.mc.MUL(r.ip.value, index_loc.value, r.ip.value)
+ descr = op.getdescr()
+ assert isinstance(descr, InteriorFieldDescr)
+ signed = descr.fielddescr.is_field_signed()
+ if ofs.value > 0:
+ if ofs_loc.is_imm():
+ self.mc.ADD_ri(r.ip.value, r.ip.value, ofs_loc.value)
+ else:
+ self.mc.ADD_rr(r.ip.value, r.ip.value, ofs_loc.value)
+
+ if fieldsize.value == 8:
+            # VLDR only supports immediate offsets, so we add the
+            # computed offset to the base and use an immediate offset of 0
+ assert res_loc.is_vfp_reg()
+ self.mc.ADD_rr(r.ip.value, base_loc.value, r.ip.value)
+ self.mc.VLDR(res_loc.value, r.ip.value, 0)
+ elif fieldsize.value == 4:
+ self.mc.LDR_rr(res_loc.value, base_loc.value, r.ip.value)
+ elif fieldsize.value == 2:
+ if signed:
+ self.mc.LDRSH_rr(res_loc.value, base_loc.value, r.ip.value)
+ else:
+ self.mc.LDRH_rr(res_loc.value, base_loc.value, r.ip.value)
+ elif fieldsize.value == 1:
+ if signed:
+ self.mc.LDRSB_rr(res_loc.value, base_loc.value, r.ip.value)
+ else:
+ self.mc.LDRB_rr(res_loc.value, base_loc.value, r.ip.value)
+ else:
+ assert 0
+
+ return fcond
+
+ def emit_op_setinteriorfield_gc(self, op, arglocs, regalloc, fcond):
+ (base_loc, index_loc, value_loc,
+ ofs_loc, ofs, itemsize, fieldsize) = arglocs
+ self.mc.gen_load_int(r.ip.value, itemsize.value)
+ self.mc.MUL(r.ip.value, index_loc.value, r.ip.value)
+ if ofs.value > 0:
+ if ofs_loc.is_imm():
+ self.mc.ADD_ri(r.ip.value, r.ip.value, ofs_loc.value)
+ else:
+ self.mc.ADD_rr(r.ip.value, r.ip.value, ofs_loc.value)
+ if fieldsize.value == 8:
+            # VSTR only supports immediate offsets, so we add the
+            # computed offset to the base and use an immediate offset of 0
+ assert value_loc.is_vfp_reg()
+ self.mc.ADD_rr(r.ip.value, base_loc.value, r.ip.value)
+ self.mc.VSTR(value_loc.value, r.ip.value, 0)
+ elif fieldsize.value == 4:
+ self.mc.STR_rr(value_loc.value, base_loc.value, r.ip.value)
+ elif fieldsize.value == 2:
+ self.mc.STRH_rr(value_loc.value, base_loc.value, r.ip.value)
+ elif fieldsize.value == 1:
+ self.mc.STRB_rr(value_loc.value, base_loc.value, r.ip.value)
+ else:
+ assert 0
+ return fcond
+ emit_op_setinteriorfield_raw = emit_op_setinteriorfield_gc
+
+ def emit_op_arraylen_gc(self, op, arglocs, regalloc, fcond):
+ res, base_loc, ofs = arglocs
+ self.mc.LDR_ri(res.value, base_loc.value, ofs.value)
+ return fcond
+
+ def emit_op_setarrayitem_gc(self, op, arglocs, regalloc, fcond):
+ value_loc, base_loc, ofs_loc, scale, ofs = arglocs
+ assert ofs_loc.is_reg()
+ if scale.value > 0:
+ self.mc.LSL_ri(r.ip.value, ofs_loc.value, scale.value)
+ ofs_loc = r.ip
+
+ # add the base offset
+ if ofs.value > 0:
+ self.mc.ADD_ri(r.ip.value, ofs_loc.value, imm=ofs.value)
+ ofs_loc = r.ip
+ self._write_to_mem(value_loc, base_loc, ofs_loc, scale, fcond)
+ return fcond
+
+ def _write_to_mem(self, value_loc, base_loc, ofs_loc, scale, fcond=c.AL):
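+        # 'scale' is log2 of the item size: 3 -> 8-byte float via VSTR,
+        # 2 -> word, 1 -> halfword, 0 -> byte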
+ if scale.value == 3:
+ assert value_loc.is_vfp_reg()
+ assert ofs_loc.is_reg()
+ self.mc.ADD_rr(r.ip.value, base_loc.value, ofs_loc.value)
+ self.mc.VSTR(value_loc.value, r.ip.value, cond=fcond)
+ elif scale.value == 2:
+ self.mc.STR_rr(value_loc.value, base_loc.value, ofs_loc.value,
+ cond=fcond)
+ elif scale.value == 1:
+ self.mc.STRH_rr(value_loc.value, base_loc.value, ofs_loc.value,
+ cond=fcond)
+ elif scale.value == 0:
+ self.mc.STRB_rr(value_loc.value, base_loc.value, ofs_loc.value,
+ cond=fcond)
+ else:
+ assert 0
+
+ emit_op_setarrayitem_raw = emit_op_setarrayitem_gc
+
+ def emit_op_raw_store(self, op, arglocs, regalloc, fcond):
+ value_loc, base_loc, ofs_loc, scale, ofs = arglocs
+ assert ofs_loc.is_reg()
+ self._write_to_mem(value_loc, base_loc, ofs_loc, scale, fcond)
+ return fcond
+
+ def emit_op_getarrayitem_gc(self, op, arglocs, regalloc, fcond):
+ res_loc, base_loc, ofs_loc, scale, ofs = arglocs
+ assert ofs_loc.is_reg()
+ signed = op.getdescr().is_item_signed()
+
+ # scale the offset as required
+ if scale.value > 0:
+ self.mc.LSL_ri(r.ip.value, ofs_loc.value, scale.value)
+ ofs_loc = r.ip
+ # add the base offset
+ if ofs.value > 0:
+ self.mc.ADD_ri(r.ip.value, ofs_loc.value, imm=ofs.value)
+ ofs_loc = r.ip
+ #
+ self._load_from_mem(res_loc, base_loc, ofs_loc, scale, signed)
+ return fcond
+
+ def _load_from_mem(self, res_loc, base_loc, ofs_loc, scale,
+ signed=False, fcond=c.AL):
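+        # mirror of _write_to_mem: 'scale' is log2 of the item size,
+        # with sign-extending variants for the sub-word loads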
+ if scale.value == 3:
+ assert res_loc.is_vfp_reg()
+ assert ofs_loc.is_reg()
+ self.mc.ADD_rr(r.ip.value, base_loc.value, ofs_loc.value)
+ self.mc.VLDR(res_loc.value, r.ip.value, cond=fcond)
+ elif scale.value == 2:
+ self.mc.LDR_rr(res_loc.value, base_loc.value,
+ ofs_loc.value, cond=fcond)
+ elif scale.value == 1:
+ if signed:
+ self.mc.LDRSH_rr(res_loc.value, base_loc.value,
+ ofs_loc.value, cond=fcond)
+ else:
+ self.mc.LDRH_rr(res_loc.value, base_loc.value,
+ ofs_loc.value, cond=fcond)
+ elif scale.value == 0:
+ if signed:
+ self.mc.LDRSB_rr(res_loc.value, base_loc.value,
+ ofs_loc.value, cond=fcond)
+ else:
+ self.mc.LDRB_rr(res_loc.value, base_loc.value,
+ ofs_loc.value, cond=fcond)
+ else:
+ assert 0
+
+ emit_op_getarrayitem_raw = emit_op_getarrayitem_gc
+ emit_op_getarrayitem_gc_pure = emit_op_getarrayitem_gc
+
+ def emit_op_raw_load(self, op, arglocs, regalloc, fcond):
+ res_loc, base_loc, ofs_loc, scale, ofs = arglocs
+ assert ofs_loc.is_reg()
+ # no base offset
+ assert ofs.value == 0
+ signed = op.getdescr().is_item_signed()
+ self._load_from_mem(res_loc, base_loc, ofs_loc, scale, signed)
+ return fcond
+
+ def emit_op_strlen(self, op, arglocs, regalloc, fcond):
+ l0, l1, res = arglocs
+ if l1.is_imm():
+ self.mc.LDR_ri(res.value, l0.value, l1.getint(), cond=fcond)
+ else:
+ self.mc.LDR_rr(res.value, l0.value, l1.value, cond=fcond)
+ return fcond
+
+ def emit_op_strgetitem(self, op, arglocs, regalloc, fcond):
+ res, base_loc, ofs_loc, basesize = arglocs
+ if ofs_loc.is_imm():
+ self.mc.ADD_ri(r.ip.value, base_loc.value, ofs_loc.getint(),
+ cond=fcond)
+ else:
+ self.mc.ADD_rr(r.ip.value, base_loc.value, ofs_loc.value,
+ cond=fcond)
+
+ self.mc.LDRB_ri(res.value, r.ip.value, basesize.value, cond=fcond)
+ return fcond
+
+ def emit_op_strsetitem(self, op, arglocs, regalloc, fcond):
+ value_loc, base_loc, ofs_loc, basesize = arglocs
+ if ofs_loc.is_imm():
+ self.mc.ADD_ri(r.ip.value, base_loc.value, ofs_loc.getint(),
+ cond=fcond)
+ else:
+ self.mc.ADD_rr(r.ip.value, base_loc.value, ofs_loc.value,
+ cond=fcond)
+
+ self.mc.STRB_ri(value_loc.value, r.ip.value, basesize.value,
+ cond=fcond)
+ return fcond
+
+ #from ../x86/regalloc.py:928 ff.
+ def emit_op_copystrcontent(self, op, arglocs, regalloc, fcond):
+ assert len(arglocs) == 0
+ self._emit_copystrcontent(op, regalloc, fcond, is_unicode=False)
+ return fcond
+
+ def emit_op_copyunicodecontent(self, op, arglocs, regalloc, fcond):
+ assert len(arglocs) == 0
+ self._emit_copystrcontent(op, regalloc, fcond, is_unicode=True)
+ return fcond
+
+ def _emit_copystrcontent(self, op, regalloc, fcond, is_unicode):
+ # compute the source address
+ args = op.getarglist()
+ base_loc = regalloc._ensure_value_is_boxed(args[0], args)
+ ofs_loc = regalloc._ensure_value_is_boxed(args[2], args)
+ assert args[0] is not args[1] # forbidden case of aliasing
+ regalloc.possibly_free_var(args[0])
+ regalloc.free_temp_vars()
+ if args[3] is not args[2] is not args[4]: # MESS MESS MESS: don't free
+ regalloc.possibly_free_var(args[2]) # it if ==args[3] or args[4]
+ regalloc.free_temp_vars()
+ srcaddr_box = TempPtr()
+ forbidden_vars = [args[1], args[3], args[4], srcaddr_box]
+ srcaddr_loc = regalloc.force_allocate_reg(srcaddr_box,
+ selected_reg=r.r1)
+ self._gen_address_inside_string(base_loc, ofs_loc, srcaddr_loc,
+ is_unicode=is_unicode)
+
+ # compute the destination address
+ forbidden_vars = [args[4], args[3], srcaddr_box]
+ dstaddr_box = TempPtr()
+ dstaddr_loc = regalloc.force_allocate_reg(dstaddr_box,
+ selected_reg=r.r0)
+ forbidden_vars.append(dstaddr_box)
+ base_loc = regalloc._ensure_value_is_boxed(args[1], forbidden_vars)
+ ofs_loc = regalloc._ensure_value_is_boxed(args[3], forbidden_vars)
+ assert base_loc.is_reg()
+ assert ofs_loc.is_reg()
+ regalloc.possibly_free_var(args[1])
+ if args[3] is not args[4]: # more of the MESS described above
+ regalloc.possibly_free_var(args[3])
+ regalloc.free_temp_vars()
+ self._gen_address_inside_string(base_loc, ofs_loc, dstaddr_loc,
+ is_unicode=is_unicode)
+
+ # compute the length in bytes
+ forbidden_vars = [srcaddr_box, dstaddr_box]
+ # XXX basically duplicates regalloc.ensure_value_is_boxed, but we
+ # need the box here
+ if isinstance(args[4], Box):
+ length_box = args[4]
+ length_loc = regalloc._ensure_value_is_boxed(args[4],
+ forbidden_vars)
+ else:
+ length_box = TempInt()
+ length_loc = regalloc.force_allocate_reg(length_box,
+ forbidden_vars, selected_reg=r.r2)
+ immloc = regalloc.convert_to_imm(args[4])
+ self.load(length_loc, immloc)
+ if is_unicode:
+ bytes_box = TempPtr()
+ bytes_loc = regalloc.force_allocate_reg(bytes_box,
+ forbidden_vars, selected_reg=r.r2)
+ scale = self._get_unicode_item_scale()
+ assert length_loc.is_reg()
+ self.mc.MOV_ri(r.ip.value, 1 << scale)
+ self.mc.MUL(bytes_loc.value, r.ip.value, length_loc.value)
+ length_box = bytes_box
+ length_loc = bytes_loc
+ # call memcpy()
+ regalloc.before_call()
+ self._emit_call(NO_FORCE_INDEX, imm(self.memcpy_addr),
+ [dstaddr_loc, srcaddr_loc, length_loc])
+
+ regalloc.possibly_free_var(length_box)
+ regalloc.possibly_free_var(dstaddr_box)
+ regalloc.possibly_free_var(srcaddr_box)
+
+ def _gen_address_inside_string(self, baseloc, ofsloc, resloc, is_unicode):
+ if is_unicode:
+ ofs_items, _, _ = symbolic.get_array_token(rstr.UNICODE,
+ self.cpu.translate_support_code)
+ scale = self._get_unicode_item_scale()
+ else:
+ ofs_items, itemsize, _ = symbolic.get_array_token(rstr.STR,
+ self.cpu.translate_support_code)
+ assert itemsize == 1
+ scale = 0
+ self._gen_address(ofsloc, ofs_items, scale, resloc, baseloc)
+
+ def _gen_address(self, sizereg, baseofs, scale, result, baseloc=None):
+ assert sizereg.is_reg()
+ if scale > 0:
+ scaled_loc = r.ip
+ self.mc.LSL_ri(r.ip.value, sizereg.value, scale)
+ else:
+ scaled_loc = sizereg
+ if baseloc is not None:
+ assert baseloc.is_reg()
+ self.mc.ADD_rr(result.value, baseloc.value, scaled_loc.value)
+ self.mc.ADD_ri(result.value, result.value, baseofs)
+ else:
+ self.mc.ADD_ri(result.value, scaled_loc.value, baseofs)
+
+ def _get_unicode_item_scale(self):
+ _, itemsize, _ = symbolic.get_array_token(rstr.UNICODE,
+ self.cpu.translate_support_code)
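+        # the scale is log2(itemsize); unicode characters are 4 or 2
+        # bytes wide depending on the build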
+ if itemsize == 4:
+ return 2
+ elif itemsize == 2:
+ return 1
+ else:
+ raise AssertionError("bad unicode item size")
+
+ emit_op_unicodelen = emit_op_strlen
+
+ def emit_op_unicodegetitem(self, op, arglocs, regalloc, fcond):
+ res, base_loc, ofs_loc, scale, basesize, itemsize = arglocs
+ self.mc.ADD_rr(r.ip.value, base_loc.value, ofs_loc.value, cond=fcond,
+ imm=scale.value, shifttype=shift.LSL)
+ if scale.value == 2:
+ self.mc.LDR_ri(res.value, r.ip.value, basesize.value, cond=fcond)
+ elif scale.value == 1:
+ self.mc.LDRH_ri(res.value, r.ip.value, basesize.value, cond=fcond)
+ else:
+ assert 0, itemsize.value
+ return fcond
+
+ def emit_op_unicodesetitem(self, op, arglocs, regalloc, fcond):
+ value_loc, base_loc, ofs_loc, scale, basesize, itemsize = arglocs
+ self.mc.ADD_rr(r.ip.value, base_loc.value, ofs_loc.value, cond=fcond,
+ imm=scale.value, shifttype=shift.LSL)
+ if scale.value == 2:
+ self.mc.STR_ri(value_loc.value, r.ip.value, basesize.value,
+ cond=fcond)
+ elif scale.value == 1:
+ self.mc.STRH_ri(value_loc.value, r.ip.value, basesize.value,
+ cond=fcond)
+ else:
+ assert 0, itemsize.value
+
+ return fcond
+
+ def emit_op_force_token(self, op, arglocs, regalloc, fcond):
+ res_loc = arglocs[0]
+ self.mc.MOV_rr(res_loc.value, r.fp.value)
+ return fcond
+
+ # from: ../x86/assembler.py:1668
+ # XXX Split into some helper methods
+ def emit_guard_call_assembler(self, op, guard_op, arglocs, regalloc,
+ fcond):
+ tmploc = arglocs[1]
+ resloc = arglocs[2]
+ callargs = arglocs[3:]
+
+ faildescr = guard_op.getdescr()
+ fail_index = self.cpu.get_fail_descr_number(faildescr)
+ self._write_fail_index(fail_index)
+ descr = op.getdescr()
+ assert isinstance(descr, JitCellToken)
+ # check value
+ assert tmploc is r.r0
+ self._emit_call(fail_index, imm(descr._arm_func_addr),
+ callargs, fcond, resloc=tmploc)
+ if op.result is None:
+ value = self.cpu.done_with_this_frame_void_v
+ else:
+ kind = op.result.type
+ if kind == INT:
+ value = self.cpu.done_with_this_frame_int_v
+ elif kind == REF:
+ value = self.cpu.done_with_this_frame_ref_v
+ elif kind == FLOAT:
+ value = self.cpu.done_with_this_frame_float_v
+ else:
+ raise AssertionError(kind)
+ self.mc.gen_load_int(r.ip.value, value)
+ self.mc.CMP_rr(tmploc.value, r.ip.value)
+
+        # if the values are equal we take the fast path (Path A below);
+        # otherwise we fall through to the slow path that calls the
+        # assembler helper (Path B); both paths meet at the merge point
+
+ jd = descr.outermost_jitdriver_sd
+ assert jd is not None
+
+        # Path A: fast path using the result boxes: load the return
+        # value and reset the vable token
+
+ fast_path_cond = c.EQ
+ # Reset the vable token --- XXX really too much special logic here:-(
+ if jd.index_of_virtualizable >= 0:
+ from pypy.jit.backend.llsupport.descr import FieldDescr
+ fielddescr = jd.vable_token_descr
+ assert isinstance(fielddescr, FieldDescr)
+ ofs = fielddescr.offset
+ tmploc = regalloc.get_scratch_reg(INT)
+ self.mov_loc_loc(arglocs[0], r.ip, cond=fast_path_cond)
+ self.mc.MOV_ri(tmploc.value, 0, cond=fast_path_cond)
+ self.mc.STR_ri(tmploc.value, r.ip.value, ofs, cond=fast_path_cond)
+
+ if op.result is not None:
+ # load the return value from fail_boxes_xxx[0]
+ kind = op.result.type
+ if kind == INT:
+ adr = self.fail_boxes_int.get_addr_for_num(0)
+ elif kind == REF:
+ adr = self.fail_boxes_ptr.get_addr_for_num(0)
+ elif kind == FLOAT:
+ adr = self.fail_boxes_float.get_addr_for_num(0)
+ else:
+ raise AssertionError(kind)
+ self.mc.gen_load_int(r.ip.value, adr, cond=fast_path_cond)
+ if op.result.type == FLOAT:
+ self.mc.VLDR(resloc.value, r.ip.value, cond=fast_path_cond)
+ else:
+ self.mc.LDR_ri(resloc.value, r.ip.value, cond=fast_path_cond)
+ # jump to merge point
+ jmp_pos = self.mc.currpos()
+ self.mc.BKPT()
+
+ # Path B: use assembler helper
+ asm_helper_adr = self.cpu.cast_adr_to_int(jd.assembler_helper_adr)
+ if self.cpu.supports_floats:
+ floats = r.caller_vfp_resp
+ else:
+ floats = []
+ # in case the call has a result we do not need to save the
+ # corresponding result register because it was already allocated for
+ # the result
+ core = r.caller_resp
+ if op.result:
+ if resloc.is_vfp_reg():
+ floats = r.caller_vfp_resp[1:]
+ else:
+ core = r.caller_resp[1:] + [r.ip] # keep alignment
+ with saved_registers(self.mc, core, floats):
+ # result of previous call is in r0
+ self.mov_loc_loc(arglocs[0], r.r1)
+ self.mc.BL(asm_helper_adr)
+ if not self.cpu.use_hf_abi and op.result and resloc.is_vfp_reg():
+ # move result to the allocated register
+ self.mov_to_vfp_loc(r.r0, r.r1, resloc)
+
+ # merge point
+ currpos = self.mc.currpos()
+ pmc = OverwritingBuilder(self.mc, jmp_pos, WORD)
+ pmc.B_offs(currpos, fast_path_cond)
+
+ self.mc.LDR_ri(r.ip.value, r.fp.value)
+ self.mc.CMP_ri(r.ip.value, 0)
+
+ self._emit_guard(guard_op, regalloc._prepare_guard(guard_op),
+ c.GE, save_exc=True)
+ return fcond
+
+ # ../x86/assembler.py:668
+ def redirect_call_assembler(self, oldlooptoken, newlooptoken):
+ # some minimal sanity checking
+ old_nbargs = oldlooptoken.compiled_loop_token._debug_nbargs
+ new_nbargs = newlooptoken.compiled_loop_token._debug_nbargs
+ assert old_nbargs == new_nbargs
+        # we overwrite the instructions at the old _arm_func_addr
+ # to start with a JMP to the new _arm_func_addr.
+ # Ideally we should rather patch all existing CALLs, but well.
+ oldadr = oldlooptoken._arm_func_addr
+ target = newlooptoken._arm_func_addr
+ mc = ARMv7Builder()
+ mc.B(target)
+ mc.copy_to_raw_memory(oldadr)
+
+ def emit_guard_call_may_force(self, op, guard_op, arglocs, regalloc,
+ fcond):
+ faildescr = guard_op.getdescr()
+ fail_index = self.cpu.get_fail_descr_number(faildescr)
+ self._write_fail_index(fail_index)
+ numargs = op.numargs()
+ callargs = arglocs[2:numargs + 1] # extract the arguments to the call
+ adr = arglocs[1]
+ resloc = arglocs[0]
+ #
+ descr = op.getdescr()
+ size = descr.get_result_size()
+ signed = descr.is_result_signed()
+ #
+ self._emit_call(fail_index, adr, callargs, fcond,
+ resloc, (size, signed))
+
+ self.mc.LDR_ri(r.ip.value, r.fp.value)
+ self.mc.CMP_ri(r.ip.value, 0)
+ self._emit_guard(guard_op, arglocs[1 + numargs:], c.GE, save_exc=True)
+ return fcond
+
+ def emit_guard_call_release_gil(self, op, guard_op, arglocs, regalloc,
+ fcond):
+
+ # first, close the stack in the sense of the asmgcc GC root tracker
+ gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+ numargs = op.numargs()
+ callargs = arglocs[2:numargs + 1] # extract the arguments to the call
+ adr = arglocs[1]
+ resloc = arglocs[0]
+
+ if gcrootmap:
+ self.call_release_gil(gcrootmap, arglocs, fcond)
+ # do the call
+ faildescr = guard_op.getdescr()
+ fail_index = self.cpu.get_fail_descr_number(faildescr)
+ self._write_fail_index(fail_index)
+ #
+ descr = op.getdescr()
+ size = descr.get_result_size()
+ signed = descr.is_result_signed()
+ #
+ self._emit_call(fail_index, adr, callargs, fcond,
+ resloc, (size, signed))
+ # then reopen the stack
+ if gcrootmap:
+ self.call_reacquire_gil(gcrootmap, resloc, fcond)
+
+ self.mc.LDR_ri(r.ip.value, r.fp.value)
+ self.mc.CMP_ri(r.ip.value, 0)
+
+ self._emit_guard(guard_op, arglocs[1 + numargs:], c.GE, save_exc=True)
+ return fcond
+
+ def call_release_gil(self, gcrootmap, save_registers, fcond):
+ # First, we need to save away the registers listed in
+ # 'save_registers' that are not callee-save.
+ # NOTE: We assume that the floating point registers won't be modified.
+ regs_to_save = []
+ for reg in self._regalloc.rm.save_around_call_regs:
+ if reg in save_registers:
+ regs_to_save.append(reg)
+ assert gcrootmap.is_shadow_stack
+ with saved_registers(self.mc, regs_to_save):
+ self._emit_call(NO_FORCE_INDEX,
+ imm(self.releasegil_addr), [], fcond)
+
+ def call_reacquire_gil(self, gcrootmap, save_loc, fcond):
+        # save the previous call's result on the stack temporarily.
+        # NOTE: like with call_release_gil(), we assume that no vfp
+        # registers need saving here, apart from the result location
+ regs_to_save = []
+ vfp_regs_to_save = []
+ if save_loc.is_reg():
+ regs_to_save.append(save_loc)
+ if save_loc.is_vfp_reg():
+ vfp_regs_to_save.append(save_loc)
+ # call the reopenstack() function (also reacquiring the GIL)
+ if len(regs_to_save) % 2 != 1:
+            regs_to_save.append(r.ip)  # for alignment
+ assert gcrootmap.is_shadow_stack
+ with saved_registers(self.mc, regs_to_save, vfp_regs_to_save):
+ self._emit_call(NO_FORCE_INDEX, imm(self.reacqgil_addr), [], fcond)
+
+ def write_new_force_index(self):
+ # for shadowstack only: get a new, unused force_index number and
+ # write it to FORCE_INDEX_OFS. Used to record the call shape
+ # (i.e. where the GC pointers are in the stack) around a CALL
+ # instruction that doesn't already have a force_index.
+ gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+ if gcrootmap and gcrootmap.is_shadow_stack:
+ clt = self.current_clt
+ force_index = clt.reserve_and_record_some_faildescr_index()
+ self._write_fail_index(force_index)
+ return force_index
+ else:
+ return 0
+
+ def _write_fail_index(self, fail_index):
+ self.mc.gen_load_int(r.ip.value, fail_index)
+ self.mc.STR_ri(r.ip.value, r.fp.value)
+
+ def emit_op_call_malloc_gc(self, op, arglocs, regalloc, fcond):
+ self.emit_op_call(op, arglocs, regalloc, fcond)
+ self.propagate_memoryerror_if_r0_is_null()
+ self._alignment_check()
+ return fcond
+
+ def emit_op_call_malloc_nursery(self, op, arglocs, regalloc, fcond):
+ # registers r0 and r1 are allocated for this call
+ assert len(arglocs) == 1
+ size = arglocs[0].value
+ gc_ll_descr = self.cpu.gc_ll_descr
+ self.malloc_cond(
+ gc_ll_descr.get_nursery_free_addr(),
+ gc_ll_descr.get_nursery_top_addr(),
+ size
+ )
+ self._alignment_check()
+ return fcond
+
+ def _alignment_check(self):
+ if not self.debug:
+ return
+ self.mc.MOV_rr(r.ip.value, r.r0.value)
+ self.mc.AND_ri(r.ip.value, r.ip.value, 3)
+ self.mc.CMP_ri(r.ip.value, 0)
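+        # reading pc yields the current instruction + 8, so this
+        # conditional MOV pc, pc skips the BKPT below when the result
+        # is word-aligned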
+ self.mc.MOV_rr(r.pc.value, r.pc.value, cond=c.EQ)
+ self.mc.BKPT()
+ self.mc.NOP()
+
+ emit_op_float_add = gen_emit_float_op('float_add', 'VADD')
+ emit_op_float_sub = gen_emit_float_op('float_sub', 'VSUB')
+ emit_op_float_mul = gen_emit_float_op('float_mul', 'VMUL')
+ emit_op_float_truediv = gen_emit_float_op('float_truediv', 'VDIV')
+
+ emit_op_float_neg = gen_emit_unary_float_op('float_neg', 'VNEG')
+ emit_op_float_abs = gen_emit_unary_float_op('float_abs', 'VABS')
+ emit_op_math_sqrt = gen_emit_unary_float_op('math_sqrt', 'VSQRT')
+
+ emit_op_float_lt = gen_emit_float_cmp_op('float_lt', c.VFP_LT)
+ emit_op_float_le = gen_emit_float_cmp_op('float_le', c.VFP_LE)
+ emit_op_float_eq = gen_emit_float_cmp_op('float_eq', c.EQ)
+ emit_op_float_ne = gen_emit_float_cmp_op('float_ne', c.NE)
+ emit_op_float_gt = gen_emit_float_cmp_op('float_gt', c.GT)
+ emit_op_float_ge = gen_emit_float_cmp_op('float_ge', c.GE)
+
+ emit_guard_float_lt = gen_emit_float_cmp_op_guard('float_lt', c.VFP_LT)
+ emit_guard_float_le = gen_emit_float_cmp_op_guard('float_le', c.VFP_LE)
+ emit_guard_float_eq = gen_emit_float_cmp_op_guard('float_eq', c.EQ)
+ emit_guard_float_ne = gen_emit_float_cmp_op_guard('float_ne', c.NE)
+ emit_guard_float_gt = gen_emit_float_cmp_op_guard('float_gt', c.GT)
+ emit_guard_float_ge = gen_emit_float_cmp_op_guard('float_ge', c.GE)
+
+ def emit_op_cast_float_to_int(self, op, arglocs, regalloc, fcond):
+ arg, res = arglocs
+ assert arg.is_vfp_reg()
+ assert res.is_reg()
+ self.mc.VCVT_float_to_int(r.vfp_ip.value, arg.value)
+ self.mc.VMOV_rc(res.value, r.ip.value, r.vfp_ip.value)
+ return fcond
+
+ def emit_op_cast_int_to_float(self, op, arglocs, regalloc, fcond):
+ arg, res = arglocs
+ assert res.is_vfp_reg()
+ assert arg.is_reg()
+ self.mc.MOV_ri(r.ip.value, 0)
+ self.mc.VMOV_cr(res.value, arg.value, r.ip.value)
+ self.mc.VCVT_int_to_float(res.value, res.value)
+ return fcond
+
+ emit_op_llong_add = gen_emit_float_op('llong_add', 'VADD_i64')
+ emit_op_llong_sub = gen_emit_float_op('llong_sub', 'VSUB_i64')
+ emit_op_llong_and = gen_emit_float_op('llong_and', 'VAND_i64')
+ emit_op_llong_or = gen_emit_float_op('llong_or', 'VORR_i64')
+ emit_op_llong_xor = gen_emit_float_op('llong_xor', 'VEOR_i64')
+
+ def emit_op_llong_to_int(self, op, arglocs, regalloc, fcond):
+ loc = arglocs[0]
+ res = arglocs[1]
+ assert loc.is_vfp_reg()
+ assert res.is_reg()
+ self.mc.VMOV_rc(res.value, r.ip.value, loc.value)
+ return fcond
+
+ emit_op_convert_float_bytes_to_longlong = gen_emit_unary_float_op(
+ 'float_bytes_to_longlong', 'VMOV_cc')
+ emit_op_convert_longlong_bytes_to_float = gen_emit_unary_float_op(
+ 'longlong_bytes_to_float', 'VMOV_cc')
+
+ def emit_op_read_timestamp(self, op, arglocs, regalloc, fcond):
+ assert 0, 'not supported'
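+        # the code below is unreachable; it is kept as a sketch that
+        # would read a cycle counter from coprocessor 15 into a core
+        # register and pack it into the float result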
+ tmp = arglocs[0]
+ res = arglocs[1]
+ self.mc.MRC(15, 0, tmp.value, 15, 12, 1)
+ self.mc.MOV_ri(r.ip.value, 0)
+ self.mc.VMOV_cr(res.value, tmp.value, r.ip.value)
+ return fcond
diff --git a/pypy/jit/backend/arm/regalloc.py b/pypy/jit/backend/arm/regalloc.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/regalloc.py
@@ -0,0 +1,1291 @@
+from pypy.jit.backend.llsupport.regalloc import FrameManager, \
+ RegisterManager, TempBox, compute_vars_longevity
+from pypy.jit.backend.arm import registers as r
+from pypy.jit.backend.arm import locations
+from pypy.jit.backend.arm.locations import imm, get_fp_offset
+from pypy.jit.backend.arm.helper.regalloc import (prepare_op_by_helper_call,
+ prepare_op_unary_cmp,
+ prepare_op_ri,
+ prepare_cmp_op,
+ prepare_float_op,
+ check_imm_arg,
+ check_imm_box
+ )
+from pypy.jit.backend.arm.jump import remap_frame_layout_mixed
+from pypy.jit.backend.arm.arch import MY_COPY_OF_REGS
+from pypy.jit.backend.arm.arch import WORD
+from pypy.jit.codewriter import longlong
+from pypy.jit.metainterp.history import (Const, ConstInt, ConstFloat, ConstPtr,
+ Box, BoxPtr,
+ INT, REF, FLOAT)
+from pypy.jit.metainterp.history import JitCellToken, TargetToken
+from pypy.jit.metainterp.resoperation import rop
+from pypy.jit.backend.llsupport.descr import ArrayDescr
+from pypy.jit.backend.llsupport import symbolic
+from pypy.rpython.lltypesystem import lltype, rffi, rstr, llmemory
+from pypy.rpython.lltypesystem.lloperation import llop
+from pypy.jit.codewriter.effectinfo import EffectInfo
+from pypy.jit.backend.llsupport.descr import unpack_arraydescr
+from pypy.jit.backend.llsupport.descr import unpack_fielddescr
+from pypy.jit.backend.llsupport.descr import unpack_interiorfielddescr
+from pypy.rlib.objectmodel import we_are_translated
+
+
+# xxx hack: set a default value for TargetToken._arm_loop_code. If 0, we know
+# that it is a LABEL that was not compiled yet.
+TargetToken._arm_loop_code = 0
+
+class TempInt(TempBox):
+ type = INT
+
+ def __repr__(self):
+ return "<TempInt at %s>" % (id(self),)
+
+
+class TempPtr(TempBox):
+ type = REF
+
+ def __repr__(self):
+ return "<TempPtr at %s>" % (id(self),)
+
+
+class TempFloat(TempBox):
+ type = FLOAT
+
+ def __repr__(self):
+ return "<TempFloat at %s>" % (id(self),)
+
+
+class ARMFrameManager(FrameManager):
+
+ def __init__(self):
+ FrameManager.__init__(self)
+
+ @staticmethod
+ def frame_pos(i, box_type):
+ if box_type == FLOAT:
+ return locations.StackLocation(i, get_fp_offset(i + 1), box_type)
+ else:
+ return locations.StackLocation(i, get_fp_offset(i), box_type)
+
+ @staticmethod
+ def frame_size(type):
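+        # a FLOAT occupies two stack words (8 bytes), everything else one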
+ if type == FLOAT:
+ return 2
+ return 1
+
+ @staticmethod
+ def get_loc_index(loc):
+ assert loc.is_stack()
+ return loc.position
+
+
+def void(self, op, fcond):
+ return []
+
+
+class VFPRegisterManager(RegisterManager):
+ all_regs = r.all_vfp_regs
+ box_types = [FLOAT]
+ save_around_call_regs = r.all_vfp_regs
+
+ def convert_to_imm(self, c):
+ adr = self.assembler.datablockwrapper.malloc_aligned(8, 8)
+ x = c.getfloatstorage()
+ rffi.cast(rffi.CArrayPtr(longlong.FLOATSTORAGE), adr)[0] = x
+ return locations.ConstFloatLoc(adr)
+
+ def __init__(self, longevity, frame_manager=None, assembler=None):
+ RegisterManager.__init__(self, longevity, frame_manager, assembler)
+
+ def after_call(self, v):
+ """ Adjust registers according to the result of the call,
+ which is in variable v.
+ """
+ self._check_type(v)
+ reg = self.force_allocate_reg(v, selected_reg=r.d0)
+ return reg
+
+ def ensure_value_is_boxed(self, thing, forbidden_vars=[]):
+ loc = None
+ if isinstance(thing, Const):
+ assert isinstance(thing, ConstFloat)
+ loc = self.get_scratch_reg(FLOAT, self.temp_boxes + forbidden_vars)
+ immvalue = self.convert_to_imm(thing)
+ self.assembler.load(loc, immvalue)
+ else:
+ loc = self.make_sure_var_in_reg(thing,
+ forbidden_vars=self.temp_boxes + forbidden_vars)
+ return loc
+
+ def get_scratch_reg(self, type=FLOAT, forbidden_vars=[],
+ selected_reg=None):
+ assert type == FLOAT # for now
+ box = TempFloat()
+ self.temp_boxes.append(box)
+ reg = self.force_allocate_reg(box, forbidden_vars=forbidden_vars,
+ selected_reg=selected_reg)
+ return reg
+
+
+class ARMv7RegisterManager(RegisterManager):
+ all_regs = r.all_regs
+ box_types = None # or a list of acceptable types
+ no_lower_byte_regs = all_regs
+ save_around_call_regs = r.caller_resp
+
+ REGLOC_TO_COPY_AREA_OFS = {
+ r.r2: MY_COPY_OF_REGS + 0 * WORD,
+ r.r3: MY_COPY_OF_REGS + 1 * WORD,
+ r.r4: MY_COPY_OF_REGS + 2 * WORD,
+ r.r5: MY_COPY_OF_REGS + 3 * WORD,
+ r.r6: MY_COPY_OF_REGS + 4 * WORD,
+ r.r7: MY_COPY_OF_REGS + 5 * WORD,
+ r.r8: MY_COPY_OF_REGS + 6 * WORD,
+ r.r9: MY_COPY_OF_REGS + 7 * WORD,
+ r.r10: MY_COPY_OF_REGS + 8 * WORD,
+ }
+
+ def __init__(self, longevity, frame_manager=None, assembler=None):
+ RegisterManager.__init__(self, longevity, frame_manager, assembler)
+
+ def call_result_location(self, v):
+ return r.r0
+
+ def convert_to_imm(self, c):
+ if isinstance(c, ConstInt):
+ val = rffi.cast(rffi.INT, c.value)
+ return locations.ImmLocation(val)
+ else:
+ assert isinstance(c, ConstPtr)
+ return locations.ImmLocation(rffi.cast(lltype.Signed, c.value))
+
+    def ensure_value_is_boxed(self, thing, forbidden_vars=[]):
+ loc = None
+ if isinstance(thing, Const):
+ if isinstance(thing, ConstPtr):
+ tp = REF
+ else:
+ tp = INT
+ loc = self.get_scratch_reg(tp, forbidden_vars=self.temp_boxes
+ + forbidden_vars)
+ immvalue = self.convert_to_imm(thing)
+ self.assembler.load(loc, immvalue)
+ else:
+ loc = self.make_sure_var_in_reg(thing,
+ forbidden_vars=self.temp_boxes + forbidden_vars)
+ return loc
+
+ def get_scratch_reg(self, type=INT, forbidden_vars=[], selected_reg=None):
+ assert type == INT or type == REF
+ box = TempBox()
+ self.temp_boxes.append(box)
+ reg = self.force_allocate_reg(box, forbidden_vars=forbidden_vars,
+ selected_reg=selected_reg)
+ return reg
+
+
+class Regalloc(object):
+
+ def __init__(self, frame_manager=None, assembler=None):
+ self.cpu = assembler.cpu
+ self.assembler = assembler
+ self.frame_manager = frame_manager
+ self.jump_target_descr = None
+ self.final_jump_op = None
+
+ def loc(self, var):
+ if var.type == FLOAT:
+ return self.vfprm.loc(var)
+ else:
+ return self.rm.loc(var)
+
+ def position(self):
+ return self.rm.position
+
+ def next_instruction(self):
+ self.rm.next_instruction()
+ self.vfprm.next_instruction()
+
+ def _check_invariants(self):
+ self.rm._check_invariants()
+ self.vfprm._check_invariants()
+
+ def stays_alive(self, v):
+ if v.type == FLOAT:
+ return self.vfprm.stays_alive(v)
+ else:
+ return self.rm.stays_alive(v)
+
+ def call_result_location(self, v):
+ if v.type == FLOAT:
+ return self.vfprm.call_result_location(v)
+ else:
+ return self.rm.call_result_location(v)
+
+ def after_call(self, v):
+ if v.type == FLOAT:
+ return self.vfprm.after_call(v)
+ else:
+ return self.rm.after_call(v)
+
+ def force_allocate_reg(self, var, forbidden_vars=[], selected_reg=None,
+ need_lower_byte=False):
+ if var.type == FLOAT:
+ return self.vfprm.force_allocate_reg(var, forbidden_vars,
+ selected_reg, need_lower_byte)
+ else:
+ return self.rm.force_allocate_reg(var, forbidden_vars,
+ selected_reg, need_lower_byte)
+
+ def try_allocate_reg(self, v, selected_reg=None, need_lower_byte=False):
+ if v.type == FLOAT:
+ return self.vfprm.try_allocate_reg(v, selected_reg,
+ need_lower_byte)
+ else:
+ return self.rm.try_allocate_reg(v, selected_reg, need_lower_byte)
+
+ def possibly_free_var(self, var):
+ if var.type == FLOAT:
+ self.vfprm.possibly_free_var(var)
+ else:
+ self.rm.possibly_free_var(var)
+
+ def possibly_free_vars_for_op(self, op):
+ for i in range(op.numargs()):
+ var = op.getarg(i)
+ if var is not None: # xxx kludgy
+ self.possibly_free_var(var)
+
+ def possibly_free_vars(self, vars):
+ for var in vars:
+ if var is not None: # xxx kludgy
+ self.possibly_free_var(var)
+
+ def get_scratch_reg(self, type, forbidden_vars=[], selected_reg=None):
+ if type == FLOAT:
+ return self.vfprm.get_scratch_reg(type, forbidden_vars,
+ selected_reg)
+ else:
+ return self.rm.get_scratch_reg(type, forbidden_vars, selected_reg)
+
+ def free_temp_vars(self):
+ self.rm.free_temp_vars()
+ self.vfprm.free_temp_vars()
+
+ def make_sure_var_in_reg(self, var, forbidden_vars=[],
+ selected_reg=None, need_lower_byte=False):
+ assert 0, 'should not be called directly'
+
+ def convert_to_imm(self, value):
+ if isinstance(value, ConstInt):
+ return self.rm.convert_to_imm(value)
+ else:
+ assert isinstance(value, ConstFloat)
+ return self.vfprm.convert_to_imm(value)
+
+ def _prepare(self, inputargs, operations):
+ longevity, last_real_usage = compute_vars_longevity(
+ inputargs, operations)
+ self.longevity = longevity
+ self.last_real_usage = last_real_usage
+ fm = self.frame_manager
+ asm = self.assembler
+ self.vfprm = VFPRegisterManager(longevity, fm, asm)
+ self.rm = ARMv7RegisterManager(longevity, fm, asm)
+
+ def prepare_loop(self, inputargs, operations):
+ self._prepare(inputargs, operations)
+ self._set_initial_bindings(inputargs)
+ self.possibly_free_vars(inputargs)
+
+ def prepare_bridge(self, inputargs, arglocs, ops):
+ self._prepare(inputargs, ops)
+ self._update_bindings(arglocs, inputargs)
+
+ def _set_initial_bindings(self, inputargs):
+        # The first inputargs are passed in registers r0-r3. With the
+        # soft-float calling convention float parameters arrive in core
+        # registers too, so we need to move them to the coprocessor.
+ if self.cpu.use_hf_abi:
+ self._set_initial_bindings_hf(inputargs)
+ else:
+ self._set_initial_bindings_sf(inputargs)
+
+ def _set_initial_bindings_sf(self, inputargs):
+
+ arg_index = 0
+ count = 0
+ n_register_args = len(r.argument_regs)
+ cur_frame_pos = 1 - (self.assembler.STACK_FIXED_AREA // WORD)
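+        # incoming stack arguments live outside the area reserved by the
+        # prologue, which shows up here as negative frame positions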
+ for box in inputargs:
+ assert isinstance(box, Box)
+ # handle inputargs in argument registers
+ if box.type == FLOAT and arg_index % 2 != 0:
+ arg_index += 1 # align argument index for float passed
+ # in register
+ if arg_index < n_register_args:
+ if box.type == FLOAT:
+ loc = r.argument_regs[arg_index]
+ loc2 = r.argument_regs[arg_index + 1]
+ vfpreg = self.try_allocate_reg(box)
+ # move soft-float argument to vfp
+ self.assembler.mov_to_vfp_loc(loc, loc2, vfpreg)
+ arg_index += 2 # this argument used two argument registers
+ else:
+ loc = r.argument_regs[arg_index]
+ self.try_allocate_reg(box, selected_reg=loc)
+ arg_index += 1
+ else:
+ # treat stack args as stack locations with a negative offset
+ if box.type == FLOAT:
+ cur_frame_pos -= 2
+ if count % 2 != 0: # Stack argument alignment
+ cur_frame_pos -= 1
+ count = 0
+ else:
+ cur_frame_pos -= 1
+ count += 1
+ loc = self.frame_manager.frame_pos(cur_frame_pos, box.type)
+ self.frame_manager.set_binding(box, loc)
+
+ def _set_initial_bindings_hf(self, inputargs):
+
+ arg_index = vfp_arg_index = 0
+ count = 0
+ n_reg_args = len(r.argument_regs)
+ n_vfp_reg_args = len(r.vfp_argument_regs)
+ cur_frame_pos = 1 - (self.assembler.STACK_FIXED_AREA // WORD)
+ for box in inputargs:
+ assert isinstance(box, Box)
+ # handle inputargs in argument registers
+ if box.type != FLOAT and arg_index < n_reg_args:
+ reg = r.argument_regs[arg_index]
+ self.try_allocate_reg(box, selected_reg=reg)
+ arg_index += 1
+ elif box.type == FLOAT and vfp_arg_index < n_vfp_reg_args:
+ reg = r.vfp_argument_regs[vfp_arg_index]
+ self.try_allocate_reg(box, selected_reg=reg)
+ vfp_arg_index += 1
+ else:
+ # treat stack args as stack locations with a negative offset
+ if box.type == FLOAT:
+ cur_frame_pos -= 2
+ if count % 2 != 0: # Stack argument alignment
+ cur_frame_pos -= 1
+ count = 0
+ else:
+ cur_frame_pos -= 1
+ count += 1
+ loc = self.frame_manager.frame_pos(cur_frame_pos, box.type)
+ self.frame_manager.set_binding(box, loc)
+
+ def _update_bindings(self, locs, inputargs):
+ used = {}
+ i = 0
+ for loc in locs:
+ arg = inputargs[i]
+ i += 1
+ if loc.is_reg():
+ self.rm.reg_bindings[arg] = loc
+ elif loc.is_vfp_reg():
+ self.vfprm.reg_bindings[arg] = loc
+ else:
+ assert loc.is_stack()
+ self.frame_manager.set_binding(arg, loc)
+ used[loc] = None
+
+ # XXX combine with x86 code and move to llsupport
+ self.rm.free_regs = []
+ for reg in self.rm.all_regs:
+ if reg not in used:
+ self.rm.free_regs.append(reg)
+ self.vfprm.free_regs = []
+ for reg in self.vfprm.all_regs:
+ if reg not in used:
+ self.vfprm.free_regs.append(reg)
+ # note: we need to make a copy of inputargs because possibly_free_vars
+ # is also used on op args, which is a non-resizable list
+ self.possibly_free_vars(list(inputargs))
+
+ def perform_llong(self, op, args, fcond):
+ return self.assembler.regalloc_emit_llong(op, args, fcond, self)
+
+ def perform_math(self, op, args, fcond):
+ return self.assembler.regalloc_emit_math(op, args, self, fcond)
+
+ def force_spill_var(self, var):
+ if var.type == FLOAT:
+ self.vfprm.force_spill_var(var)
+ else:
+ self.rm.force_spill_var(var)
+
+ def before_call(self, force_store=[], save_all_regs=False):
+ self.rm.before_call(force_store, save_all_regs)
+ self.vfprm.before_call(force_store, save_all_regs)
+
+ def _ensure_value_is_boxed(self, thing, forbidden_vars=[]):
+ if thing.type == FLOAT:
+ return self.vfprm.ensure_value_is_boxed(thing, forbidden_vars)
+ else:
+ return self.rm.ensure_value_is_boxed(thing, forbidden_vars)
+
+ def _sync_var(self, v):
+ if v.type == FLOAT:
+ self.vfprm._sync_var(v)
+ else:
+ self.rm._sync_var(v)
+
+ def _prepare_op_int_add(self, op, fcond):
+ boxes = op.getarglist()
+ a0, a1 = boxes
+ imm_a0 = check_imm_box(a0)
+ imm_a1 = check_imm_box(a1)
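+        # ADD can encode a small immediate operand, so keep whichever
+        # argument fits as an immediate and box the other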
+ if not imm_a0 and imm_a1:
+ l0 = self._ensure_value_is_boxed(a0, boxes)
+ l1 = self.convert_to_imm(a1)
+ elif imm_a0 and not imm_a1:
+ l0 = self.convert_to_imm(a0)
+ l1 = self._ensure_value_is_boxed(a1, boxes)
+ else:
+ l0 = self._ensure_value_is_boxed(a0, boxes)
+ l1 = self._ensure_value_is_boxed(a1, boxes)
+ return [l0, l1]
+
+ def prepare_op_int_add(self, op, fcond):
+ locs = self._prepare_op_int_add(op, fcond)
+ self.possibly_free_vars_for_op(op)
+ self.free_temp_vars()
+ res = self.force_allocate_reg(op.result)
+ return locs + [res]
+
+ def _prepare_op_int_sub(self, op, fcond):
+ a0, a1 = boxes = op.getarglist()
+ imm_a0 = check_imm_box(a0)
+ imm_a1 = check_imm_box(a1)
+ if not imm_a0 and imm_a1:
+ l0 = self._ensure_value_is_boxed(a0, boxes)
+ l1 = self.convert_to_imm(a1)
+ elif imm_a0 and not imm_a1:
+ l0 = self.convert_to_imm(a0)
+ l1 = self._ensure_value_is_boxed(a1, boxes)
+ else:
+ l0 = self._ensure_value_is_boxed(a0, boxes)
+ l1 = self._ensure_value_is_boxed(a1, boxes)
+ return [l0, l1]
+
+ def prepare_op_int_sub(self, op, fcond):
+ locs = self._prepare_op_int_sub(op, fcond)
+ self.possibly_free_vars_for_op(op)
+ self.free_temp_vars()
+ res = self.force_allocate_reg(op.result)
+ return locs + [res]
+
+ def prepare_op_int_mul(self, op, fcond):
+ boxes = op.getarglist()
+ a0, a1 = boxes
+
+ reg1 = self._ensure_value_is_boxed(a0, forbidden_vars=boxes)
+ reg2 = self._ensure_value_is_boxed(a1, forbidden_vars=boxes)
+
+ self.possibly_free_vars(boxes)
+ self.possibly_free_vars_for_op(op)
+ res = self.force_allocate_reg(op.result)
+ self.possibly_free_var(op.result)
+ return [reg1, reg2, res]
+
+ def prepare_op_int_force_ge_zero(self, op, fcond):
+ argloc = self._ensure_value_is_boxed(op.getarg(0))
+ resloc = self.force_allocate_reg(op.result, [op.getarg(0)])
+ return [argloc, resloc]
+
+ def prepare_guard_int_mul_ovf(self, op, guard, fcond):
+ boxes = op.getarglist()
+ reg1 = self._ensure_value_is_boxed(boxes[0], forbidden_vars=boxes)
+ reg2 = self._ensure_value_is_boxed(boxes[1], forbidden_vars=boxes)
+ res = self.force_allocate_reg(op.result)
+ return self._prepare_guard(guard, [reg1, reg2, res])
+
+ def prepare_guard_int_add_ovf(self, op, guard, fcond):
+ locs = self._prepare_op_int_add(op, fcond)
+ res = self.force_allocate_reg(op.result)
+ locs.append(res)
+ return self._prepare_guard(guard, locs)
+
+ def prepare_guard_int_sub_ovf(self, op, guard, fcond):
+ locs = self._prepare_op_int_sub(op, fcond)
+ res = self.force_allocate_reg(op.result)
+ locs.append(res)
+ return self._prepare_guard(guard, locs)
+
+ prepare_op_int_floordiv = prepare_op_by_helper_call('int_floordiv')
+ prepare_op_int_mod = prepare_op_by_helper_call('int_mod')
+    prepare_op_uint_floordiv = prepare_op_by_helper_call('uint_floordiv')
+
+ prepare_op_int_and = prepare_op_ri('int_and')
+ prepare_op_int_or = prepare_op_ri('int_or')
+ prepare_op_int_xor = prepare_op_ri('int_xor')
+ prepare_op_int_lshift = prepare_op_ri('int_lshift', imm_size=0x1F,
+ allow_zero=False, commutative=False)
+ prepare_op_int_rshift = prepare_op_ri('int_rshift', imm_size=0x1F,
+ allow_zero=False, commutative=False)
+ prepare_op_uint_rshift = prepare_op_ri('uint_rshift', imm_size=0x1F,
+ allow_zero=False, commutative=False)
+
+ prepare_op_int_lt = prepare_cmp_op('int_lt')
+ prepare_op_int_le = prepare_cmp_op('int_le')
+ prepare_op_int_eq = prepare_cmp_op('int_eq')
+ prepare_op_int_ne = prepare_cmp_op('int_ne')
+ prepare_op_int_gt = prepare_cmp_op('int_gt')
+ prepare_op_int_ge = prepare_cmp_op('int_ge')
+
+ prepare_op_uint_le = prepare_cmp_op('uint_le')
+ prepare_op_uint_gt = prepare_cmp_op('uint_gt')
+
+ prepare_op_uint_lt = prepare_cmp_op('uint_lt')
+ prepare_op_uint_ge = prepare_cmp_op('uint_ge')
+
+ prepare_op_ptr_eq = prepare_op_instance_ptr_eq = prepare_op_int_eq
+ prepare_op_ptr_ne = prepare_op_instance_ptr_ne = prepare_op_int_ne
+
+ prepare_guard_int_lt = prepare_cmp_op('guard_int_lt')
+ prepare_guard_int_le = prepare_cmp_op('guard_int_le')
+ prepare_guard_int_eq = prepare_cmp_op('guard_int_eq')
+ prepare_guard_int_ne = prepare_cmp_op('guard_int_ne')
+ prepare_guard_int_gt = prepare_cmp_op('guard_int_gt')
+ prepare_guard_int_ge = prepare_cmp_op('guard_int_ge')
+
+ prepare_guard_uint_le = prepare_cmp_op('guard_uint_le')
+ prepare_guard_uint_gt = prepare_cmp_op('guard_uint_gt')
+
+ prepare_guard_uint_lt = prepare_cmp_op('guard_uint_lt')
+ prepare_guard_uint_ge = prepare_cmp_op('guard_uint_ge')
+
+ prepare_guard_ptr_eq = prepare_guard_instance_ptr_eq = prepare_guard_int_eq
+ prepare_guard_ptr_ne = prepare_guard_instance_ptr_ne = prepare_guard_int_ne
+
+ prepare_op_int_add_ovf = prepare_op_int_add
+ prepare_op_int_sub_ovf = prepare_op_int_sub
+
+ prepare_op_int_is_true = prepare_op_unary_cmp('int_is_true')
+ prepare_op_int_is_zero = prepare_op_unary_cmp('int_is_zero')
+
+ prepare_guard_int_is_true = prepare_op_unary_cmp('int_is_true')
+ prepare_guard_int_is_zero = prepare_op_unary_cmp('int_is_zero')
+
+ def prepare_op_int_neg(self, op, fcond):
+ l0 = self._ensure_value_is_boxed(op.getarg(0))
+ self.possibly_free_vars_for_op(op)
+ self.free_temp_vars()
+ resloc = self.force_allocate_reg(op.result)
+ return [l0, resloc]
+
+ prepare_op_int_invert = prepare_op_int_neg
+
+ def prepare_op_call(self, op, fcond):
+ effectinfo = op.getdescr().get_extra_info()
+ if effectinfo is not None:
+ oopspecindex = effectinfo.oopspecindex
+ if oopspecindex in (EffectInfo.OS_LLONG_ADD,
+ EffectInfo.OS_LLONG_SUB,
+ EffectInfo.OS_LLONG_AND,
+ EffectInfo.OS_LLONG_OR,
+ EffectInfo.OS_LLONG_XOR):
+ args = self._prepare_llong_binop_xx(op, fcond)
+ self.perform_llong(op, args, fcond)
+ return
+ if oopspecindex == EffectInfo.OS_LLONG_TO_INT:
+ args = self._prepare_llong_to_int(op, fcond)
+ self.perform_llong(op, args, fcond)
+ return
+ if oopspecindex == EffectInfo.OS_MATH_SQRT:
+ args = self.prepare_op_math_sqrt(op, fcond)
+ self.perform_math(op, args, fcond)
+ return
+ return self._prepare_call(op)
+
+ def _prepare_call(self, op, force_store=[], save_all_regs=False):
+ args = []
+ args.append(None)
+ for i in range(op.numargs()):
+ args.append(self.loc(op.getarg(i)))
+ # spill variables that need to be saved around calls
+ self.vfprm.before_call(save_all_regs=save_all_regs)
+ if not save_all_regs:
+ gcrootmap = self.assembler.cpu.gc_ll_descr.gcrootmap
+ if gcrootmap and gcrootmap.is_shadow_stack:
+ save_all_regs = 2
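+                # save_all_regs == 2 additionally spills registers that
+                # may hold GC pointers, so the shadow stack sees them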
+ self.rm.before_call(save_all_regs=save_all_regs)
+ if op.result:
+ resloc = self.after_call(op.result)
+ args[0] = resloc
+ self.before_call_called = True
+ return args
+
+ def prepare_op_call_malloc_gc(self, op, fcond):
+ return self._prepare_call(op)
+
+ def _prepare_llong_binop_xx(self, op, fcond):
+ # arg 0 is the address of the function
+ loc0 = self._ensure_value_is_boxed(op.getarg(1))
+ loc1 = self._ensure_value_is_boxed(op.getarg(2))
+ self.possibly_free_vars_for_op(op)
+ self.free_temp_vars()
+ res = self.vfprm.force_allocate_reg(op.result)
+ return [loc0, loc1, res]
+
+ def _prepare_llong_to_int(self, op, fcond):
+ loc0 = self._ensure_value_is_boxed(op.getarg(1))
+ res = self.force_allocate_reg(op.result)
+ return [loc0, res]
+
+
+ def _prepare_guard(self, op, args=None):
+ if args is None:
+ args = []
+ args.append(imm(self.frame_manager.get_frame_depth()))
+ for arg in op.getfailargs():
+ if arg:
+ args.append(self.loc(arg))
+ else:
+ args.append(None)
+ return args
+
+ def prepare_op_finish(self, op, fcond):
+ args = [None] * (op.numargs() + 1)
+ for i in range(op.numargs()):
+ arg = op.getarg(i)
+ if arg:
+ args[i] = self.loc(arg)
+ self.possibly_free_var(arg)
+ n = self.cpu.get_fail_descr_number(op.getdescr())
+ args[-1] = imm(n)
+ return args
+
+ def prepare_op_guard_true(self, op, fcond):
+ l0 = self._ensure_value_is_boxed(op.getarg(0))
+ args = self._prepare_guard(op, [l0])
+ return args
+
+ prepare_op_guard_false = prepare_op_guard_true
+ prepare_op_guard_nonnull = prepare_op_guard_true
+ prepare_op_guard_isnull = prepare_op_guard_true
+
+ def prepare_op_guard_value(self, op, fcond):
+ boxes = op.getarglist()
+ a0, a1 = boxes
+ imm_a1 = check_imm_box(a1)
+ l0 = self._ensure_value_is_boxed(a0, boxes)
+ if not imm_a1:
+ l1 = self._ensure_value_is_boxed(a1, boxes)
+ else:
+ l1 = self.convert_to_imm(a1)
+ assert op.result is None
+ arglocs = self._prepare_guard(op, [l0, l1])
+ self.possibly_free_vars(op.getarglist())
+ self.possibly_free_vars(op.getfailargs())
+ return arglocs
+
+ def prepare_op_guard_no_overflow(self, op, fcond):
+ locs = self._prepare_guard(op)
+ self.possibly_free_vars(op.getfailargs())
+ return locs
+
+ prepare_op_guard_overflow = prepare_op_guard_no_overflow
+ prepare_op_guard_not_invalidated = prepare_op_guard_no_overflow
+
+ def prepare_op_guard_exception(self, op, fcond):
+ boxes = op.getarglist()
+ arg0 = ConstInt(rffi.cast(lltype.Signed, op.getarg(0).getint()))
+ loc = self._ensure_value_is_boxed(arg0)
+ loc1 = self.get_scratch_reg(INT, boxes)
+ if op.result in self.longevity:
+ resloc = self.force_allocate_reg(op.result, boxes)
+ self.possibly_free_var(op.result)
+ else:
+ resloc = None
+ pos_exc_value = imm(self.cpu.pos_exc_value())
+ pos_exception = imm(self.cpu.pos_exception())
+ arglocs = self._prepare_guard(op,
+ [loc, loc1, resloc, pos_exc_value, pos_exception])
+ return arglocs
+
+ def prepare_op_guard_no_exception(self, op, fcond):
+ loc = self._ensure_value_is_boxed(
+ ConstInt(self.cpu.pos_exception()))
+ arglocs = self._prepare_guard(op, [loc])
+ return arglocs
+
+ def prepare_op_guard_class(self, op, fcond):
+ return self._prepare_guard_class(op, fcond)
+
+ prepare_op_guard_nonnull_class = prepare_op_guard_class
+
+ def _prepare_guard_class(self, op, fcond):
+ assert isinstance(op.getarg(0), Box)
+ boxes = op.getarglist()
+
+ x = self._ensure_value_is_boxed(boxes[0], boxes)
+ y_val = rffi.cast(lltype.Signed, op.getarg(1).getint())
+
+ arglocs = [x, None, None]
+
+ offset = self.cpu.vtable_offset
+ if offset is not None:
+ y = self.get_scratch_reg(INT, forbidden_vars=boxes)
+ self.assembler.load(y, imm(y_val))
+
+ assert check_imm_arg(offset)
+ offset_loc = imm(offset)
+
+ arglocs[1] = y
+ arglocs[2] = offset_loc
+ else:
+ # XXX hard-coded assumption: to go from an object to its class
+ # we use the following algorithm:
+ # - read the typeid from mem(locs[0]), i.e. at offset 0
+ # - keep the lower 16 bits read there
+ # - multiply by 4 and use it as an offset in type_info_group
+ # - add 16 bytes, to go past the TYPE_INFO structure
+ classptr = y_val
+ # here, we have to go back from 'classptr' to the value expected
+ # from reading the 16 bits in the object header
+ from pypy.rpython.memory.gctypelayout import GCData
+ sizeof_ti = rffi.sizeof(GCData.TYPE_INFO)
+ type_info_group = llop.gc_get_type_info_group(llmemory.Address)
+ type_info_group = rffi.cast(lltype.Signed, type_info_group)
+ expected_typeid = classptr - sizeof_ti - type_info_group
+ expected_typeid >>= 2
+ if check_imm_arg(expected_typeid):
+ arglocs[1] = imm(expected_typeid)
+ else:
+ y = self.get_scratch_reg(INT, forbidden_vars=boxes)
+ self.assembler.load(y, imm(expected_typeid))
+ arglocs[1] = y
+
+ return self._prepare_guard(op, arglocs)
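
The fallback branch above inverts the algorithm spelled out in the XXX
comment: instead of mapping a typeid to a class pointer, it recovers the
typeid that the object header must contain for the guard to pass. A worked
example of the arithmetic, where the type_info_group base address and the
TYPE_INFO size are made-up values for illustration:

    type_info_group = 0x1000    # assumed base address of the group
    sizeof_ti = 16              # assumed rffi.sizeof(GCData.TYPE_INFO)
    typeid = 42
    # forward direction, as described in the comment above:
    classptr = type_info_group + 4 * typeid + sizeof_ti
    # inverse direction, as computed by the code above:
    expected_typeid = (classptr - sizeof_ti - type_info_group) >> 2
    assert expected_typeid == typeid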
+
+ def compute_hint_frame_locations(self, operations):
+        # optimization only: fill in the 'hint_frame_locations' dictionary
+        # of rm and vfprm based on the JUMP at the end of the loop, by
+        # looking at where we would like the boxes to be after the jump.
+ op = operations[-1]
+ if op.getopnum() != rop.JUMP:
+ return
+ self.final_jump_op = op
+ descr = op.getdescr()
+ assert isinstance(descr, TargetToken)
+ if descr._arm_loop_code != 0:
+ # if the target LABEL was already compiled, i.e. if it belongs
+ # to some already-compiled piece of code
+ self._compute_hint_frame_locations_from_descr(descr)
+ #else:
+ # The loop ends in a JUMP going back to a LABEL in the same loop.
+ # We cannot fill 'hint_frame_locations' immediately, but we can
+        #    wait until the corresponding prepare_op_label() to know where
+        #    we would like the boxes to be after the jump.
+
+ def _compute_hint_frame_locations_from_descr(self, descr):
+ arglocs = self.assembler.target_arglocs(descr)
+ jump_op = self.final_jump_op
+ assert len(arglocs) == jump_op.numargs()
+ for i in range(jump_op.numargs()):
+ box = jump_op.getarg(i)
+ if isinstance(box, Box):
+ loc = arglocs[i]
+ if loc is not None and loc.is_stack():
+ self.frame_manager.hint_frame_locations[box] = loc
+
+ def prepare_op_jump(self, op, fcond):
+ descr = op.getdescr()
+ assert isinstance(descr, TargetToken)
+ self.jump_target_descr = descr
+ arglocs = self.assembler.target_arglocs(descr)
+
+ # get temporary locs
+ tmploc = r.ip
+ vfptmploc = r.vfp_ip
+
+ # Part about non-floats
+ src_locations1 = []
+ dst_locations1 = []
+ # Part about floats
+ src_locations2 = []
+ dst_locations2 = []
+
+ # Build the four lists
+ for i in range(op.numargs()):
+ box = op.getarg(i)
+ src_loc = self.loc(box)
+ dst_loc = arglocs[i]
+ if box.type != FLOAT:
+ src_locations1.append(src_loc)
+ dst_locations1.append(dst_loc)
+ else:
+ src_locations2.append(src_loc)
+ dst_locations2.append(dst_loc)
+ remap_frame_layout_mixed(self.assembler,
+ src_locations1, dst_locations1, tmploc,
+ src_locations2, dst_locations2, vfptmploc)
+ return []
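
remap_frame_layout_mixed() has to treat all of these moves as one parallel
assignment: a source location may also be some other move's destination, so
naive sequential copies could overwrite a value before it is read. The two
scratch locations (ip for core registers, vfp_ip for VFP ones) are what
break such cycles. A minimal sketch of the cycle case, with a dict standing
in for the register file:

    regs = {'r4': 1, 'r5': 2, 'ip': None}
    # the jump wants r4 -> r5 and r5 -> r4; copying one after the other
    # would lose a value, so one side goes through the scratch register
    regs['ip'] = regs['r4']
    regs['r4'] = regs['r5']
    regs['r5'] = regs['ip']
    assert (regs['r4'], regs['r5']) == (2, 1)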
+
+ def prepare_op_setfield_gc(self, op, fcond):
+ boxes = op.getarglist()
+ a0, a1 = boxes
+ ofs, size, sign = unpack_fielddescr(op.getdescr())
+ base_loc = self._ensure_value_is_boxed(a0, boxes)
+ value_loc = self._ensure_value_is_boxed(a1, boxes)
+ if check_imm_arg(ofs):
+ ofs_loc = imm(ofs)
+ else:
+ ofs_loc = self.get_scratch_reg(INT, boxes)
+ self.assembler.load(ofs_loc, imm(ofs))
+ return [value_loc, base_loc, ofs_loc, imm(size)]
+
+ prepare_op_setfield_raw = prepare_op_setfield_gc
+
+ def prepare_op_getfield_gc(self, op, fcond):
+ a0 = op.getarg(0)
+ ofs, size, sign = unpack_fielddescr(op.getdescr())
+ base_loc = self._ensure_value_is_boxed(a0)
+ immofs = imm(ofs)
+ if check_imm_arg(ofs):
+ ofs_loc = immofs
+ else:
+ ofs_loc = self.get_scratch_reg(INT, [a0])
+ self.assembler.load(ofs_loc, immofs)
+ self.possibly_free_vars_for_op(op)
+ self.free_temp_vars()
+ res = self.force_allocate_reg(op.result)
+ return [base_loc, ofs_loc, res, imm(size)]
+
+ prepare_op_getfield_raw = prepare_op_getfield_gc
+ prepare_op_getfield_raw_pure = prepare_op_getfield_gc
+ prepare_op_getfield_gc_pure = prepare_op_getfield_gc
+
+ def prepare_op_getinteriorfield_gc(self, op, fcond):
+ t = unpack_interiorfielddescr(op.getdescr())
+ ofs, itemsize, fieldsize, sign = t
+ args = op.getarglist()
+ base_loc = self._ensure_value_is_boxed(op.getarg(0), args)
+ index_loc = self._ensure_value_is_boxed(op.getarg(1), args)
+ immofs = imm(ofs)
+ if check_imm_arg(ofs):
+ ofs_loc = immofs
+ else:
+ ofs_loc = self.get_scratch_reg(INT, args)
+ self.assembler.load(ofs_loc, immofs)
+ self.possibly_free_vars_for_op(op)
+ self.free_temp_vars()
+ result_loc = self.force_allocate_reg(op.result)
+ return [base_loc, index_loc, result_loc, ofs_loc, imm(ofs),
+ imm(itemsize), imm(fieldsize)]
+
+ def prepare_op_setinteriorfield_gc(self, op, fcond):
+ t = unpack_interiorfielddescr(op.getdescr())
+ ofs, itemsize, fieldsize, sign = t
+ args = op.getarglist()
+ base_loc = self._ensure_value_is_boxed(op.getarg(0), args)
+ index_loc = self._ensure_value_is_boxed(op.getarg(1), args)
+ value_loc = self._ensure_value_is_boxed(op.getarg(2), args)
+ immofs = imm(ofs)
+ if check_imm_arg(ofs):
+ ofs_loc = immofs
+ else:
+ ofs_loc = self.get_scratch_reg(INT, args)
+ self.assembler.load(ofs_loc, immofs)
+ return [base_loc, index_loc, value_loc, ofs_loc, imm(ofs),
+ imm(itemsize), imm(fieldsize)]
+ prepare_op_setinteriorfield_raw = prepare_op_setinteriorfield_gc
+
+ def prepare_op_arraylen_gc(self, op, fcond):
+ arraydescr = op.getdescr()
+ assert isinstance(arraydescr, ArrayDescr)
+ ofs = arraydescr.lendescr.offset
+ arg = op.getarg(0)
+ base_loc = self._ensure_value_is_boxed(arg)
+ self.possibly_free_vars_for_op(op)
+ self.free_temp_vars()
+ res = self.force_allocate_reg(op.result)
+ return [res, base_loc, imm(ofs)]
+
+ def prepare_op_setarrayitem_gc(self, op, fcond):
+ size, ofs, _ = unpack_arraydescr(op.getdescr())
+ scale = get_scale(size)
+ args = op.getarglist()
+ base_loc = self._ensure_value_is_boxed(args[0], args)
+ ofs_loc = self._ensure_value_is_boxed(args[1], args)
+ value_loc = self._ensure_value_is_boxed(args[2], args)
+ assert check_imm_arg(ofs)
+ return [value_loc, base_loc, ofs_loc, imm(scale), imm(ofs)]
+ prepare_op_setarrayitem_raw = prepare_op_setarrayitem_gc
+ prepare_op_raw_store = prepare_op_setarrayitem_gc
+
+ def prepare_op_getarrayitem_gc(self, op, fcond):
+ boxes = op.getarglist()
+ size, ofs, _ = unpack_arraydescr(op.getdescr())
+ scale = get_scale(size)
+ base_loc = self._ensure_value_is_boxed(boxes[0], boxes)
+ ofs_loc = self._ensure_value_is_boxed(boxes[1], boxes)
+ self.possibly_free_vars_for_op(op)
+ self.free_temp_vars()
+ res = self.force_allocate_reg(op.result)
+ assert check_imm_arg(ofs)
+ return [res, base_loc, ofs_loc, imm(scale), imm(ofs)]
+
+ prepare_op_getarrayitem_raw = prepare_op_getarrayitem_gc
+ prepare_op_getarrayitem_raw_pure = prepare_op_getarrayitem_gc
+ prepare_op_getarrayitem_gc_pure = prepare_op_getarrayitem_gc
+ prepare_op_raw_load = prepare_op_getarrayitem_gc
+
+ def prepare_op_strlen(self, op, fcond):
+ args = op.getarglist()
+ l0 = self._ensure_value_is_boxed(op.getarg(0))
+ basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.STR,
+ self.cpu.translate_support_code)
+ immofs = imm(ofs_length)
+ if check_imm_arg(ofs_length):
+ l1 = immofs
+ else:
+ l1 = self.get_scratch_reg(INT, args)
+ self.assembler.load(l1, immofs)
+
+ self.possibly_free_vars_for_op(op)
+ self.free_temp_vars()
+
+ res = self.force_allocate_reg(op.result)
+ self.possibly_free_var(op.result)
+ return [l0, l1, res]
+
+ def prepare_op_strgetitem(self, op, fcond):
+ boxes = op.getarglist()
+ base_loc = self._ensure_value_is_boxed(boxes[0])
+
+ a1 = boxes[1]
+ imm_a1 = check_imm_box(a1)
+ if imm_a1:
+ ofs_loc = self.convert_to_imm(a1)
+ else:
+ ofs_loc = self._ensure_value_is_boxed(a1, boxes)
+
+ self.possibly_free_vars_for_op(op)
+ self.free_temp_vars()
+ res = self.force_allocate_reg(op.result)
+
+ basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.STR,
+ self.cpu.translate_support_code)
+ assert itemsize == 1
+ return [res, base_loc, ofs_loc, imm(basesize)]
+
+ def prepare_op_strsetitem(self, op, fcond):
+ boxes = op.getarglist()
+ base_loc = self._ensure_value_is_boxed(boxes[0], boxes)
+ ofs_loc = self._ensure_value_is_boxed(boxes[1], boxes)
+ value_loc = self._ensure_value_is_boxed(boxes[2], boxes)
+ basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.STR,
+ self.cpu.translate_support_code)
+ assert itemsize == 1
+ return [value_loc, base_loc, ofs_loc, imm(basesize)]
+
+ prepare_op_copystrcontent = void
+ prepare_op_copyunicodecontent = void
+
+ def prepare_op_unicodelen(self, op, fcond):
+ l0 = self._ensure_value_is_boxed(op.getarg(0))
+ basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.UNICODE,
+ self.cpu.translate_support_code)
+ immofs = imm(ofs_length)
+ if check_imm_arg(ofs_length):
+ l1 = immofs
+ else:
+ l1 = self.get_scratch_reg(INT, [op.getarg(0)])
+ self.assembler.load(l1, immofs)
+
+ self.possibly_free_vars_for_op(op)
+ self.free_temp_vars()
+ res = self.force_allocate_reg(op.result)
+ return [l0, l1, res]
+
+ def prepare_op_unicodegetitem(self, op, fcond):
+ boxes = op.getarglist()
+ base_loc = self._ensure_value_is_boxed(boxes[0], boxes)
+ ofs_loc = self._ensure_value_is_boxed(boxes[1], boxes)
+
+ self.possibly_free_vars_for_op(op)
+ self.free_temp_vars()
+ res = self.force_allocate_reg(op.result)
+
+ basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.UNICODE,
+ self.cpu.translate_support_code)
+ scale = itemsize / 2
+ return [res, base_loc, ofs_loc,
+ imm(scale), imm(basesize), imm(itemsize)]
+
+ def prepare_op_unicodesetitem(self, op, fcond):
+ boxes = op.getarglist()
+ base_loc = self._ensure_value_is_boxed(boxes[0], boxes)
+ ofs_loc = self._ensure_value_is_boxed(boxes[1], boxes)
+ value_loc = self._ensure_value_is_boxed(boxes[2], boxes)
+ basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.UNICODE,
+ self.cpu.translate_support_code)
+ scale = itemsize / 2
+ return [value_loc, base_loc, ofs_loc,
+ imm(scale), imm(basesize), imm(itemsize)]
+
+ def prepare_op_same_as(self, op, fcond):
+ arg = op.getarg(0)
+ imm_arg = check_imm_box(arg)
+ if imm_arg:
+ argloc = self.convert_to_imm(arg)
+ else:
+ argloc = self._ensure_value_is_boxed(arg)
+ self.possibly_free_vars_for_op(op)
+ self.free_temp_vars()
+ resloc = self.force_allocate_reg(op.result)
+ return [argloc, resloc]
+
+ prepare_op_cast_ptr_to_int = prepare_op_same_as
+ prepare_op_cast_int_to_ptr = prepare_op_same_as
+
+ def prepare_op_call_malloc_nursery(self, op, fcond):
+ size_box = op.getarg(0)
+ assert isinstance(size_box, ConstInt)
+ size = size_box.getint()
+
+ self.rm.force_allocate_reg(op.result, selected_reg=r.r0)
+ t = TempInt()
+ self.rm.force_allocate_reg(t, selected_reg=r.r1)
+ self.possibly_free_var(op.result)
+ self.possibly_free_var(t)
+ return [imm(size)]
+
+ def get_mark_gc_roots(self, gcrootmap, use_copy_area=False):
+ shape = gcrootmap.get_basic_shape()
+ for v, val in self.frame_manager.bindings.items():
+ if (isinstance(v, BoxPtr) and self.rm.stays_alive(v)):
+ assert val.is_stack()
+ gcrootmap.add_frame_offset(shape, -val.value)
+ for v, reg in self.rm.reg_bindings.items():
+ if reg is r.r0:
+ continue
+ if (isinstance(v, BoxPtr) and self.rm.stays_alive(v)):
+ if use_copy_area:
+ assert reg in self.rm.REGLOC_TO_COPY_AREA_OFS
+ area_offset = self.rm.REGLOC_TO_COPY_AREA_OFS[reg]
+ gcrootmap.add_frame_offset(shape, area_offset)
+ else:
+ assert 0, 'sure??'
+ return gcrootmap.compress_callshape(shape,
+ self.assembler.datablockwrapper)
+
+ prepare_op_debug_merge_point = void
+ prepare_op_jit_debug = void
+ prepare_op_keepalive = void
+
+ def prepare_op_cond_call_gc_wb(self, op, fcond):
+ assert op.result is None
+        # we force all arguments into registers because they will be needed
+        # anyway by the following setfield_gc or setarrayitem_gc; this
+        # avoids loading them twice from memory.
+ N = op.numargs()
+ args = op.getarglist()
+ arglocs = [self._ensure_value_is_boxed(op.getarg(i), args)
+ for i in range(N)]
+ tmp = self.get_scratch_reg(INT)
+ arglocs.append(tmp)
+ return arglocs
+
+ prepare_op_cond_call_gc_wb_array = prepare_op_cond_call_gc_wb
+
+ def prepare_op_force_token(self, op, fcond):
+ res_loc = self.force_allocate_reg(op.result)
+ self.possibly_free_var(op.result)
+ return [res_loc]
+
+ def prepare_op_label(self, op, fcond):
+ descr = op.getdescr()
+ assert isinstance(descr, TargetToken)
+ inputargs = op.getarglist()
+ arglocs = [None] * len(inputargs)
+ #
+        # we use force_spill() on the boxes that are not really going to be
+        # used any more in the loop, but that are kept alive anyway because
+        # they appear in a following LABEL's or JUMP's arguments, or in the
+        # fail_args of some guard
+ position = self.rm.position
+ for arg in inputargs:
+ assert isinstance(arg, Box)
+ if self.last_real_usage.get(arg, -1) <= position:
+ self.force_spill_var(arg)
+
+ #
+ for i in range(len(inputargs)):
+ arg = inputargs[i]
+ assert isinstance(arg, Box)
+ loc = self.loc(arg)
+ arglocs[i] = loc
+ if loc.is_reg():
+ self.frame_manager.mark_as_free(arg)
+ #
+ descr._arm_arglocs = arglocs
+ descr._arm_loop_code = self.assembler.mc.currpos()
+ descr._arm_clt = self.assembler.current_clt
+ self.assembler.target_tokens_currently_compiling[descr] = None
+ self.possibly_free_vars_for_op(op)
+ #
+ # if the LABEL's descr is precisely the target of the JUMP at the
+ # end of the same loop, i.e. if what we are compiling is a single
+ # loop that ends up jumping to this LABEL, then we can now provide
+ # the hints about the expected position of the spilled variables.
+ jump_op = self.final_jump_op
+ if jump_op is not None and jump_op.getdescr() is descr:
+ self._compute_hint_frame_locations_from_descr(descr)
+
+ def prepare_guard_call_may_force(self, op, guard_op, fcond):
+ args = self._prepare_call(op, save_all_regs=True)
+ return self._prepare_guard(guard_op, args)
+ prepare_guard_call_release_gil = prepare_guard_call_may_force
+
+ def prepare_guard_call_assembler(self, op, guard_op, fcond):
+ descr = op.getdescr()
+ assert isinstance(descr, JitCellToken)
+ jd = descr.outermost_jitdriver_sd
+ assert jd is not None
+ vable_index = jd.index_of_virtualizable
+ if vable_index >= 0:
+ self._sync_var(op.getarg(vable_index))
+ vable = self.frame_manager.loc(op.getarg(vable_index))
+ else:
+ vable = imm(0)
+ # make sure the call result location is free
+ tmploc = self.get_scratch_reg(INT, selected_reg=r.r0)
+ self.possibly_free_vars(guard_op.getfailargs())
+ return [vable, tmploc] + self._prepare_call(op, save_all_regs=True)
+
+ def _prepare_args_for_new_op(self, new_args):
+ gc_ll_descr = self.cpu.gc_ll_descr
+ args = gc_ll_descr.args_for_new(new_args)
+ arglocs = []
+ for i in range(len(args)):
+ arg = args[i]
+ t = TempInt()
+ l = self.force_allocate_reg(t, selected_reg=r.all_regs[i])
+ self.assembler.load(l, imm(arg))
+ arglocs.append(t)
+ return arglocs
+
+ prepare_op_float_add = prepare_float_op(name='prepare_op_float_add')
+ prepare_op_float_sub = prepare_float_op(name='prepare_op_float_sub')
+ prepare_op_float_mul = prepare_float_op(name='prepare_op_float_mul')
+ prepare_op_float_truediv = prepare_float_op(
+ name='prepare_op_float_truediv')
+ prepare_op_float_lt = prepare_float_op(float_result=False,
+ name='prepare_op_float_lt')
+ prepare_op_float_le = prepare_float_op(float_result=False,
+ name='prepare_op_float_le')
+ prepare_op_float_eq = prepare_float_op(float_result=False,
+ name='prepare_op_float_eq')
+ prepare_op_float_ne = prepare_float_op(float_result=False,
+ name='prepare_op_float_ne')
+ prepare_op_float_gt = prepare_float_op(float_result=False,
+ name='prepare_op_float_gt')
+ prepare_op_float_ge = prepare_float_op(float_result=False,
+ name='prepare_op_float_ge')
+ prepare_op_float_neg = prepare_float_op(base=False,
+ name='prepare_op_float_neg')
+ prepare_op_float_abs = prepare_float_op(base=False,
+ name='prepare_op_float_abs')
+
+ prepare_guard_float_lt = prepare_float_op(guard=True,
+ float_result=False, name='prepare_guard_float_lt')
+ prepare_guard_float_le = prepare_float_op(guard=True,
+ float_result=False, name='prepare_guard_float_le')
+ prepare_guard_float_eq = prepare_float_op(guard=True,
+ float_result=False, name='prepare_guard_float_eq')
+ prepare_guard_float_ne = prepare_float_op(guard=True,
+ float_result=False, name='prepare_guard_float_ne')
+ prepare_guard_float_gt = prepare_float_op(guard=True,
+ float_result=False, name='prepare_guard_float_gt')
+ prepare_guard_float_ge = prepare_float_op(guard=True,
+ float_result=False, name='prepare_guard_float_ge')
+
+ def prepare_op_math_sqrt(self, op, fcond):
+ loc = self._ensure_value_is_boxed(op.getarg(1))
+ self.possibly_free_vars_for_op(op)
+ self.free_temp_vars()
+ res = self.vfprm.force_allocate_reg(op.result)
+ self.possibly_free_var(op.result)
+ return [loc, res]
+
+ def prepare_op_cast_float_to_int(self, op, fcond):
+ loc1 = self._ensure_value_is_boxed(op.getarg(0))
+ res = self.rm.force_allocate_reg(op.result)
+ return [loc1, res]
+
+ def prepare_op_cast_int_to_float(self, op, fcond):
+ loc1 = self._ensure_value_is_boxed(op.getarg(0))
+ res = self.vfprm.force_allocate_reg(op.result)
+ return [loc1, res]
+
+ def prepare_force_spill(self, op, fcond):
+ self.force_spill_var(op.getarg(0))
+ return []
+
+ prepare_op_convert_float_bytes_to_longlong = prepare_float_op(base=False,
+ name='prepare_op_convert_float_bytes_to_longlong')
+ prepare_op_convert_longlong_bytes_to_float = prepare_float_op(base=False,
+ name='prepare_op_convert_longlong_bytes_to_float')
+
+ def prepare_op_read_timestamp(self, op, fcond):
+ loc = self.get_scratch_reg(INT)
+ res = self.vfprm.force_allocate_reg(op.result)
+ return [loc, res]
+
+
+def add_none_argument(fn):
+ return lambda self, op, fcond: fn(self, op, None, fcond)
+
+
+def notimplemented(self, op, fcond):
+ print "[ARM/regalloc] %s not implemented" % op.getopname()
+ raise NotImplementedError(op)
+
+
+def notimplemented_with_guard(self, op, guard_op, fcond):
+ print "[ARM/regalloc] %s with guard %s not implemented" % \
+ (op.getopname(), guard_op.getopname())
+ raise NotImplementedError(op)
+
+operations = [notimplemented] * (rop._LAST + 1)
+operations_with_guard = [notimplemented_with_guard] * (rop._LAST + 1)
+
+
+def get_scale(size):
+ scale = 0
+ while (1 << scale) < size:
+ scale += 1
+ assert (1 << scale) == size
+ return scale
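
In other words, get_scale() is an asserting log2 for power-of-two item
sizes, e.g.:

    assert [get_scale(n) for n in (1, 2, 4, 8)] == [0, 1, 2, 3]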
+
+for key, value in rop.__dict__.items():
+ key = key.lower()
+ if key.startswith('_'):
+ continue
+ methname = 'prepare_op_%s' % key
+ if hasattr(Regalloc, methname):
+ func = getattr(Regalloc, methname).im_func
+ operations[value] = func
+
+for key, value in rop.__dict__.items():
+ key = key.lower()
+ if key.startswith('_'):
+ continue
+ methname = 'prepare_guard_%s' % key
+ if hasattr(Regalloc, methname):
+ func = getattr(Regalloc, methname).im_func
+ operations_with_guard[value] = func
+ operations[value] = add_none_argument(func)
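
The two loops above build flat dispatch tables indexed by opcode number,
discovering the handlers by name. A self-contained sketch of the same
pattern, using hypothetical FakeRop and FakeRegalloc stand-ins instead of
the real rop and Regalloc classes:

    class FakeRop(object):
        INT_ADD = 0
        INT_SUB = 1
        _LAST = 1

    class FakeRegalloc(object):
        def prepare_op_int_add(self, op, fcond):
            return ['locations for INT_ADD']

    def not_implemented(self, op, fcond):
        raise NotImplementedError(op)

    table = [not_implemented] * (FakeRop._LAST + 1)
    for key, value in FakeRop.__dict__.items():
        if key.startswith('_'):
            continue
        # fetch the handler as a plain function, like im_func above
        func = FakeRegalloc.__dict__.get('prepare_op_%s' % key.lower())
        if func is not None:
            table[value] = func

    assert table[FakeRop.INT_ADD] is FakeRegalloc.__dict__['prepare_op_int_add']
    assert table[FakeRop.INT_SUB] is not_implemented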
diff --git a/pypy/jit/backend/arm/registers.py b/pypy/jit/backend/arm/registers.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/registers.py
@@ -0,0 +1,32 @@
+from pypy.jit.backend.arm.locations import VFPRegisterLocation
+from pypy.jit.backend.arm.locations import RegisterLocation
+
+registers = [RegisterLocation(i) for i in range(16)]
+vfpregisters = [VFPRegisterLocation(i) for i in range(16)]
+[r0, r1, r2, r3, r4, r5, r6, r7,
+ r8, r9, r10, r11, r12, r13, r14, r15] = registers
+
+# VFP registers interpreted as 64-bit registers
+[d0, d1, d2, d3, d4, d5, d6, d7,
+ d8, d9, d10, d11, d12, d13, d14, d15] = vfpregisters
+
+# aliases for registers
+fp = r11
+ip = r12
+sp = r13
+lr = r14
+pc = r15
+vfp_ip = d15
+
+all_regs = [r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10]
+all_vfp_regs = vfpregisters[:-1]
+
+argument_regs = caller_resp = [r0, r1, r2, r3]
+callee_resp = [r4, r5, r6, r7, r8, r9, r10, fp]
+callee_saved_registers = callee_resp + [lr]
+callee_restored_registers = callee_resp + [pc]
+
+vfp_argument_regs = caller_vfp_resp = [d0, d1, d2, d3, d4, d5, d6, d7]
+callee_vfp_resp = [d8, d9, d10, d11, d12, d13, d14, d15]
+
+callee_saved_vfp_registers = callee_vfp_resp
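
Note that callee_saved_registers and callee_restored_registers differ only
in their last entry: the prologue pushes lr and the epilogue pops that same
stack slot into pc, the usual ARM idiom for returning without a separate
branch. A quick sketch of the correspondence, with raw register numbers
standing in for the register objects:

    callee_resp = [4, 5, 6, 7, 8, 9, 10, 11]    # r4-r10 and fp (r11)
    callee_saved = callee_resp + [14]           # plus lr, pushed on entry
    callee_restored = callee_resp + [15]        # plus pc, popped on exit
    # the lists line up slot for slot; the saved lr is reloaded into pc
    assert callee_saved[:-1] == callee_restored[:-1]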
diff --git a/pypy/jit/backend/arm/runner.py b/pypy/jit/backend/arm/runner.py
new file mode 100755
--- /dev/null
+++ b/pypy/jit/backend/arm/runner.py
@@ -0,0 +1,167 @@
+from pypy.jit.backend.arm.assembler import AssemblerARM
+from pypy.jit.backend.arm.registers import all_regs, all_vfp_regs
+from pypy.jit.backend.llsupport.llmodel import AbstractLLCPU
+from pypy.rpython.llinterp import LLInterpreter
+from pypy.rpython.lltypesystem import lltype, rffi, llmemory
+from pypy.rlib.jit_hooks import LOOP_RUN_CONTAINER
+from pypy.jit.backend.arm.arch import FORCE_INDEX_OFS
+
+
+class AbstractARMCPU(AbstractLLCPU):
+
+ supports_floats = True
+ supports_longlong = False # XXX requires an implementation of
+ # read_timestamp that works in user mode
+
+    use_hf_abi = False       # whether to use the hard-float ABI
+
+ def __init__(self, rtyper, stats, opts=None, translate_support_code=False,
+ gcdescr=None):
+ if gcdescr is not None:
+ gcdescr.force_index_ofs = FORCE_INDEX_OFS
+ AbstractLLCPU.__init__(self, rtyper, stats, opts,
+ translate_support_code, gcdescr)
+
+ def set_debug(self, flag):
+ return self.assembler.set_debug(flag)
+
+ def setup(self):
+ if self.opts is not None:
+ failargs_limit = self.opts.failargs_limit
+ else:
+ failargs_limit = 1000
+ self.assembler = AssemblerARM(self, failargs_limit=failargs_limit)
+
+ def setup_once(self):
+ self.assembler.setup_once()
+
+ def finish_once(self):
+ self.assembler.finish_once()
+
+ def compile_loop(self, inputargs, operations, looptoken,
+ log=True, name=''):
+ return self.assembler.assemble_loop(name, inputargs, operations,
+ looptoken, log=log)
+
+ def compile_bridge(self, faildescr, inputargs, operations,
+ original_loop_token, log=True):
+ clt = original_loop_token.compiled_loop_token
+ clt.compiling_a_bridge()
+ return self.assembler.assemble_bridge(faildescr, inputargs, operations,
+ original_loop_token, log=log)
+
+ def get_latest_value_float(self, index):
+ return self.assembler.fail_boxes_float.getitem(index)
+
+ def get_latest_value_int(self, index):
+ return self.assembler.fail_boxes_int.getitem(index)
+
+ def get_latest_value_ref(self, index):
+ return self.assembler.fail_boxes_ptr.getitem(index)
+
+ def get_latest_value_count(self):
+ return self.assembler.fail_boxes_count
+
+ def get_latest_force_token(self):
+ return self.assembler.fail_force_index
+
+ def get_on_leave_jitted_hook(self):
+ return self.assembler.leave_jitted_hook
+
+ def clear_latest_values(self, count):
+ setitem = self.assembler.fail_boxes_ptr.setitem
+ null = lltype.nullptr(llmemory.GCREF.TO)
+ for index in range(count):
+ setitem(index, null)
+
+ def make_execute_token(self, *ARGS):
+ FUNCPTR = lltype.Ptr(lltype.FuncType(ARGS, lltype.Signed))
+
+ def execute_token(executable_token, *args):
+ clt = executable_token.compiled_loop_token
+ assert len(args) == clt._debug_nbargs
+ #
+ addr = executable_token._arm_func_addr
+ assert addr % 8 == 0
+ func = rffi.cast(FUNCPTR, addr)
+ #llop.debug_print(lltype.Void, ">>>> Entering", addr)
+ prev_interpreter = None # help flow space
+ if not self.translate_support_code:
+ prev_interpreter = LLInterpreter.current_interpreter
+ LLInterpreter.current_interpreter = self.debug_ll_interpreter
+ try:
+ fail_index = func(*args)
+ finally:
+ if not self.translate_support_code:
+ LLInterpreter.current_interpreter = prev_interpreter
+ #llop.debug_print(lltype.Void, "<<<< Back")
+ return self.get_fail_descr_from_number(fail_index)
+ return execute_token
+
+ def cast_ptr_to_int(x):
+ adr = llmemory.cast_ptr_to_adr(x)
+ return ArmCPU.cast_adr_to_int(adr)
+ cast_ptr_to_int._annspecialcase_ = 'specialize:arglltype(0)'
+ cast_ptr_to_int = staticmethod(cast_ptr_to_int)
+
+ all_null_registers = lltype.malloc(rffi.LONGP.TO,
+ len(all_vfp_regs) * 2 + len(all_regs),
+ flavor='raw', zero=True, immortal=True)
+
+ def force(self, addr_of_force_index):
+ TP = rffi.CArrayPtr(lltype.Signed)
+ fail_index = rffi.cast(TP, addr_of_force_index)[0]
+ assert fail_index >= 0, "already forced!"
+ faildescr = self.get_fail_descr_from_number(fail_index)
+ rffi.cast(TP, addr_of_force_index)[0] = ~fail_index
+ bytecode = self.assembler._find_failure_recovery_bytecode(faildescr)
+        addr_all_null_registers = rffi.cast(rffi.LONG, self.all_null_registers)
+        # start of "no gc operation!" block
+        fail_index_2 = self.assembler.failure_recovery_func(
+            bytecode,
+            addr_of_force_index,
+            addr_all_null_registers)
+ self.assembler.leave_jitted_hook()
+ # end of "no gc operation!" block
+ assert fail_index == fail_index_2
+ return faildescr
+
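force() marks a guard as already forced by overwriting the stored
descriptor number with its bitwise complement: any index n >= 0 becomes
~n < 0, which is exactly what the "already forced!" assertion checks, and
the original number stays recoverable. The trick in miniature:

    fail_index = 5
    forced_marker = ~fail_index      # -6; negative for any n >= 0
    assert forced_marker < 0         # a second force() would assert here
    assert ~forced_marker == fail_index
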
+ def redirect_call_assembler(self, oldlooptoken, newlooptoken):
+ self.assembler.redirect_call_assembler(oldlooptoken, newlooptoken)
+
+ def invalidate_loop(self, looptoken):
+ """Activate all GUARD_NOT_INVALIDATED in the loop and its attached
+ bridges. Before this call, all GUARD_NOT_INVALIDATED do nothing;
+ after this call, they all fail. Note that afterwards, if one such
+ guard fails often enough, it has a bridge attached to it; it is
+ possible then to re-call invalidate_loop() on the same looptoken,
+ which must invalidate all newer GUARD_NOT_INVALIDATED, but not the
+ old one that already has a bridge attached to it."""
+ from pypy.jit.backend.arm.codebuilder import ARMv7Builder
+
+ for jmp, tgt in looptoken.compiled_loop_token.invalidate_positions:
+ mc = ARMv7Builder()
+ mc.B_offs(tgt)
+ mc.copy_to_raw_memory(jmp)
+ # positions invalidated
+ looptoken.compiled_loop_token.invalidate_positions = []
+
+ # should be combined with other ll backends
+ def get_all_loop_runs(self):
+ l = lltype.malloc(LOOP_RUN_CONTAINER,
+ len(self.assembler.loop_run_counters))
+ for i, ll_s in enumerate(self.assembler.loop_run_counters):
+ l[i].type = ll_s.type
+ l[i].number = ll_s.number
+ l[i].counter = ll_s.i
+ return l
+
+class CPU_ARM(AbstractARMCPU):
+ """ARM v7 uses softfp ABI, requires vfp"""
+ pass
+ArmCPU = CPU_ARM
+
+class CPU_ARMHF(AbstractARMCPU):
+ """ARM v7 uses hardfp ABI, requires vfp"""
+ use_hf_abi = True
+ supports_floats = False
diff --git a/pypy/jit/backend/arm/shift.py b/pypy/jit/backend/arm/shift.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/shift.py
@@ -0,0 +1,6 @@
+# Shift-type encodings, according to the ARM ARM, section A8.4
+LSL = 0x0
+LSR = 0x1
+ASR = 0x2
+ROR = 0x3
+RRX = 0x3 # with imm = 0
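
These two-bit codes fill the shift-type field of an ARM data-processing
operand. For a register operand shifted by an immediate, bits [6:5] hold
the shift type and bits [11:7] the shift amount. A hypothetical encoder
(not part of this commit) showing where the constants land:

    LSL, LSR, ASR, ROR = 0x0, 0x1, 0x2, 0x3

    def shifted_reg_operand(rm, shifttype, amount):
        assert 0 <= rm <= 15 and 0 <= amount <= 31
        return (amount << 7) | (shifttype << 5) | rm

    # r3 logically shifted right by 2: imm5=2, type=LSR, Rm=3
    assert shifted_reg_operand(3, LSR, 2) == 0x123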
diff --git a/pypy/jit/backend/arm/test/__init__.py b/pypy/jit/backend/arm/test/__init__.py
new file mode 100644
diff --git a/pypy/jit/backend/arm/test/conftest.py b/pypy/jit/backend/arm/test/conftest.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/test/conftest.py
@@ -0,0 +1,21 @@
+"""
+This conftest adds an option to run the translation tests which by default will
+be disabled.
+Also it disables the backend tests on non ARMv7 platforms
+"""
+import py, os
+from pypy.jit.backend import detect_cpu
+
+cpu = detect_cpu.autodetect()
+
+def pytest_addoption(parser):
+ group = parser.getgroup('translation test options')
+ group.addoption('--run-translation-tests',
+ action="store_true",
+ default=False,
+ dest="run_translation_tests",
+ help="run tests that translate code")
+
+def pytest_runtest_setup(item):
+ if cpu not in ('arm', 'armhf'):
+ py.test.skip("ARM(v7) tests skipped: cpu is %r" % (cpu,))
diff --git a/pypy/jit/backend/arm/test/gen.py b/pypy/jit/backend/arm/test/gen.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/test/gen.py
@@ -0,0 +1,54 @@
+import os
+from pypy.tool.udir import udir
+import tempfile
+from pypy.jit.backend.arm.test.support import AS
+class ASMInstruction(object):
+
+ asm_opts = '-mfpu=neon -mcpu=cortex-a8 -march=armv7-a'
+ body = """.section .text
+.arm
+_start: .global _start
+ .global main
+ b main
+main:
+ .ascii "START"
+ %s
+ .ascii "END"
+"""
+ begin_tag = 'START'
+ end_tag = 'END'
+ base_name = 'test_%d.asm'
+ index = 0
+
+ def __init__(self, instr):
+ self.instr = instr
+ self.file = udir.join(self.base_name % self.index)
+ while self.file.check():
+ self.index += 1
+ self.file = udir.join(self.base_name % self.index)
+
+    def encode(self):
+        f = open("%s/a.out" % (udir,), 'rb')
+        data = f.read()
+        f.close()
+        i = data.find(self.begin_tag)
+        assert i >= 0
+        j = data.find(self.end_tag, i)
+        assert j >= 0
+        as_code = data[i + len(self.begin_tag):j]
+        return as_code
+
+ def assemble(self, *args):
+ res = self.body % (self.instr)
+ self.file.write(res)
+ os.system("%s --fatal-warnings %s %s -o %s/a.out" % (AS, self.asm_opts, self.file, udir))
+
+
+def assemble(instr):
+ a = ASMInstruction(instr)
+ a.assemble(instr)
+ return a.encode()
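
encode() relies on a simple trick: the instruction under test is assembled
between literal ASCII markers, so the bytes found between those markers in
the resulting a.out are exactly its encoding. The trick in isolation, with
made-up instruction bytes:

    data = 'ELF junk...START\x01\x02\x03\x04END...more junk'
    i = data.find('START')
    j = data.find('END', i)
    assert i >= 0 and j >= 0
    assert data[i + len('START'):j] == '\x01\x02\x03\x04'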
diff --git a/pypy/jit/backend/arm/test/support.py b/pypy/jit/backend/arm/test/support.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/test/support.py
@@ -0,0 +1,89 @@
+import os
+import py
+import pytest
+
+from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.jit.backend.detect_cpu import getcpuclass
+from pypy.jit.metainterp.test import support
+from pypy.rlib.jit import JitDriver
+
+class JitARMMixin(support.LLJitMixin):
+ type_system = 'lltype'
+ CPUClass = getcpuclass()
+
+ def check_jumps(self, maxcount):
+ pass
+
+if os.uname()[1] == 'llaima.local':
+ AS = '~/Code/arm-jit/android/android-ndk-r4b//build/prebuilt/darwin-x86/arm-eabi-4.4.0/arm-eabi/bin/as'
+else:
+ AS = 'as'
+
+def run_asm(asm):
+ BOOTSTRAP_TP = lltype.FuncType([], lltype.Signed)
+ addr = asm.mc.materialize(asm.cpu.asmmemmgr, [], None)
+ assert addr % 8 == 0
+ func = rffi.cast(lltype.Ptr(BOOTSTRAP_TP), addr)
+ asm.mc._dump_trace(addr, 'test.asm')
+ return func()
+
+def skip_unless_run_slow_tests():
+ if not pytest.config.option.run_slow_tests:
+ py.test.skip("use --slow to execute this long-running test")
+
+def requires_arm_as():
+ import commands
+ i = commands.getoutput("%s -version </dev/null -o /dev/null 2>&1" % AS)
+ check_skip(i)
+
+def check_skip(inp, search='arm', msg='only for arm'):
+    # skip the test unless `search` occurs in the assembler's version output
+    if inp.find(search) < 0:
+        py.test.skip(msg)
+
+# generators for asm tests
+
+def gen_test_function(name, asm, args, kwargs=None, asm_ext=None):
+ if kwargs is None:
+ kwargs = {}
+ if asm_ext is None:
+ asm_ext = ''
+ def f(self):
+ func = getattr(self.cb, name)
+ func(*args, **kwargs)
+ try:
+ f_name = name[:name.index('_')]
+        except ValueError:
+ f_name = name
+ self.assert_equal('%s%s %s' % (f_name, asm_ext, asm))
+ return f
+
+def define_test(cls, name, test_case, base_name=None):
+ import types
+ if base_name is None:
+ base_name = ''
+ templ = 'test_generated_%s_%s'
+ test_name = templ % (base_name, name)
+ if hasattr(cls, test_name):
+ i = 1
+ new_test_name = test_name
+ while hasattr(cls, new_test_name):
+ new_test_name = '%s_%d' % (test_name, i)
+ i += 1
+ test_name = new_test_name
+ if not isinstance(test_case, types.FunctionType):
+ asm, sig = test_case[0:2]
+ kw_args = None
+ asm_ext = None
+ if len(test_case) > 2:
+ kw_args = test_case[2]
+ if len(test_case) > 3:
+ asm_ext = test_case[3]
+ f = gen_test_function(name, asm, sig, kw_args, asm_ext)
+ else:
+ f = test_case
+ setattr(cls, test_name, f)
diff --git a/pypy/jit/backend/arm/test/test_arch.py b/pypy/jit/backend/arm/test/test_arch.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/test/test_arch.py
@@ -0,0 +1,23 @@
+from pypy.jit.backend.arm import arch
+
+def test_mod():
+ assert arch.arm_int_mod(10, 2) == 0
+ assert arch.arm_int_mod(11, 2) == 1
+ assert arch.arm_int_mod(11, 3) == 2
+
+def test_mod2():
+ assert arch.arm_int_mod(-10, 2) == 0
+ assert arch.arm_int_mod(-11, 2) == -1
+ assert arch.arm_int_mod(-11, 3) == -2
+
+def test_mod3():
+ assert arch.arm_int_mod(10, -2) == 0
+ assert arch.arm_int_mod(11, -2) == 1
+ assert arch.arm_int_mod(11, -3) == 2
+
+
+def test_div():
+ assert arch.arm_int_div(-7, 2) == -3
+ assert arch.arm_int_div(9, 2) == 4
+ assert arch.arm_int_div(10, 5) == 2
+
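These tests pin down the semantics of the helper routines: division
truncates toward zero and the remainder takes the sign of the dividend (C
semantics, unlike Python's floor-based operators). A reference model that
satisfies every assertion above, assuming that reading of the tests:

    def ref_div(a, b):
        q = abs(a) // abs(b)
        return -q if (a < 0) != (b < 0) else q

    def ref_mod(a, b):
        return a - b * ref_div(a, b)

    assert ref_div(-7, 2) == -3 and ref_div(9, 2) == 4
    assert ref_mod(-11, 2) == -1 and ref_mod(11, -3) == 2
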
diff --git a/pypy/jit/backend/arm/test/test_assembler.py b/pypy/jit/backend/arm/test/test_assembler.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/test/test_assembler.py
@@ -0,0 +1,252 @@
+from pypy.jit.backend.arm import conditions as c
+from pypy.jit.backend.arm import registers as r
+from pypy.jit.backend.arm.arch import arm_int_div
+from pypy.jit.backend.arm.assembler import AssemblerARM
+from pypy.jit.backend.arm.locations import imm
+from pypy.jit.backend.arm.test.support import run_asm
+from pypy.jit.backend.detect_cpu import getcpuclass
+from pypy.jit.metainterp.resoperation import rop
+
+from pypy.rpython.annlowlevel import llhelper
+from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.jit.metainterp.history import JitCellToken
+from pypy.jit.backend.model import CompiledLoopToken
+
+CPU = getcpuclass()
+
+
+class TestRunningAssembler(object):
+ def setup_method(self, method):
+ cpu = CPU(None, None)
+ self.a = AssemblerARM(cpu)
+ self.a.setup_once()
+ token = JitCellToken()
+ clt = CompiledLoopToken(cpu, 0)
+ clt.allgcrefs = []
+ token.compiled_loop_token = clt
+ self.a.setup(token, [])
+
+ def test_make_operation_list(self):
+ i = rop.INT_ADD
+ from pypy.jit.backend.arm import assembler
+ assert assembler.asm_operations[i] \
+ is AssemblerARM.emit_op_int_add.im_func
+
+ def test_load_small_int_to_reg(self):
+ self.a.gen_func_prolog()
+ self.a.mc.gen_load_int(r.r0.value, 123)
+ self.a.gen_func_epilog()
+ assert run_asm(self.a) == 123
+
+ def test_load_medium_int_to_reg(self):
+ self.a.gen_func_prolog()
+ self.a.mc.gen_load_int(r.r0.value, 0xBBD7)
+ self.a.gen_func_epilog()
+ assert run_asm(self.a) == 48087
+
+ def test_load_int_to_reg(self):
+ self.a.gen_func_prolog()
+ self.a.mc.gen_load_int(r.r0.value, 0xFFFFFF85)
+ self.a.gen_func_epilog()
+ assert run_asm(self.a) == -123
+
+ def test_load_neg_int_to_reg(self):
+ self.a.gen_func_prolog()
+ self.a.mc.gen_load_int(r.r0.value, -110)
+ self.a.gen_func_epilog()
+ assert run_asm(self.a) == -110
+
+ def test_load_neg_int_to_reg2(self):
+ self.a.gen_func_prolog()
+ self.a.mc.gen_load_int(r.r0.value, -3)
+ self.a.gen_func_epilog()
+ assert run_asm(self.a) == -3
+
+ def test_load_int1(self):
+ self.a.gen_func_prolog()
+ self.a.mc.gen_load_int(r.r0.value, 440)
+ self.a.gen_func_epilog()
+ assert run_asm(self.a) == 440
+
+ def test_load_int2(self):
+ self.a.gen_func_prolog()
+ self.a.mc.gen_load_int(r.r0.value, 464)
+ self.a.gen_func_epilog()
+ assert run_asm(self.a) == 464
+
+ def test_or(self):
+ self.a.gen_func_prolog()
+ self.a.mc.MOV_ri(r.r1.value, 8)
+ self.a.mc.MOV_ri(r.r2.value, 8)
+ self.a.mc.ORR_rr(r.r0.value, r.r1.value, r.r2.value, 4)
+ self.a.gen_func_epilog()
+ assert run_asm(self.a) == 0x88
+
+ def test_sub(self):
+ self.a.gen_func_prolog()
+ self.a.mc.gen_load_int(r.r1.value, 123456)
+ self.a.mc.SUB_ri(r.r0.value, r.r1.value, 123)
+ self.a.gen_func_epilog()
+ assert run_asm(self.a) == 123333
+
+ def test_cmp(self):
+ self.a.gen_func_prolog()
+ self.a.mc.gen_load_int(r.r1.value, 22)
+ self.a.mc.CMP_ri(r.r1.value, 123)
+ self.a.mc.MOV_ri(r.r0.value, 1, c.LE)
+ self.a.mc.MOV_ri(r.r0.value, 0, c.GT)
+ self.a.gen_func_epilog()
+ assert run_asm(self.a) == 1
+
+ def test_int_le_false(self):
+ self.a.gen_func_prolog()
+ self.a.mc.gen_load_int(r.r1.value, 2222)
+ self.a.mc.CMP_ri(r.r1.value, 123)
+ self.a.mc.MOV_ri(r.r0.value, 1, c.LE)
+ self.a.mc.MOV_ri(r.r0.value, 0, c.GT)
+ self.a.gen_func_epilog()
+ assert run_asm(self.a) == 0
+
+ def test_simple_jump(self):
+ self.a.gen_func_prolog()
+ self.a.mc.MOV_ri(r.r1.value, 1)
+ loop_head = self.a.mc.currpos()
+ self.a.mc.CMP_ri(r.r1.value, 0) # z=0, z=1
+ self.a.mc.MOV_ri(r.r1.value, 0, cond=c.NE)
+ self.a.mc.MOV_ri(r.r1.value, 7, cond=c.EQ)
+ self.a.mc.B_offs(loop_head, c.NE)
+ self.a.mc.MOV_rr(r.r0.value, r.r1.value)
+ self.a.gen_func_epilog()
+ assert run_asm(self.a) == 7
+
+ def test_jump(self):
+ self.a.gen_func_prolog()
+ self.a.mc.MOV_ri(r.r1.value, 1)
+ loop_head = self.a.mc.currpos()
+ self.a.mc.ADD_ri(r.r1.value, r.r1.value, 1)
+ self.a.mc.CMP_ri(r.r1.value, 9)
+ self.a.mc.B_offs(loop_head, c.NE)
+ self.a.mc.MOV_rr(r.r0.value, r.r1.value)
+ self.a.gen_func_epilog()
+ assert run_asm(self.a) == 9
+
+ def test_B_offs_imm(self):
+ self.a.mc.PUSH([reg.value for reg in r.callee_saved_registers])
+ self.a.mc.MOV_ri(r.r0.value, 0)
+ self.a.mc.MOV_ri(r.r1.value, 0)
+ self.a.mc.CMP_rr(r.r0.value, r.r1.value)
+ pos = self.a.mc.currpos()
+ self.a.mc.MOV_ri(r.r0.value, 123, cond=c.NE)
+
+ for x in range(15):
+ self.a.mc.POP(
+ [reg.value for reg in r.callee_restored_registers], cond=c.NE)
+
+ self.a.mc.MOV_ri(r.r1.value, 33)
+ self.a.mc.MOV_ri(r.r0.value, 23)
+ self.a.mc.CMP_rr(r.r0.value, r.r1.value)
+ self.a.mc.B_offs(pos)
+ assert run_asm(self.a) == 123
+
+ def test_B_offs_reg(self):
+ self.a.mc.PUSH([reg.value for reg in r.callee_saved_registers])
+ self.a.mc.MOV_ri(r.r0.value, 0)
+ self.a.mc.MOV_ri(r.r1.value, 0)
+ self.a.mc.CMP_rr(r.r0.value, r.r1.value)
+ pos = self.a.mc.currpos()
+ self.a.mc.MOV_ri(r.r0.value, 123, cond=c.NE)
+
+ for x in range(100):
+ self.a.mc.POP(
+ [reg.value for reg in r.callee_restored_registers], cond=c.NE)
+
+ self.a.mc.MOV_ri(r.r1.value, 33)
+ self.a.mc.MOV_ri(r.r0.value, 23)
+ self.a.mc.CMP_rr(r.r0.value, r.r1.value)
+ self.a.mc.B_offs(pos)
+ assert run_asm(self.a) == 123
+
+ def test_call_python_func(self):
+ functype = lltype.Ptr(lltype.FuncType([lltype.Signed], lltype.Signed))
+ call_addr = rffi.cast(lltype.Signed, llhelper(functype, callme))
+ self.a.gen_func_prolog()
+ self.a.mc.MOV_ri(r.r0.value, 123)
+ self.a.mc.BL(call_addr)
+ self.a.gen_func_epilog()
+ assert run_asm(self.a) == 133
+
+ def test_division(self):
+ self.a.gen_func_prolog()
+ self.a.mc.MOV_ri(r.r0.value, 123)
+ self.a.mc.MOV_ri(r.r1.value, 2)
+
+ # call to div
+ self.a.mc.PUSH(range(2, 12))
+ div_addr = rffi.cast(lltype.Signed, arm_int_div)
+ self.a.mc.BL(div_addr)
+ self.a.mc.POP(range(2, 12))
+ self.a.gen_func_epilog()
+ assert run_asm(self.a) == 61
+
+ def test_DIV(self):
+ self.a.gen_func_prolog()
+ self.a.mc.MOV_ri(r.r0.value, 123)
+ self.a.mc.MOV_ri(r.r1.value, 2)
+ self.a.mc.DIV()
+ self.a.gen_func_epilog()
+ assert run_asm(self.a) == 61
+
+ def test_DIV2(self):
+ self.a.gen_func_prolog()
+ self.a.mc.gen_load_int(r.r0.value, -110)
+ self.a.mc.gen_load_int(r.r1.value, 3)
+ self.a.mc.DIV()
+ self.a.gen_func_epilog()
+ assert run_asm(self.a) == -36
+
+ def test_DIV3(self):
+ self.a.gen_func_prolog()
+ self.a.mc.gen_load_int(r.r8.value, 110)
+ self.a.mc.gen_load_int(r.r9.value, -3)
+ self.a.mc.MOV_rr(r.r0.value, r.r8.value)
+ self.a.mc.MOV_rr(r.r1.value, r.r9.value)
+ self.a.mc.DIV()
+ self.a.gen_func_epilog()
+ assert run_asm(self.a) == -36
+
+ def test_bl_with_conditional_exec(self):
+ functype = lltype.Ptr(lltype.FuncType([lltype.Signed], lltype.Signed))
+ call_addr = rffi.cast(lltype.Signed, llhelper(functype, callme))
+ self.a.gen_func_prolog()
+ self.a.mc.MOV_ri(r.r0.value, 123)
+ self.a.mc.CMP_ri(r.r0.value, 1)
+ self.a.mc.BL(call_addr, c.NE)
+ self.a.gen_func_epilog()
+ assert run_asm(self.a) == 133
+
+ def test_mov_small_imm_loc_to_loc(self):
+ self.a.gen_func_prolog()
+ self.a.mov_loc_loc(imm(12), r.r0)
+ self.a.gen_func_epilog()
+ assert run_asm(self.a) == 12
+
+ def test_mov_large_imm_loc_to_loc(self):
+ self.a.gen_func_prolog()
+ self.a.mov_loc_loc(imm(2478), r.r0)
+ self.a.gen_func_epilog()
+ assert run_asm(self.a) == 2478
+
+ def test_load_store(self):
+ x = 0x60002224
+ self.a.gen_func_prolog()
+ self.a.mc.gen_load_int(r.r1.value, x)
+ self.a.mc.MOV_ri(r.r3.value, 8)
+ self.a.mc.STR_rr(r.r1.value, r.fp.value, r.r3.value)
+ self.a.mc.LDR_ri(r.r0.value, r.fp.value, 8)
+ self.a.gen_func_epilog()
+ assert run_asm(self.a) == x
+
+
+def callme(inp):
+ i = inp + 10
+ return i
diff --git a/pypy/jit/backend/arm/test/test_basic.py b/pypy/jit/backend/arm/test/test_basic.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/test/test_basic.py
@@ -0,0 +1,50 @@
+import py
+from pypy.jit.metainterp.test import test_ajit
+from pypy.rlib.jit import JitDriver
+from pypy.jit.backend.arm.test.support import JitARMMixin
+from pypy.jit.backend.detect_cpu import getcpuclass
+
+CPU = getcpuclass()
+
+class TestBasic(JitARMMixin, test_ajit.BaseLLtypeTests):
+ # for the individual tests see
+ # ====> ../../../metainterp/test/test_ajit.py
+ def test_bug(self):
+ jitdriver = JitDriver(greens = [], reds = ['n'])
+ class X(object):
+ pass
+ def f(n):
+ while n > -100:
+ jitdriver.can_enter_jit(n=n)
+ jitdriver.jit_merge_point(n=n)
+ x = X()
+ x.arg = 5
+ if n <= 0: break
+ n -= x.arg
+ x.arg = 6 # prevents 'x.arg' from being annotated as constant
+ return n
+ res = self.meta_interp(f, [31], enable_opts='')
+ assert res == -4
+
+ def test_r_dict(self):
+ # a Struct that belongs to the hash table is not seen as being
+ # included in the larger Array
+ py.test.skip("issue with ll2ctypes")
+
+ def test_free_object(self):
+ py.test.skip("issue of freeing, probably with ll2ctypes")
+
+
+ if not CPU.supports_longlong:
+ for k in dir(test_ajit.BaseLLtypeTests):
+ if k.find('longlong') < 0:
+ continue
+ locals()[k] = lambda self: py.test.skip('requires longlong support')
+
+ def test_read_timestamp(self):
+ py.test.skip("The JIT on ARM does not support read_timestamp")
+
+
+ if not CPU.supports_floats:
+ for k in ('test_float', 'test_residual_external_call'):
+ locals()[k] = lambda self: py.test.skip('requires float support')
diff --git a/pypy/jit/backend/arm/test/test_calling_convention.py b/pypy/jit/backend/arm/test/test_calling_convention.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/test/test_calling_convention.py
@@ -0,0 +1,39 @@
+from pypy.rpython.annlowlevel import llhelper
+from pypy.jit.metainterp.history import JitCellToken
+from pypy.jit.backend.test.calling_convention_test import TestCallingConv, parse
+from pypy.rpython.lltypesystem import lltype
+from pypy.jit.codewriter.effectinfo import EffectInfo
+
+from pypy.jit.backend.arm.test.support import skip_unless_run_slow_tests
+skip_unless_run_slow_tests()
+
+class TestARMCallingConvention(TestCallingConv):
+ # ../../test/calling_convention_test.py
+
+ def test_call_argument_spilling(self):
+        # regression test: a value live in r0 that is still needed after
+        # the call must be spilled before r0 is overwritten with the call's
+        # argument, not after
+ def func(a):
+ return a + 16
+
+ I = lltype.Signed
+ FUNC = self.FuncType([I], I)
+ FPTR = self.Ptr(FUNC)
+ func_ptr = llhelper(FPTR, func)
+ calldescr = self.cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT, EffectInfo.MOST_GENERAL)
+ funcbox = self.get_funcbox(self.cpu, func_ptr)
+
+ args = ', '.join(['i%d' % i for i in range(11)])
+ ops = """
+ [%s]
+ i99 = call(ConstClass(func_ptr), 22, descr=calldescr)
+ finish(%s, i99)""" % (args, args)
+ loop = parse(ops, namespace=locals())
+ looptoken = JitCellToken()
+ self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
+ args = [x for x in range(11)]
+ self.cpu.execute_token(looptoken, *args)
+ for x in range(11):
+ assert self.cpu.get_latest_value_int(x) == x
+ assert self.cpu.get_latest_value_int(11) == 38
diff --git a/pypy/jit/backend/arm/test/test_gc_integration.py b/pypy/jit/backend/arm/test/test_gc_integration.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/test/test_gc_integration.py
@@ -0,0 +1,228 @@
+"""Tests for register allocation of common constructs."""
+
+import py
+from pypy.jit.metainterp.history import BoxInt, \
+ BoxPtr, TreeLoop, TargetToken
+from pypy.jit.metainterp.resoperation import rop, ResOperation
+from pypy.jit.codewriter import heaptracker
+from pypy.jit.backend.llsupport.descr import GcCache
+from pypy.jit.backend.llsupport.gc import GcLLDescription
+from pypy.jit.backend.detect_cpu import getcpuclass
+from pypy.jit.backend.arm.arch import WORD
+from pypy.rpython.lltypesystem import lltype, llmemory, rffi
+from pypy.rpython.annlowlevel import llhelper
+from pypy.rpython.lltypesystem import rclass
+from pypy.jit.backend.llsupport.gc import GcLLDescr_framework
+
+from pypy.jit.backend.arm.test.test_regalloc import MockAssembler
+from pypy.jit.backend.arm.test.test_regalloc import BaseTestRegalloc
+from pypy.jit.backend.arm.regalloc import ARMFrameManager, VFPRegisterManager
+from pypy.jit.codewriter.effectinfo import EffectInfo
+from pypy.jit.backend.arm.regalloc import Regalloc, ARMv7RegisterManager
+
+CPU = getcpuclass()
+
+
+class MockGcRootMap(object):
+ is_shadow_stack = False
+
+ def get_basic_shape(self, is_64_bit):
+ return ['shape']
+
+ def add_frame_offset(self, shape, offset):
+ shape.append(offset)
+
+ def add_callee_save_reg(self, shape, reg_index):
+ index_to_name = {1: 'ebx', 2: 'esi', 3: 'edi'}
+ shape.append(index_to_name[reg_index])
+
+ def compress_callshape(self, shape, datablockwrapper):
+ assert datablockwrapper == 'fakedatablockwrapper'
+ assert shape[0] == 'shape'
+ return ['compressed'] + shape[1:]
+
+
+class MockGcDescr(GcCache):
+ get_malloc_slowpath_addr = None
+ write_barrier_descr = None
+ moving_gc = True
+ gcrootmap = MockGcRootMap()
+
+ def initialize(self):
+ pass
+
+ _record_constptrs = GcLLDescr_framework._record_constptrs.im_func
+ rewrite_assembler = GcLLDescr_framework.rewrite_assembler.im_func
+
+
+class TestRegallocGcIntegration(BaseTestRegalloc):
+
+ cpu = CPU(None, None)
+ cpu.gc_ll_descr = MockGcDescr(False)
+ cpu.setup_once()
+
+ S = lltype.GcForwardReference()
+ S.become(lltype.GcStruct('S', ('field', lltype.Ptr(S)),
+ ('int', lltype.Signed)))
+
+ fielddescr = cpu.fielddescrof(S, 'field')
+
+ struct_ptr = lltype.malloc(S)
+ struct_ref = lltype.cast_opaque_ptr(llmemory.GCREF, struct_ptr)
+ child_ptr = lltype.nullptr(S)
+ struct_ptr.field = child_ptr
+
+ descr0 = cpu.fielddescrof(S, 'int')
+ ptr0 = struct_ref
+ targettoken = TargetToken()
+
+ namespace = locals().copy()
+
+ def test_basic(self):
+ ops = '''
+ [p0]
+ p1 = getfield_gc(p0, descr=fielddescr)
+ finish(p1)
+ '''
+ self.interpret(ops, [self.struct_ptr])
+ assert not self.getptr(0, lltype.Ptr(self.S))
+
+ def test_rewrite_constptr(self):
+ ops = '''
+ []
+ p1 = getfield_gc(ConstPtr(struct_ref), descr=fielddescr)
+ finish(p1)
+ '''
+ self.interpret(ops, [])
+ assert not self.getptr(0, lltype.Ptr(self.S))
+
+ def test_bug_0(self):
+ ops = '''
+ [i0, i1, i2, i3, i4, i5, i6, i7, i8]
+ label(i0, i1, i2, i3, i4, i5, i6, i7, i8, descr=targettoken)
+ guard_value(i2, 1) [i2, i3, i4, i5, i6, i7, i0, i1, i8]
+ guard_class(i4, 138998336) [i4, i5, i6, i7, i0, i1, i8]
+ i11 = getfield_gc(i4, descr=descr0)
+ guard_nonnull(i11) [i4, i5, i6, i7, i0, i1, i11, i8]
+ i13 = getfield_gc(i11, descr=descr0)
+ guard_isnull(i13) [i4, i5, i6, i7, i0, i1, i11, i8]
+ i15 = getfield_gc(i4, descr=descr0)
+ i17 = int_lt(i15, 0)
+ guard_false(i17) [i4, i5, i6, i7, i0, i1, i11, i15, i8]
+ i18 = getfield_gc(i11, descr=descr0)
+ i19 = int_ge(i15, i18)
+ guard_false(i19) [i4, i5, i6, i7, i0, i1, i11, i15, i8]
+ i20 = int_lt(i15, 0)
+ guard_false(i20) [i4, i5, i6, i7, i0, i1, i11, i15, i8]
+ i21 = getfield_gc(i11, descr=descr0)
+ i22 = getfield_gc(i11, descr=descr0)
+ i23 = int_mul(i15, i22)
+ i24 = int_add(i21, i23)
+ i25 = getfield_gc(i4, descr=descr0)
+ i27 = int_add(i25, 1)
+ setfield_gc(i4, i27, descr=descr0)
+ i29 = getfield_raw(144839744, descr=descr0)
+ i31 = int_and(i29, -2141192192)
+ i32 = int_is_true(i31)
+ guard_false(i32) [i4, i6, i7, i0, i1, i24]
+ i33 = getfield_gc(i0, descr=descr0)
+ guard_value(i33, ConstPtr(ptr0)) [i4, i6, i7, i0, i1, i33, i24]
+ jump(i0, i1, 1, 17, i4, ConstPtr(ptr0), i6, i7, i24, descr=targettoken)
+ '''
+ self.interpret(ops, [0, 0, 0, 0, 0, 0, 0, 0, 0], run=False)
+
+NOT_INITIALIZED = chr(0xdd)
+
+class GCDescrFastpathMalloc(GcLLDescription):
+ gcrootmap = None
+ write_barrier_descr = None
+
+ def __init__(self):
+ GcLLDescription.__init__(self, None)
+ # create a nursery
+ NTP = rffi.CArray(lltype.Char)
+ self.nursery = lltype.malloc(NTP, 64, flavor='raw')
+ for i in range(64):
+ self.nursery[i] = NOT_INITIALIZED
+ self.addrs = lltype.malloc(rffi.CArray(lltype.Signed), 2,
+ flavor='raw')
+ self.addrs[0] = rffi.cast(lltype.Signed, self.nursery)
+ self.addrs[1] = self.addrs[0] + 64
+ self.calls = []
+ def malloc_slowpath(size):
+ self.calls.append(size)
+ # reset the nursery
+ nadr = rffi.cast(lltype.Signed, self.nursery)
+ self.addrs[0] = nadr + size
+ return nadr
+ self.generate_function('malloc_nursery', malloc_slowpath,
+ [lltype.Signed], lltype.Signed)
+
+ def get_nursery_free_addr(self):
+ return rffi.cast(lltype.Signed, self.addrs)
+
+ def get_nursery_top_addr(self):
+ return rffi.cast(lltype.Signed, self.addrs) + WORD
+
+ def get_malloc_slowpath_addr(self):
+ return self.get_malloc_fn_addr('malloc_nursery')
+
+ def check_nothing_in_nursery(self):
+ # CALL_MALLOC_NURSERY should not write anything in the nursery
+ for i in range(64):
+ assert self.nursery[i] == NOT_INITIALIZED
+
+class TestMallocFastpath(BaseTestRegalloc):
+
+ def setup_method(self, method):
+ cpu = CPU(None, None)
+ cpu.gc_ll_descr = GCDescrFastpathMalloc()
+ cpu.setup_once()
+ self.cpu = cpu
+
+ def test_malloc_fastpath(self):
+ ops = '''
+ []
+ p0 = call_malloc_nursery(16)
+ p1 = call_malloc_nursery(32)
+ p2 = call_malloc_nursery(16)
+ finish(p0, p1, p2)
+ '''
+ self.interpret(ops, [])
+ # check the returned pointers
+ gc_ll_descr = self.cpu.gc_ll_descr
+ nurs_adr = rffi.cast(lltype.Signed, gc_ll_descr.nursery)
+ ref = self.cpu.get_latest_value_ref
+ assert rffi.cast(lltype.Signed, ref(0)) == nurs_adr + 0
+ assert rffi.cast(lltype.Signed, ref(1)) == nurs_adr + 16
+ assert rffi.cast(lltype.Signed, ref(2)) == nurs_adr + 48
+ # check the nursery content and state
+ gc_ll_descr.check_nothing_in_nursery()
+ assert gc_ll_descr.addrs[0] == nurs_adr + 64
+ # slowpath never called
+ assert gc_ll_descr.calls == []
+
+ def test_malloc_slowpath(self):
+ ops = '''
+ []
+ p0 = call_malloc_nursery(16)
+ p1 = call_malloc_nursery(32)
+ p2 = call_malloc_nursery(24) # overflow
+ finish(p0, p1, p2)
+ '''
+ self.interpret(ops, [])
+ # check the returned pointers
+ gc_ll_descr = self.cpu.gc_ll_descr
+ nurs_adr = rffi.cast(lltype.Signed, gc_ll_descr.nursery)
+ ref = self.cpu.get_latest_value_ref
+ assert rffi.cast(lltype.Signed, ref(0)) == nurs_adr + 0
+ assert rffi.cast(lltype.Signed, ref(1)) == nurs_adr + 16
+ assert rffi.cast(lltype.Signed, ref(2)) == nurs_adr + 0
+ # check the nursery content and state
+ gc_ll_descr.check_nothing_in_nursery()
+ assert gc_ll_descr.addrs[0] == nurs_adr + 24
+ # this should call slow path once
+ assert gc_ll_descr.calls == [24]
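
The expected addresses in the two tests above follow from plain
bump-pointer arithmetic over the 64-byte nursery: each allocation returns
the current free offset and advances it, and an allocation that would cross
the top takes the slowpath, which in this mock resets the nursery. A small
model reproducing both tests' numbers:

    def bump_alloc(sizes, top=64):
        free, results, slow_calls = 0, [], []
        for size in sizes:
            if free + size > top:        # would overflow: take the slowpath
                slow_calls.append(size)
                free = 0                 # the mock slowpath resets the nursery
            results.append(free)
            free += size
        return results, free, slow_calls

    assert bump_alloc([16, 32, 16]) == ([0, 16, 48], 64, [])
    assert bump_alloc([16, 32, 24]) == ([0, 16, 0], 24, [24])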
diff --git a/pypy/jit/backend/arm/test/test_generated.py b/pypy/jit/backend/arm/test/test_generated.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/test/test_generated.py
@@ -0,0 +1,525 @@
+import py
+from pypy.jit.metainterp.history import (AbstractFailDescr,
+ AbstractDescr,
+ BasicFailDescr,
+ BoxInt, Box, BoxPtr,
+ ConstInt, ConstPtr,
+ BoxObj, Const,
+ ConstObj, BoxFloat, ConstFloat)
+from pypy.jit.metainterp.history import JitCellToken
+from pypy.jit.metainterp.resoperation import ResOperation, rop
+from pypy.rpython.test.test_llinterp import interpret
+from pypy.jit.backend.detect_cpu import getcpuclass
+
+CPU = getcpuclass()
+class TestStuff(object):
+
+ def test0(self):
+ faildescr1 = BasicFailDescr(1)
+ faildescr2 = BasicFailDescr(2)
+ v1 = BoxInt()
+ v2 = BoxInt()
+ v3 = BoxInt()
+ v4 = BoxInt()
+ v5 = BoxInt()
+ v6 = BoxInt()
+ v7 = BoxInt()
+ v8 = BoxInt()
+ v9 = BoxInt()
+ v10 = BoxInt()
+ v11 = BoxInt()
+ v12 = BoxInt()
+ cpu = CPU(None, None)
+ cpu.setup_once()
+ inputargs = [v1, v2, v3, v4, v5, v6, v7, v8, v9, v10]
+ operations = [
+ ResOperation(rop.INT_SUB, [ConstInt(-1073741824), v7], v11),
+ ResOperation(rop.INT_GE, [v3, ConstInt(23)], v12),
+ ResOperation(rop.GUARD_TRUE, [v12], None, descr=faildescr1),
+ ResOperation(rop.FINISH, [v9, v6, v10, v2, v8, v5, v1, v4], None, descr=faildescr2),
+ ]
+ looptoken = JitCellToken()
+ operations[2].setfailargs([v12, v8, v3, v2, v1, v11])
+ cpu.compile_loop(inputargs, operations, looptoken)
+        args = [-12, -26, -19, 7, -5, -24, -37, 62, 9, 12]
+ op = cpu.execute_token(looptoken, *args)
+ assert cpu.get_latest_value_int(0) == 0
+ assert cpu.get_latest_value_int(1) == 62
+ assert cpu.get_latest_value_int(2) == -19
+ assert cpu.get_latest_value_int(3) == -26
+ assert cpu.get_latest_value_int(4) == -12
+ assert cpu.get_latest_value_int(5) == -1073741787
+
+ def test_overflow(self):
+ faildescr1 = BasicFailDescr(1)
+ faildescr2 = BasicFailDescr(2)
+ faildescr3 = BasicFailDescr(3)
+ v1 = BoxInt()
+ v2 = BoxInt()
+ v3 = BoxInt()
+ v4 = BoxInt()
+ v5 = BoxInt()
+ v6 = BoxInt()
+ v7 = BoxInt()
+ v8 = BoxInt()
+ v9 = BoxInt()
+ v10 = BoxInt()
+ v11 = BoxInt()
+ v12 = BoxInt()
+ v13 = BoxInt()
+ v14 = BoxInt()
+ v15 = BoxInt()
+ v16 = BoxInt()
+ v17 = BoxInt()
+ v18 = BoxInt()
+ cpu = CPU(None, None)
+ cpu.setup_once()
+ inputargs = [v1, v2, v3, v4, v5, v6, v7, v8, v9, v10]
+ operations = [
+ ResOperation(rop.INT_SUB, [ConstInt(21), v5], v11),
+ ResOperation(rop.INT_MUL_OVF, [v8, v4], v12),
+ ResOperation(rop.GUARD_NO_OVERFLOW, [], None, descr=faildescr1),
+ ResOperation(rop.UINT_LT, [v10, v3], v13),
+ ResOperation(rop.INT_IS_TRUE, [v3], v14),
+ ResOperation(rop.INT_XOR, [v9, v8], v15),
+ ResOperation(rop.INT_LE, [v12, v6], v16),
+ ResOperation(rop.UINT_GT, [v15, v5], v17),
+ ResOperation(rop.UINT_LE, [ConstInt(-9), v13], v18),
+ ResOperation(rop.GUARD_FALSE, [v13], None, descr=faildescr2),
+ ResOperation(rop.FINISH, [v7, v1, v2], None, descr=faildescr3),
+ ]
+ operations[2].setfailargs([v10, v6])
+ operations[9].setfailargs([v15, v7, v10, v18, v4, v17, v1])
+ looptoken = JitCellToken()
+ cpu.compile_loop(inputargs, operations, looptoken)
+        args = [16, 5, 5, 16, 46, 6, 63, 39, 78, 0]
+ op = cpu.execute_token(looptoken, *args)
+ assert cpu.get_latest_value_int(0) == 105
+ assert cpu.get_latest_value_int(1) == 63
+ assert cpu.get_latest_value_int(2) == 0
+ assert cpu.get_latest_value_int(3) == 0
+ assert cpu.get_latest_value_int(4) == 16
+ assert cpu.get_latest_value_int(5) == 1
+ assert cpu.get_latest_value_int(6) == 16
+
+ def test_sub_with_neg_const_first_arg(self):
+ faildescr1 = BasicFailDescr(1)
+ faildescr2 = BasicFailDescr(2)
+ faildescr3 = BasicFailDescr(3)
+ v1 = BoxInt()
+ v2 = BoxInt()
+ v3 = BoxInt()
+ v4 = BoxInt()
+ v5 = BoxInt()
+ v6 = BoxInt()
+ v7 = BoxInt()
+ v8 = BoxInt()
+ v9 = BoxInt()
+ v10 = BoxInt()
+ v11 = BoxInt()
+ v12 = BoxInt()
+ tmp13 = BoxInt()
+ cpu = CPU(None, None)
+ cpu.setup_once()
+ inputargs = [v1, v2, v3, v4, v5, v6, v7, v8, v9, v10]
+ operations = [
+ ResOperation(rop.INT_EQ, [ConstInt(17), v9], v11),
+ ResOperation(rop.INT_SUB_OVF, [ConstInt(-32), v7], v12),
+ ResOperation(rop.GUARD_NO_OVERFLOW, [], None, descr=faildescr1),
+ ResOperation(rop.INT_IS_ZERO, [v12], tmp13),
+ ResOperation(rop.GUARD_TRUE, [tmp13], None, descr=faildescr2),
+ ResOperation(rop.FINISH, [v5, v2, v1, v10, v3, v8, v4, v6], None, descr=faildescr3)
+ ]
+ operations[2].setfailargs([v8, v3])
+ operations[4].setfailargs([v2, v12, v1, v3, v4])
+ looptoken = JitCellToken()
+ cpu.compile_loop(inputargs, operations, looptoken)
+        args = [-5, 24, 46, -15, 13, -8, 0, -6, 6, 6]
+ op = cpu.execute_token(looptoken, *args)
+ assert op.identifier == 2
+ assert cpu.get_latest_value_int(0) == 24
+ assert cpu.get_latest_value_int(1) == -32
+ assert cpu.get_latest_value_int(2) == -5
+ assert cpu.get_latest_value_int(3) == 46
+ assert cpu.get_latest_value_int(4) == -15
+
+ def test_tempbox_spilling_in_sub(self):
+ faildescr1 = BasicFailDescr(1)
+ faildescr2 = BasicFailDescr(2)
+ v1 = BoxInt()
+ v2 = BoxInt()
+ v3 = BoxInt()
+ v4 = BoxInt()
+ v5 = BoxInt()
+ v6 = BoxInt()
+ v7 = BoxInt()
+ v8 = BoxInt()
+ v9 = BoxInt()
+ v10 = BoxInt()
+ v11 = BoxInt()
+ v12 = BoxInt()
+ v13 = BoxInt()
+ v14 = BoxInt()
+ v15 = BoxInt()
+ cpu = CPU(None, None)
+ cpu.setup_once()
+ inputargs = [v1, v2, v3, v4, v5, v6, v7, v8, v9, v10]
+ operations = [
+ ResOperation(rop.INT_LT, [v9, v9], v11),
+ ResOperation(rop.INT_ADD, [ConstInt(715827882), v4], v12),
+ ResOperation(rop.INT_NEG, [v11], v13),
+ ResOperation(rop.INT_IS_TRUE, [v3], v14),
+ ResOperation(rop.INT_SUB_OVF, [v3, ConstInt(-95)], v15),
+ ResOperation(rop.GUARD_NO_OVERFLOW, [], None, descr=faildescr1),
+ ResOperation(rop.FINISH, [v8, v2, v6, v5, v7, v1, v10], None, descr=faildescr2),
+ ]
+ operations[5].setfailargs([])
+ looptoken = JitCellToken()
+ cpu.compile_loop(inputargs, operations, looptoken)
+        args = [19, -3, -58, -7, 12, 22, -54, -29, -19, -64]
+ op = cpu.execute_token(looptoken, *args)
+ assert cpu.get_latest_value_int(0) == -29
+ assert cpu.get_latest_value_int(1) == -3
+ assert cpu.get_latest_value_int(2) == 22
+ assert cpu.get_latest_value_int(3) == 12
+ assert cpu.get_latest_value_int(4) == -54
+ assert cpu.get_latest_value_int(5) == 19
+ assert cpu.get_latest_value_int(6) == -64
+
+ def test_tempbox2(self):
+ faildescr1 = BasicFailDescr(1)
+ faildescr2 = BasicFailDescr(2)
+ v1 = BoxInt()
+ v2 = BoxInt()
+ v3 = BoxInt()
+ v4 = BoxInt()
+ v5 = BoxInt()
+ v6 = BoxInt()
+ v7 = BoxInt()
+ v8 = BoxInt()
+ v9 = BoxInt()
+ v10 = BoxInt()
+ v11 = BoxInt()
+ v12 = BoxInt()
+ v13 = BoxInt()
+ v14 = BoxInt()
+ v15 = BoxInt()
+ cpu = CPU(None, None)
+ cpu.setup_once()
+ inputargs = [v1, v2, v3, v4, v5, v6, v7, v8, v9, v10]
+ operations = [
+ ResOperation(rop.INT_LT, [v5, ConstInt(-67)], v11),
+ ResOperation(rop.INT_INVERT, [v2], v12),
+ ResOperation(rop.INT_SUB, [ConstInt(-45), v2], v13),
+ ResOperation(rop.INT_SUB, [ConstInt(99), v6], v14),
+ ResOperation(rop.INT_MUL_OVF, [v6, v9], v15),
+ ResOperation(rop.GUARD_NO_OVERFLOW, [], None, descr=faildescr1),
+ ResOperation(rop.FINISH, [v1, v4, v10, v8, v7, v3], None, descr=faildescr2),
+ ]
+ looptoken = JitCellToken()
+ operations[5].setfailargs([])
+ cpu.compile_loop(inputargs, operations, looptoken)
+        args = [1073741824, 95, -16, 5, 92, 12, 32, 17, 37, -63]
+ op = cpu.execute_token(looptoken, *args)
+ assert cpu.get_latest_value_int(0) == 1073741824
+ assert cpu.get_latest_value_int(1) == 5
+ assert cpu.get_latest_value_int(2) == -63
+ assert cpu.get_latest_value_int(3) == 17
+ assert cpu.get_latest_value_int(4) == 32
+ assert cpu.get_latest_value_int(5) == -16
+
+ def test_wrong_guard(self):
+ # generated by:
+ # ../test/ test/test_zll_random.py -l -k arm -s --block-length=10 --random-seed=4338
+
+ faildescr1 = BasicFailDescr(1)
+ faildescr2 = BasicFailDescr(2)
+ faildescr3 = BasicFailDescr(3)
+ faildescr4 = BasicFailDescr(4)
+ v1 = BoxInt(32)
+ v2 = BoxInt(41)
+ v3 = BoxInt(-9)
+ v4 = BoxInt(12)
+ v5 = BoxInt(-18)
+ v6 = BoxInt(46)
+ v7 = BoxInt(15)
+ v8 = BoxInt(17)
+ v9 = BoxInt(10)
+ v10 = BoxInt(12)
+ v11 = BoxInt()
+ v12 = BoxInt()
+ v13 = BoxInt()
+ v14 = BoxInt()
+ tmp15 = BoxInt()
+ tmp16 = BoxInt()
+ tmp17 = BoxInt()
+ cpu = CPU(None, None)
+ cpu.setup_once()
+ inputargs = [v1, v2, v3, v4, v5, v6, v7, v8, v9, v10]
+ operations = [
+ ResOperation(rop.INT_IS_TRUE, [v1], tmp15),
+ ResOperation(rop.GUARD_TRUE, [tmp15], None, descr=faildescr1),
+ ResOperation(rop.INT_GT, [v4, v5], v11),
+ ResOperation(rop.INT_XOR, [ConstInt(-4), v7], v12),
+ ResOperation(rop.INT_MUL, [ConstInt(23), v11], v13),
+ ResOperation(rop.UINT_GE, [ConstInt(1), v13], v14),
+ ResOperation(rop.INT_IS_ZERO, [v14], tmp16),
+ ResOperation(rop.GUARD_TRUE, [tmp16], None, descr=faildescr2),
+ ResOperation(rop.INT_IS_TRUE, [v12], tmp17),
+ ResOperation(rop.GUARD_FALSE, [tmp17], None, descr=faildescr3),
+ ResOperation(rop.FINISH, [v8, v10, v6, v3, v2, v9], None, descr=faildescr4),
+ ]
+ looptoken = JitCellToken()
+ operations[1].setfailargs([v8, v6, v1])
+ operations[7].setfailargs([v4])
+ operations[9].setfailargs([v10, v13])
+        args = [32, 41, -9, 12, -18, 46, 15, 17, 10, 12]
+ cpu.compile_loop(inputargs, operations, looptoken)
+ op = cpu.execute_token(looptoken, *args)
+ assert op.identifier == 3
+ assert cpu.get_latest_value_int(0) == 12
+ assert cpu.get_latest_value_int(1) == 23
+
+ def test_wrong_guard2(self):
+ # random seed: 8029
+ # block length: 10
+ faildescr1 = BasicFailDescr(1)
+ faildescr2 = BasicFailDescr(2)
+ faildescr3 = BasicFailDescr(3)
+ v1 = BoxInt()
+ v2 = BoxInt()
+ v3 = BoxInt()
+ v4 = BoxInt()
+ v5 = BoxInt()
+ v6 = BoxInt()
+ v7 = BoxInt()
+ v8 = BoxInt()
+ v9 = BoxInt()
+ v10 = BoxInt()
+ v11 = BoxInt()
+ v12 = BoxInt()
+ v13 = BoxInt()
+ v14 = BoxInt()
+ v15 = BoxInt()
+ v16 = BoxInt()
+ tmp17 = BoxInt()
+ cpu = CPU(None, None)
+ cpu.setup_once()
+ inputargs = [v1, v2, v3, v4, v5, v6, v7, v8, v9, v10]
+ operations = [
+ ResOperation(rop.INT_ADD_OVF, [v8, ConstInt(-30)], v11),
+ ResOperation(rop.GUARD_NO_OVERFLOW, [], None, descr=faildescr1),
+ ResOperation(rop.UINT_LE, [v11, v1], v12),
+ ResOperation(rop.INT_AND, [v11, ConstInt(31)], tmp17),
+ ResOperation(rop.UINT_RSHIFT, [v12, tmp17], v13),
+ ResOperation(rop.INT_NE, [v3, v2], v14),
+ ResOperation(rop.INT_NE, [ConstInt(1), v11], v15),
+ ResOperation(rop.INT_NE, [ConstInt(23), v15], v16),
+ ResOperation(rop.GUARD_FALSE, [v15], None, descr=faildescr2),
+ ResOperation(rop.FINISH, [v4, v10, v6, v5, v9, v7], None, descr=faildescr3),
+ ]
+ operations[1].setfailargs([v6, v8, v1, v4])
+ operations[8].setfailargs([v5, v9])
+ looptoken = JitCellToken()
+ cpu.compile_loop(inputargs, operations, looptoken)
+        args = [-8, 0, 62, 35, 16, 9, 30, 581610154, -1, 738197503]
+ op = cpu.execute_token(looptoken, *args)
+ assert op.identifier == 2
+ assert cpu.get_latest_value_int(0) == 16
+ assert cpu.get_latest_value_int(1) == -1
+
+ def test_wrong_guard3(self):
+ # random seed: 8029
+ # block length: 10
+ faildescr1 = BasicFailDescr(1)
+ faildescr2 = BasicFailDescr(2)
+ faildescr3 = BasicFailDescr(3)
+ faildescr4 = BasicFailDescr(4)
+ v1 = BoxInt()
+ v2 = BoxInt()
+ v3 = BoxInt()
+ v4 = BoxInt()
+ v5 = BoxInt()
+ v6 = BoxInt()
+ v7 = BoxInt()
+ v8 = BoxInt()
+ v9 = BoxInt()
+ v10 = BoxInt()
+ v11 = BoxInt()
+ v12 = BoxInt()
+ v13 = BoxInt()
+ v14 = BoxInt()
+ v15 = BoxInt()
+ v16 = BoxInt()
+ cpu = CPU(None, None)
+ cpu.setup_once()
+ inputargs = [v1, v2, v3, v4, v5, v6, v7, v8, v9, v10]
+ operations = [
+ ResOperation(rop.UINT_LT, [ConstInt(-11), v7], v11),
+ ResOperation(rop.INT_GE, [v3, v5], v12),
+ ResOperation(rop.INT_INVERT, [v9], v13),
+ ResOperation(rop.GUARD_VALUE, [v13, ConstInt(14)], None, descr=faildescr3),
+ ResOperation(rop.INT_IS_ZERO, [v12], v14),
+ ResOperation(rop.INT_SUB, [v2, v13], v15),
+ ResOperation(rop.GUARD_VALUE, [v15, ConstInt(-32)], None, descr=faildescr4),
+ ResOperation(rop.INT_FLOORDIV, [v3, ConstInt(805306366)], v16),
+ ResOperation(rop.GUARD_VALUE, [v15, ConstInt(0)], None, descr=faildescr1),
+ ResOperation(rop.FINISH, [v10, v8, v1, v6, v4], None, descr=faildescr2),
+ ]
+ operations[3].setfailargs([])
+ operations[-4].setfailargs([v15])
+ operations[-2].setfailargs([v9, v4, v10, v11, v14])
+ looptoken = JitCellToken()
+ cpu.compile_loop(inputargs, operations, looptoken)
+        args = [-39, -18, 1588243114, -9, -4, 1252698794, 0, 715827882, -15, 536870912]
+ op = cpu.execute_token(looptoken, *args)
+ assert op.identifier == 1
+ assert cpu.get_latest_value_int(0) == -15
+ assert cpu.get_latest_value_int(1) == -9
+ assert cpu.get_latest_value_int(2) == 536870912
+ assert cpu.get_latest_value_int(3) == 0
+ assert cpu.get_latest_value_int(4) == 0
+
+ def test_wrong_result(self):
+ # generated by:
+ # ../test/ test/test_zll_random.py -l -k arm -s --block-length=10 --random-seed=7389
+ faildescr1 = BasicFailDescr(1)
+ faildescr2 = BasicFailDescr(2)
+ faildescr3 = BasicFailDescr(3)
+ faildescr4 = BasicFailDescr(4)
+ v1 = BoxInt()
+ v2 = BoxInt()
+ v3 = BoxInt()
+ v4 = BoxInt()
+ v5 = BoxInt()
+ v6 = BoxInt()
+ v7 = BoxInt()
+ v8 = BoxInt()
+ v9 = BoxInt()
+ v10 = BoxInt()
+ v11 = BoxInt()
+ v12 = BoxInt()
+ v13 = BoxInt()
+ v14 = BoxInt()
+ v15 = BoxInt()
+ tmp16 = BoxInt()
+ tmp17 = BoxInt()
+ cpu = CPU(None, None)
+ cpu.setup_once()
+ inputargs = [v1, v2, v3, v4, v5, v6, v7, v8, v9, v10]
+ operations = [
+ ResOperation(rop.INT_IS_TRUE, [v3], tmp16),
+ ResOperation(rop.GUARD_TRUE, [tmp16], None, descr=faildescr1),
+ ResOperation(rop.INT_AND, [v7, ConstInt(31)], tmp17),
+ ResOperation(rop.INT_RSHIFT, [v5, tmp17], v11),
+ ResOperation(rop.INT_OR, [v6, v8], v12),
+ ResOperation(rop.GUARD_VALUE, [v11, ConstInt(-2)], None, descr=faildescr2),
+ ResOperation(rop.INT_LE, [ConstInt(1789569706), v10], v13),
+ ResOperation(rop.INT_IS_TRUE, [v4], v14),
+ ResOperation(rop.INT_XOR, [v14, v3], v15),
+ ResOperation(rop.GUARD_VALUE, [v8, ConstInt(-8)], None, descr=faildescr3),
+ ResOperation(rop.FINISH, [v1, v2, v9], None, descr=faildescr4),
+ ]
+ operations[1].setfailargs([v9, v1])
+ operations[5].setfailargs([v10, v2, v11, v3])
+ operations[9].setfailargs([v5, v7, v12, v14, v2, v13, v8])
+ looptoken = JitCellToken()
+ cpu.compile_loop(inputargs, operations, looptoken)
+        args = [0, -2, 24, 1, -4, 13, -95, 33, 2, -44]
+ op = cpu.execute_token(looptoken, *args)
+ assert op.identifier == 3
+ assert cpu.get_latest_value_int(0) == -4
+ assert cpu.get_latest_value_int(1) == -95
+ assert cpu.get_latest_value_int(2) == 45
+ assert cpu.get_latest_value_int(3) == 1
+ assert cpu.get_latest_value_int(4) == -2
+ assert cpu.get_latest_value_int(5) == 0
+ assert cpu.get_latest_value_int(6) == 33
+
+ def test_int_add(self):
+ # random seed: 1202
+ # block length: 4
+ # AssertionError: Got 1431655764, expected 357913940 for value #3
+ faildescr1 = BasicFailDescr(1)
+ faildescr2 = BasicFailDescr(2)
+ v1 = BoxInt()
+ v2 = BoxInt()
+ v3 = BoxInt()
+ v4 = BoxInt()
+ v5 = BoxInt()
+ v6 = BoxInt()
+ v7 = BoxInt()
+ v8 = BoxInt()
+ v9 = BoxInt()
+ v10 = BoxInt()
+ v11 = BoxInt()
+ tmp12 = BoxInt()
+ cpu = CPU(None, None)
+ cpu.setup_once()
+ inputargs = [v1, v2, v3, v4, v5, v6, v7, v8, v9, v10]
+ operations = [
+ ResOperation(rop.INT_ADD, [ConstInt(-1073741825), v3], v11),
+ ResOperation(rop.INT_IS_TRUE, [v1], tmp12),
+ ResOperation(rop.GUARD_FALSE, [tmp12], None, descr=faildescr1),
+ ResOperation(rop.FINISH, [v8, v2, v10, v6, v7, v9, v5, v4], None, descr=faildescr2),
+ ]
+ operations[2].setfailargs([v10, v3, v6, v11, v9, v2])
+ looptoken = JitCellToken()
+ cpu.compile_loop(inputargs, operations, looptoken)
+        args = [3, -5, 1431655765, 47, 12, 1789569706, 15, 939524096, 16, -43]
+ op = cpu.execute_token(looptoken, *args)
+ assert op.identifier == 1
+ assert cpu.get_latest_value_int(0) == -43
+ assert cpu.get_latest_value_int(1) == 1431655765
+ assert cpu.get_latest_value_int(2) == 1789569706
+ assert cpu.get_latest_value_int(3) == 357913940
+ assert cpu.get_latest_value_int(4) == 16
+ assert cpu.get_latest_value_int(5) == -5
+
+ def test_wrong_result2(self):
+ # block length 10
+ # random seed 1
+ f1 = BasicFailDescr(1)
+ f2 = BasicFailDescr(2)
+ f3 = BasicFailDescr(3)
+ v1 = BoxInt()
+ v2 = BoxInt()
+ v3 = BoxInt()
+ v4 = BoxInt()
+ v5 = BoxInt()
+ v6 = BoxInt()
+ v7 = BoxInt()
+ v8 = BoxInt()
+ v9 = BoxInt()
+ v10 = BoxInt()
+ v11 = BoxInt()
+ v12 = BoxInt()
+ v13 = BoxInt()
+ v14 = BoxInt()
+ v15 = BoxInt()
+ cpu = CPU(None, None)
+ cpu.setup_once()
+ inputargs = [v1, v2, v3, v4, v5, v6, v7, v8, v9, v10]
+ operations = [
+ ResOperation(rop.INT_LE, [v6, v1], v11),
+ ResOperation(rop.SAME_AS, [ConstInt(-14)], v12),
+ ResOperation(rop.INT_ADD, [ConstInt(24), v4], v13),
+ ResOperation(rop.UINT_RSHIFT, [v6, ConstInt(0)], v14),
+ ResOperation(rop.GUARD_VALUE, [v14, ConstInt(1)], None, descr=f3),
+ ResOperation(rop.INT_MUL, [v13, ConstInt(12)], v15),
+ ResOperation(rop.GUARD_FALSE, [v11], None, descr=f1),
+ ResOperation(rop.FINISH, [v2, v3, v5, v7, v10, v8, v9], None, descr=f2),
+ ]
+ operations[-2].setfailargs([v4, v10, v3, v9, v14, v2])
+ operations[4].setfailargs([v14])
+ looptoken = JitCellToken()
+ cpu.compile_loop(inputargs, operations, looptoken)
+        args = [14, -20, 18, -2058005163, 6, 1, -16, 11, 0, 19]
+ op = cpu.execute_token(looptoken, *args)
+ assert op.identifier == 1
+ assert cpu.get_latest_value_int(0) == -2058005163
+ assert cpu.get_latest_value_int(1) == 19
+ assert cpu.get_latest_value_int(2) == 18
+ assert cpu.get_latest_value_int(3) == 0
+ assert cpu.get_latest_value_int(4) == 1
+ assert cpu.get_latest_value_int(5) == -20
diff --git a/pypy/jit/backend/arm/test/test_helper.py b/pypy/jit/backend/arm/test/test_helper.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/test/test_helper.py
@@ -0,0 +1,20 @@
+from pypy.jit.backend.arm.helper.assembler import count_reg_args
+from pypy.jit.metainterp.history import (BoxInt, BoxPtr, BoxFloat,
+ INT, REF, FLOAT)
+
+
+def test_count_reg_args():
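+    # The expected counts below encode the argument-passing convention the
+    # helper is assumed to model (inferred from these asserts, not from its
+    # source): at most four core registers carry arguments, and each float
+    # occupies a two-register pair, so e.g. three floats only count as 2.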
+ assert count_reg_args([BoxPtr()]) == 1
+ assert count_reg_args([BoxPtr()] * 2) == 2
+ assert count_reg_args([BoxPtr()] * 3) == 3
+ assert count_reg_args([BoxPtr()] * 4) == 4
+ assert count_reg_args([BoxPtr()] * 5) == 4
+ assert count_reg_args([BoxFloat()] * 1) == 1
+ assert count_reg_args([BoxFloat()] * 2) == 2
+ assert count_reg_args([BoxFloat()] * 3) == 2
+
+ assert count_reg_args([BoxInt(), BoxInt(), BoxFloat()]) == 3
+ assert count_reg_args([BoxInt(), BoxFloat(), BoxInt()]) == 2
+    assert count_reg_args([BoxInt(), BoxInt(), BoxInt(), BoxFloat()]) == 3
diff --git a/pypy/jit/backend/arm/test/test_instr_codebuilder.py b/pypy/jit/backend/arm/test/test_instr_codebuilder.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/test/test_instr_codebuilder.py
@@ -0,0 +1,302 @@
+from pypy.jit.backend.arm import registers as r
+from pypy.jit.backend.arm import codebuilder
+from pypy.jit.backend.arm import conditions
+from pypy.jit.backend.arm import instructions
+from pypy.jit.backend.arm.test.support import (requires_arm_as, define_test, gen_test_function)
+from gen import assemble
+import py
+
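+# Presumably skips the whole module at collection time when no ARM assembler
+# is available on the host (the helper comes from the support module above).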
+requires_arm_as()
+
+class CodeBuilder(codebuilder.ARMv7Builder):
+ def __init__(self):
+ self.buffer = []
+
+ def writechar(self, char):
+ self.buffer.append(char)
+
+ def hexdump(self):
+ return ''.join(self.buffer)
+
+class ASMTest(object):
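+    # Each test emits one instruction through CodeBuilder and compares the
+    # raw bytes with what `assemble` (the local gen helper, assumed to drive
+    # a real ARM assembler) produces for the equivalent mnemonic.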
+ def assert_equal(self, asm):
+ assert self.cb.hexdump() == assemble(asm)
+
+
+class TestInstrCodeBuilder(ASMTest):
+    def setup_method(self, method):
+ self.cb = CodeBuilder()
+
+ def test_ldr(self):
+ self.cb.LDR_ri(r.r0.value, r.r1.value)
+ self.assert_equal('LDR r0, [r1]')
+
+ def test_ldr_neg(self):
+ self.cb.LDR_ri(r.r3.value, r.fp.value, -16)
+ self.assert_equal('LDR r3, [fp, #-16]')
+
+ def test_add_ri(self):
+ self.cb.ADD_ri(r.r0.value, r.r1.value, 1)
+ self.assert_equal('ADD r0, r1, #1')
+
+ def test_mov_rr(self):
+ self.cb.MOV_rr(r.r7.value, r.r12.value)
+ self.assert_equal('MOV r7, r12')
+
+ def test_mov_ri(self):
+ self.cb.MOV_ri(r.r9.value, 123)
+ self.assert_equal('MOV r9, #123')
+
+ def test_mov_ri2(self):
+ self.cb.MOV_ri(r.r9.value, 255)
+ self.assert_equal('MOV r9, #255')
+
+ def test_mov_ri_max(self):
+ self.cb.MOV_ri(r.r9.value, 0xFF)
+ self.assert_equal('MOV r9, #255')
+
+ def test_str_ri(self):
+ self.cb.STR_ri(r.r9.value, r.r14.value)
+ self.assert_equal('STR r9, [r14]')
+
+ def test_str_ri_offset(self):
+ self.cb.STR_ri(r.r9.value, r.r14.value, 23)
+ self.assert_equal('STR r9, [r14, #23]')
+
+    def test_str_ri_negative_offset(self):
+ self.cb.STR_ri(r.r9.value, r.r14.value, -20)
+ self.assert_equal('STR r9, [r14, #-20]')
+
+ def test_asr_ri(self):
+ self.cb.ASR_ri(r.r7.value, r.r5.value, 24)
+ self.assert_equal('ASR r7, r5, #24')
+
+ def test_orr_rr_no_shift(self):
+        self.cb.ORR_rr(r.r0.value, r.r7.value, r.r12.value)
+ self.assert_equal('ORR r0, r7, r12')
+
+ def test_orr_rr_lsl_8(self):
+        self.cb.ORR_rr(r.r0.value, r.r7.value, r.r12.value, 8)
+ self.assert_equal('ORR r0, r7, r12, lsl #8')
+
+ def test_push_one_reg(self):
+ self.cb.PUSH([r.r1.value])
+ self.assert_equal('PUSH {r1}')
+
+ def test_push_multiple(self):
+ self.cb.PUSH([reg.value for reg in [r.r1, r.r3, r.r6, r.r8, r.pc]])
+ self.assert_equal('PUSH {r1, r3, r6, r8, pc}')
+
+ def test_push_multiple2(self):
+ self.cb.PUSH([reg.value for reg in [r.fp, r.ip, r.lr, r.pc]])
+ self.assert_equal('PUSH {fp, ip, lr, pc}')
+
+ def test_vpush_one_reg(self):
+ self.cb.VPUSH([r.d3.value])
+ self.assert_equal('VPUSH {d3}')
+
+ def test_vpush_one_reg2(self):
+ self.cb.VPUSH([r.d12.value])
+ self.assert_equal('VPUSH {d12}')
+
+ def test_vpush_multiple(self):
+ self.cb.VPUSH([reg.value for reg in [r.d11, r.d12, r.d13, r.d14, r.d15]])
+ self.assert_equal('VPUSH {D11, D12, D13, D14, D15}')
+
+ def test_sub_ri(self):
+ self.cb.SUB_ri(r.r2.value, r.r4.value, 123)
+ self.assert_equal('SUB r2, r4, #123')
+
+ def test_sub_ri2(self):
+ self.cb.SUB_ri(r.r3.value, r.r7.value, 0xFF)
+ self.assert_equal('SUB r3, r7, #255')
+
+ def test_cmp_ri(self):
+ self.cb.CMP_ri(r.r3.value, 123)
+ self.assert_equal('CMP r3, #123')
+
+ def test_mcr(self):
+        self.cb.MCR(15, 0, r.r1.value, 7, 10, 0)
+        self.assert_equal('MCR P15, 0, r1, c7, c10, 0')
+
+ def test_push_eq_stmdb(self):
+ # XXX check other conditions in STMDB
+ self.cb.PUSH([reg.value for reg in r.caller_resp], cond=conditions.AL)
+ self.assert_equal('STMDB SP!, {r0, r1, r2, r3}')
+
+ def test_push(self):
+ self.cb.PUSH([reg.value for reg in r.caller_resp], cond=conditions.AL)
+ self.assert_equal('PUSH {r0, r1, r2, r3}')
+
+ def test_push_raises_sp(self):
+        py.test.raises(AssertionError, 'self.cb.PUSH([r.sp.value])')
+
+ def test_pop(self):
+ self.cb.POP([reg.value for reg in r.caller_resp], cond=conditions.AL)
+ self.assert_equal('POP {r0, r1, r2, r3}')
+
+ def test_pop_eq_ldm(self):
+ # XXX check other conditions in LDM
+ self.cb.POP([reg.value for reg in r.caller_resp], cond=conditions.AL)
+ self.assert_equal('LDM SP!, {r0, r1, r2, r3}')
+
+ def test_double_add(self):
+ self.cb.VADD(r.d1.value, r.d2.value, r.d3.value, conditions.LE)
+ self.assert_equal("VADDLE.F64 D1, D2, D3")
+
+ def test_double_sub(self):
+ self.cb.VSUB(r.d1.value, r.d2.value, r.d3.value, conditions.GT)
+ self.assert_equal("VSUBGT.F64 D1, D2, D3")
+
+ def test_vstr_offset(self):
+        py.test.raises(AssertionError, 'self.cb.VSTR(r.d1, r.r4, 3)')
+
+ def test_vmrs(self):
+ self.cb.VMRS(conditions.AL)
+ self.assert_equal("vmrs APSR_nzcv, fpscr")
+
+ def test_movw(self):
+ self.cb.MOVW_ri(r.r3.value, 0xFFFF, conditions.NE)
+ self.assert_equal("MOVWNE r3, #65535")
+
+ def test_movt(self):
+ self.cb.MOVT_ri(r.r3.value, 0xFFFF, conditions.NE)
+ self.assert_equal("MOVTNE r3, #65535")
+
+class TestInstrCodeBuilderForGeneratedInstr(ASMTest):
+    def setup_method(self, method):
+ self.cb = CodeBuilder()
+
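+# The gen_test_*_func generators below return lists of test cases that
+# define_test/gen_test_function (from the support module) turn into methods
+# on the class above; a case is a tuple (asm_operands, builder_args),
+# optionally extended with a kwargs dict and a mnemonic suffix such as
+# '.F64' or 'S'.
+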
+def gen_test_float_load_store_func(name, table):
+ tests = []
+    for c, v in [('EQ', conditions.EQ), ('LE', conditions.LE), ('AL', conditions.AL)]:
+ for reg in range(15):
+ for creg in range(2):
+ asm = 'd%d, [r%d]' % (creg, reg)
+ tests.append((asm, (creg, reg)))
+ asm = 'd%d, [r%d, #16]' % (creg, reg)
+ tests.append((asm, (creg, reg, 16)))
+ return tests
+
+def gen_test_float64_data_proc_instructions_func(name, table):
+ tests = []
+    for c, v in [('EQ', conditions.EQ), ('LE', conditions.LE), ('AL', conditions.AL)]:
+ for reg in range(15):
+ if 'result' in table and not table['result']:
+ asm = 'd%d, d2' % reg
+ tests.append((asm, (reg, r.d2.value), {}, '.F64'))
+ elif 'base' in table and not table['base']:
+ asm = 'd%d, d2' % reg
+ tests.append((asm, (reg, r.d2.value), {}, '.F64'))
+ else:
+ asm = 'd%d, d1, d2' % reg
+ tests.append((asm, (reg, r.d1.value, r.d2.value), {}, '.F64'))
+ return tests
+
+def gen_test_data_proc_imm_func(name, table):
+ if table['result'] and table['base']:
+ def f(self):
+ func = getattr(self.cb, name)
+ func(r.r3.value, r.r7.value, 23)
+ self.assert_equal('%s r3, r7, #23' % name[:name.index('_')])
+ py.test.raises(ValueError, 'func(r.r3.value, r.r7.value, -12)')
+ return [f]
+ else:
+ return [('r3, #23', [r.r3.value, 23])]
+
+def gen_test_load_store_func(name, table):
+ if table['imm']:
+ return [('r3, [r7, #23]', [r.r3.value, r.r7.value, 23]),
+ ('r3, [r7, #-23]', [r.r3.value, r.r7.value, -23])
+ ]
+ else:
+ return [('r3, [r7, r12]', [r.r3.value, r.r7.value, r.r12.value])]
+
+def gen_test_extra_load_store_func(name, table):
+ if name[-4] == 'D':
+ if name[-2:] == 'rr':
+ return [('r4, [r8, r12]', [r.r4.value, r.r5.value, r.r8.value, r.r12.value])]
+ else:
+ return [('r4, [r8, #223]', [r.r4.value, r.r5.value, r.r8.value, 223])]
+ else:
+ if name[-2:] == 'rr':
+ return [('r4, [r5, r12]', [r.r4.value, r.r5.value, r.r12.value])]
+ else:
+ return [('r4, [r5, #223]', [r.r4.value, r.r5.value, 223])]
+
+def gen_test_multiply_func(name, table):
+ if 'acc' in table and table['acc']:
+ if 'update_flags' in table and table['update_flags']:
+ return [
+ ('r3, r7, r12, r13', (r.r3.value, r.r7.value, r.r12.value, r.r13.value)),
+ ('r3, r7, r12, r13', (r.r3.value, r.r7.value, r.r12.value, r.r13.value), {'s':1}, 'S')
+ ]
+ else:
+ return [('r3, r7, r12, r13', (r.r3.value, r.r7.value, r.r12.value,
+ r.r13.value))]
+ elif 'long' in table and table['long']:
+ return [('r3, r13, r7, r12', (r.r3.value, r.r13.value, r.r7.value, r.r12.value))]
+ else:
+ return [('r3, r7, r12', (r.r3.value, r.r7.value, r.r12.value))]
+
+def gen_test_data_proc_reg_shift_reg_func(name, table):
+ if name[-2:] == 'rr':
+ return [('r3, r7, r12', [r.r3.value, r.r7.value, r.r12.value])]
+ else:
+ result = 'result' not in table or table['result']
+ if result:
+ return [('r3, r7, r8, ASR r11', [r.r3.value, r.r7.value,
+ r.r8.value, r.r11.value], {'shifttype':0x2})]
+ else:
+ return [('r3, r7, ASR r11', [r.r3.value, r.r7.value,
+ r.r11.value], {'shifttype':0x2})]
+
+def gen_test_data_proc_func(name, table):
+ op_name = name[:name.index('_')]
+ if name[-2:] == 'ri':
+ return [('r3, r7, #12', (r.r3.value, r.r7.value, 12)),
+ ('r3, r7, #12', (r.r3.value, r.r7.value, 12), {'s':1}, 'S')]
+ elif table['base'] and table['result']:
+ return [('r3, r7, r12', (r.r3.value, r.r7.value, r.r12.value)),
+ ('r3, r7, r12', (r.r3.value, r.r7.value, r.r12.value), {'s':1}, 'S')]
+ else:
+ return [('r3, r7', [r.r3.value, r.r7.value])]
+
+def gen_test_supervisor_and_coproc_func(name, table):
+ def f(self):
+ py.test.skip('not used at the moment')
+ return [f]
+
+def gen_test_branch_func(name, table):
+ def f(self):
+ py.test.skip('not used at the moment')
+ return [f]
+
+def gen_test_block_data_func(name, table):
+ tests = []
+    for c, v in [('EQ', conditions.EQ), ('LE', conditions.LE), ('AL', conditions.AL)]:
+ for regs in range(16):
+ asm = 'r3, {%s}' % ','.join(['r%d' % i for i in range(regs+1)])
+ tests.append((asm, (r.r3.value, range(regs+1))))
+ return tests
+
+def gen_test_simd_instructions_3regs_func(name, table):
+ op_name = name[:name.index('_')]
+ return [('d1, d2, d3', (r.d1.value, r.d2.value, r.d3.value), {}, '.i64')]
+
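+# Walk every encoding table exported by the instructions module, look up a
+# matching gen_test_<table>_func generator above, and attach one generated
+# test method per encoding; tables without a generator are reported and
+# skipped.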
+def build_tests():
+ cls = TestInstrCodeBuilderForGeneratedInstr
+ test_name = 'test_generated_%s'
+ ins = [k for k in instructions.__dict__.keys() if not k.startswith('__')]
+ for name in ins:
+ try:
+ func = globals()['gen_test_%s_func' % name]
+ except KeyError:
+ print 'No test generator for %s instructions' % name
+ continue
+ for key, value in getattr(instructions, name).iteritems():
+ for test_case in func(key, value):
+ define_test(cls, key, test_case, name)
+build_tests()
diff --git a/pypy/jit/backend/arm/test/test_jump.py b/pypy/jit/backend/arm/test/test_jump.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/test/test_jump.py
@@ -0,0 +1,310 @@
+import random
+import py
+from pypy.jit.backend.x86.test.test_jump import MockAssembler
+from pypy.jit.backend.arm.registers import *
+from pypy.jit.backend.arm.locations import *
+from pypy.jit.backend.arm.regalloc import ARMFrameManager
+from pypy.jit.backend.arm.jump import remap_frame_layout, remap_frame_layout_mixed
+from pypy.jit.metainterp.history import INT
+
+frame_pos = ARMFrameManager.frame_pos
+
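+# remap_frame_layout moves values between source and destination locations
+# (registers, stack slots or immediates). The tests below check that plain
+# moves are emitted in a safe order and that cycles are broken by pushing
+# one value up front and popping it at the end, with ip/vfp_ip as scratch.
+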
+class TestJump(object):
+ def setup_method(self, m):
+ self.assembler = MockAssembler()
+
+ def test_trivial(self):
+ remap_frame_layout(self.assembler, [], [], '?')
+ assert self.assembler.ops == []
+ remap_frame_layout(self.assembler, [r0, r1, r3, r5, r6, r7, r9],
+ [r0, r1, r3, r5, r6, r7, r9], '?')
+ assert self.assembler.ops == []
+ s8 = frame_pos(1, INT)
+ s12 = frame_pos(31, INT)
+ s20 = frame_pos(6, INT)
+ remap_frame_layout(self.assembler, [r0, r1, s20, s8, r3, r5, r6, s12, r7, r9],
+ [r0, r1, s20, s8, r3, r5, r6, s12, r7, r9],
+ '?')
+ assert self.assembler.ops == []
+
+ def test_simple_registers(self):
+ remap_frame_layout(self.assembler, [r0, r1, r2], [r3, r4, r5], '?')
+ assert self.assembler.ops == [('mov', r0, r3),
+ ('mov', r1, r4),
+ ('mov', r2, r5)]
+
+ def test_simple_framelocs(self):
+ s8 = frame_pos(0, INT)
+ s12 = frame_pos(13, INT)
+ s20 = frame_pos(20, INT)
+ s24 = frame_pos(221, INT)
+ remap_frame_layout(self.assembler, [s8, r7, s12], [s20, s24, r9], ip)
+ assert self.assembler.ops == [('mov', s8, ip),
+ ('mov', ip, s20),
+ ('mov', r7, s24),
+ ('mov', s12, r9)]
+
+ def test_reordering(self):
+ s8 = frame_pos(8, INT)
+ s12 = frame_pos(12, INT)
+ s20 = frame_pos(19, INT)
+ s24 = frame_pos(1, INT)
+ remap_frame_layout(self.assembler, [r7, s8, s20, r4],
+ [s8, r4, r7, r2], '?')
+ assert self.assembler.got([('mov', r4, r2),
+ ('mov', s8, r4),
+ ('mov', r7, s8),
+ ('mov', s20, r7)])
+
+ def test_cycle(self):
+ s8 = frame_pos(8, INT)
+ s12 = frame_pos(12, INT)
+ s20 = frame_pos(19, INT)
+ s24 = frame_pos(1, INT)
+ remap_frame_layout(self.assembler, [r4, s8, s20, r7],
+ [s8, r7, r4, s20], '?')
+ assert self.assembler.got([('push', s8),
+ ('mov', r4, s8),
+ ('mov', s20, r4),
+ ('mov', r7, s20),
+ ('pop', r7)])
+
+ def test_cycle_2(self):
+ s8 = frame_pos(8, INT)
+ s12 = frame_pos(12, INT)
+ s20 = frame_pos(19, INT)
+ s24 = frame_pos(1, INT)
+ s2 = frame_pos(2, INT)
+ s3 = frame_pos(3, INT)
+ remap_frame_layout(self.assembler,
+ [r0, s8, r1, s20, r0, s20, s24, r3, s2, s3],
+ [s8, s20, r1, r0, r4, s24, r5, s12, s3, s2],
+ ip)
+ assert self.assembler.got([('mov', r0, r4),
+ ('mov', s24, r5),
+ ('mov', r3, s12),
+ ('mov', s20, ip),
+ ('mov', ip, s24),
+ ('push', s8),
+ ('mov', r0, s8),
+ ('mov', s20, r0),
+ ('pop', s20),
+ ('push', s3),
+ ('mov', s2, ip),
+ ('mov', ip, s3),
+ ('pop', s2)])
+
+ def test_constants(self):
+ c3 = ImmLocation(3)
+ remap_frame_layout(self.assembler, [c3], [r0], '?')
+ assert self.assembler.ops == [('mov', c3, r0)]
+
+ def test_constants2(self):
+ c3 = ImmLocation(3)
+ s12 = frame_pos(12, INT)
+ remap_frame_layout(self.assembler, [c3], [s12], '?')
+ assert self.assembler.ops == [('mov', c3, s12)]
+
+ def test_constants_and_cycle(self):
+ c3 = ImmLocation(3)
+ s12 = frame_pos(13, INT)
+ remap_frame_layout(self.assembler, [r5, c3, s12],
+ [s12, r0, r5], r1)
+ assert self.assembler.ops == [('mov', c3, r0),
+ ('push', s12),
+ ('mov', r5, s12),
+ ('pop', r5)]
+
+    def test_mixed(self):
+ s23 = frame_pos(2, FLOAT) # non-conflicting locations
+ s4 = frame_pos(4, INT)
+ remap_frame_layout_mixed(self.assembler, [r1], [s4], 'tmp',
+ [s23], [d5], 'vfptmp')
+ assert self.assembler.ops == [('mov', r1, s4),
+ ('mov', s23, d5)]
+
+    def test_mixed2(self):
+ s23 = frame_pos(2, FLOAT) # gets stored in pos 2 and 3, with value==3
+ s3 = frame_pos(3, INT)
+ remap_frame_layout_mixed(self.assembler, [r1], [s3], 'tmp',
+ [s23], [d5], 'vfptmp')
+ assert self.assembler.ops == [('push', s23),
+ ('mov', r1, s3),
+ ('pop', d5)]
+
+    def test_mixed3(self):
+ s23 = frame_pos(2, FLOAT)
+ s2 = frame_pos(2, INT)
+ remap_frame_layout_mixed(self.assembler, [r1], [s2], 'tmp',
+ [s23], [d5], 'vfptmp')
+ assert self.assembler.ops == [
+ ('push', s23),
+ ('mov', r1, s2),
+ ('pop', d5)]
+
+    def test_mixed4(self):
+ s23 = frame_pos(2, FLOAT)
+ s4 = frame_pos(4, INT)
+ s45 = frame_pos(4, FLOAT)
+ s1 = frame_pos(1, INT)
+ remap_frame_layout_mixed(self.assembler, [s4], [s1], r3,
+ [s23], [s45], d3)
+ assert self.assembler.ops == [('mov', s4, r3),
+ ('mov', r3, s1),
+ ('mov', s23, d3),
+ ('mov', d3, s45)]
+
+    def test_mixed5(self):
+ s2 = frame_pos(2, INT)
+ s23 = frame_pos(2, FLOAT)
+ s4 = frame_pos(4, INT)
+ s45 = frame_pos(4, FLOAT)
+ remap_frame_layout_mixed(self.assembler, [s4], [s2], r3,
+ [s23], [s45], d3)
+ assert self.assembler.ops == [('push', s23),
+ ('mov', s4, r3),
+ ('mov', r3, s2),
+ ('pop', s45)]
+
+    def test_mixed6(self):
+ s3 = frame_pos(3, INT)
+ s23 = frame_pos(2, FLOAT)
+ s4 = frame_pos(4, INT)
+ s45 = frame_pos(4, FLOAT)
+ remap_frame_layout_mixed(self.assembler, [s4], [s3], r3,
+ [s23], [s45], d3)
+ assert self.assembler.ops == [('push', s23),
+ ('mov', s4, r3),
+ ('mov', r3, s3),
+ ('pop', s45)]
+
+def test_random_mixed():
+ assembler = MockAssembler()
+ registers1 = all_regs
+ registers2 = all_vfp_regs
+ VFPWORDS = 2
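+    # a double occupies two machine words on 32-bit ARM, so FLOAT stack
+    # slots span two adjacent positions (see get_state()/read() below)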
+ #
+ def pick1():
+ n = random.randrange(-3, 10)
+ if n < 0:
+ return registers1[n]
+ else:
+ return frame_pos(n, INT)
+ def pick2():
+        n = random.randrange(-3, 10 // VFPWORDS)
+ if n < 0:
+ return registers2[n]
+ else:
+ return frame_pos(n*VFPWORDS, FLOAT)
+ #
+ def pick1c():
+ n = random.randrange(-2000, 500)
+ if n >= 0:
+ return imm(n)
+ else:
+ return pick1()
+ #
+ def pick_dst(fn, count, seen):
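+        # reject a candidate whose stack slot(s) were already handed out: a
+        # wide (FLOAT) location claims two adjacent keys, so destination
+        # locations can never partially overlap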
+ result = []
+ while len(result) < count:
+ x = fn()
+ keys = [x.as_key()]
+ if x.is_stack() and x.width > WORD:
+ keys.append(keys[0] + 1)
+ for key in keys:
+ if key in seen:
+ break
+ else:
+ for key in keys:
+ seen[key] = True
+ result.append(x)
+ return result
+ #
+ def get_state(locations):
+ regs1 = {}
+ regs2 = {}
+ stack = {}
+ for i, loc in enumerate(locations):
+ if loc.is_vfp_reg():
+ if loc.width > WORD:
+ newvalue = ('value-vfp-%d' % i,
+ 'value-vfp-hiword-%d' % i)
+ else:
+ newvalue = 'value-vfp-%d' % i
+ regs2[loc.value] = newvalue
+ elif loc.is_reg():
+ regs1[loc.value] = 'value-int-%d' % i
+ elif loc.is_stack():
+ stack[loc.position] = 'value-width%d-%d' % (loc.width, i)
+ if loc.width > WORD:
+ stack[loc.position+1] = 'value-hiword-%d' % i
+ else:
+ assert loc.is_imm() or loc.is_imm_float()
+ return regs1, regs2, stack
+ #
+ for i in range(500):
+ seen = {}
+ src_locations2 = [pick2() for i in range(4)]
+ dst_locations2 = pick_dst(pick2, 4, seen)
+ src_locations1 = [pick1c() for i in range(5)]
+ dst_locations1 = pick_dst(pick1, 5, seen)
+ assembler = MockAssembler()
+ remap_frame_layout_mixed(assembler,
+ src_locations1, dst_locations1, ip,
+ src_locations2, dst_locations2, vfp_ip)
+ #
+ regs1, regs2, stack = get_state(src_locations1 +
+ src_locations2)
+ #
+ def read(loc, expected_width=None):
+ if expected_width is not None:
+ assert loc.width == expected_width*WORD
+ if loc.is_vfp_reg():
+ return regs2[loc.value]
+ elif loc.is_reg():
+ return regs1[loc.value]
+ elif loc.is_stack():
+ got = stack[loc.position]
+ if loc.width > WORD:
+ got = (got, stack[loc.position+1])
+ return got
+ if loc.is_imm() or loc.is_imm_float():
+ return 'const-%d' % loc.value
+ assert 0, loc
+ #
+ def write(loc, newvalue):
+ if loc.is_vfp_reg():
+ regs2[loc.value] = newvalue
+ elif loc.is_reg():
+ regs1[loc.value] = newvalue
+ elif loc.is_stack():
+ if loc.width > WORD:
+ newval1, newval2 = newvalue
+ stack[loc.position] = newval1
+ stack[loc.position+1] = newval2
+ else:
+ stack[loc.position] = newvalue
+ else:
+ assert 0, loc
+ #
+ src_values1 = [read(loc, 1) for loc in src_locations1]
+ src_values2 = [read(loc, 2) for loc in src_locations2]
+ #
+ extrapushes = []
+ for op in assembler.ops:
+ if op[0] == 'mov':
+ src, dst = op[1:]
+ assert src.is_reg() or src.is_vfp_reg() or src.is_stack() or src.is_imm_float() or src.is_imm()
+ assert dst.is_reg() or dst.is_vfp_reg() or dst.is_stack()
+ assert not (src.is_stack() and dst.is_stack())
+ write(dst, read(src))
+ elif op[0] == 'push':
+ src, = op[1:]
+ assert src.is_reg() or src.is_vfp_reg() or src.is_stack()
+ extrapushes.append(read(src))
+ elif op[0] == 'pop':
+ dst, = op[1:]
+ assert dst.is_reg() or dst.is_vfp_reg() or dst.is_stack()
+ write(dst, extrapushes.pop())
+ else:
+ assert 0, "unknown op: %r" % (op,)
+ assert not extrapushes
+ #
+ for i, loc in enumerate(dst_locations1):
+ assert read(loc, 1) == src_values1[i]
+ for i, loc in enumerate(dst_locations2):
+ assert read(loc, 2) == src_values2[i]
diff --git a/pypy/jit/backend/arm/test/test_list.py b/pypy/jit/backend/arm/test/test_list.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/test/test_list.py
@@ -0,0 +1,8 @@
+
+from pypy.jit.metainterp.test.test_list import ListTests
+from pypy.jit.backend.arm.test.support import JitARMMixin
+
+class TestList(JitARMMixin, ListTests):
+ # for individual tests see
+ # ====> ../../../metainterp/test/test_list.py
+ pass
diff --git a/pypy/jit/backend/arm/test/test_loop_unroll.py b/pypy/jit/backend/arm/test/test_loop_unroll.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/test/test_loop_unroll.py
@@ -0,0 +1,8 @@
+import py
+from pypy.jit.backend.x86.test.test_basic import Jit386Mixin
+from pypy.jit.metainterp.test import test_loop_unroll
+
+class TestLoopSpec(Jit386Mixin, test_loop_unroll.LoopUnrollTest):
+ # for the individual tests see
+ # ====> ../../../metainterp/test/test_loop.py
+ pass
diff --git a/pypy/jit/backend/arm/test/test_recompilation.py b/pypy/jit/backend/arm/test/test_recompilation.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/test/test_recompilation.py
@@ -0,0 +1,150 @@
+from pypy.jit.backend.arm.test.test_regalloc import BaseTestRegalloc
+
+
+class TestRecompilation(BaseTestRegalloc):
+ def test_compile_bridge_not_deeper(self):
+ ops = '''
+ [i0]
+ label(i0, descr=targettoken)
+ i1 = int_add(i0, 1)
+ i2 = int_lt(i1, 20)
+ guard_true(i2, descr=fdescr1) [i1]
+ jump(i1, descr=targettoken)
+ '''
+ loop = self.interpret(ops, [0])
+ assert self.getint(0) == 20
+ ops = '''
+ [i1]
+ i3 = int_add(i1, 1)
+ finish(i3, descr=fdescr2)
+ '''
+ bridge = self.attach_bridge(ops, loop, -2)
+ fail = self.run(loop, 0)
+ assert fail.identifier == 2
+ assert self.getint(0) == 21
+
+ def test_compile_bridge_deeper(self):
+ ops = '''
+ [i0]
+ label(i0, descr=targettoken)
+ i1 = int_add(i0, 1)
+ i2 = int_lt(i1, 20)
+ guard_true(i2, descr=fdescr1) [i1]
+ jump(i1, descr=targettoken)
+ '''
+ loop = self.interpret(ops, [0])
+ previous = loop._jitcelltoken.compiled_loop_token.frame_depth
+ #assert loop._jitcelltoken.compiled_loop_token.param_depth == 0
+ assert self.getint(0) == 20
+ ops = '''
+ [i1]
+ i3 = int_add(i1, 1)
+ i4 = int_add(i3, 1)
+ i5 = int_add(i4, 1)
+ i6 = int_add(i5, 1)
+ i7 = int_add(i5, i4)
+ force_spill(i5)
+ i8 = int_add(i7, 1)
+ i9 = int_add(i8, 1)
+ finish(i3, i4, i5, i6, i7, i8, i9, descr=fdescr2)
+ '''
+ bridge = self.attach_bridge(ops, loop, -2)
+ descr = loop.operations[3].getdescr()
+ new = descr._arm_bridge_frame_depth
+ #assert descr._x86_bridge_param_depth == 0
+ # the force_spill() forces the stack to grow
+ assert new > previous
+ fail = self.run(loop, 0)
+ assert fail.identifier == 2
+ assert self.getint(0) == 21
+ assert self.getint(1) == 22
+ assert self.getint(2) == 23
+ assert self.getint(3) == 24
+
+ def test_bridge_jump_to_other_loop(self):
+ loop = self.interpret('''
+ [i0, i10, i11, i12, i13, i14, i15, i16]
+ label(i0, i10, i11, i12, i13, i14, i15, i16, descr=targettoken)
+ i1 = int_add(i0, 1)
+ i2 = int_lt(i1, 20)
+ guard_true(i2, descr=fdescr1) [i1]
+ jump(i1, i10, i11, i12, i13, i14, i15, i16, descr=targettoken)
+ ''', [0, 0, 0, 0, 0, 0, 0, 0])
+ other_loop = self.interpret('''
+ [i3, i10, i11, i12, i13, i14, i15, i16]
+ label(i3, descr=targettoken2)
+ guard_false(i3, descr=fdescr2) [i3]
+ jump(i3, descr=targettoken2)
+ ''', [1, 0, 0, 0, 0, 0, 0, 0])
+ ops = '''
+ [i3]
+ jump(i3, 1, 2, 3, 4, 5, 6, 7, descr=targettoken)
+ '''
+ bridge = self.attach_bridge(ops, other_loop, 1)
+ fail = self.run(other_loop, 1, 0, 0, 0, 0, 0, 0, 0)
+ assert fail.identifier == 1
+
+ def test_bridge_jumps_to_self_deeper(self):
+ loop = self.interpret('''
+ [i0, i1, i2, i31, i32, i33]
+ label(i0, i1, i2, i31, i32, i33, descr=targettoken)
+ i98 = same_as(0)
+ i99 = same_as(1)
+ i30 = int_add(i1, i2)
+ i3 = int_add(i0, 1)
+ i4 = int_and(i3, 1)
+ guard_false(i4) [i98, i3]
+ i5 = int_lt(i3, 20)
+ guard_true(i5) [i99, i3]
+ jump(i3, i30, 1, i30, i30, i30, descr=targettoken)
+ ''', [0, 0, 0, 0, 0, 0])
+ assert self.getint(0) == 0
+ assert self.getint(1) == 1
+ ops = '''
+ [i97, i3]
+ i10 = int_mul(i3, 2)
+ i8 = int_add(i3, 1)
+ i6 = int_add(i8, i10)
+ i7 = int_add(i3, i6)
+ force_spill(i6)
+ force_spill(i7)
+ force_spill(i8)
+ i12 = int_add(i7, i8)
+ i11 = int_add(i12, i6)
+ jump(i3, i12, i11, i10, i6, i7, descr=targettoken)
+ '''
+ loop_frame_depth = loop._jitcelltoken.compiled_loop_token.frame_depth
+ bridge = self.attach_bridge(ops, loop, 6)
+ guard_op = loop.operations[6]
+ #assert loop._jitcelltoken.compiled_loop_token.param_depth == 0
+ # the force_spill() forces the stack to grow
+ #assert guard_op.getdescr()._x86_bridge_frame_depth > loop_frame_depth
+ #assert guard_op.getdescr()._x86_bridge_param_depth == 0
+ self.run(loop, 0, 0, 0, 0, 0, 0)
+ assert self.getint(0) == 1
+ assert self.getint(1) == 20
+
+ def test_bridge_jumps_to_self_shallower(self):
+ loop = self.interpret('''
+ [i0, i1, i2]
+ label(i0, i1, i2, descr=targettoken)
+ i98 = same_as(0)
+ i99 = same_as(1)
+ i3 = int_add(i0, 1)
+ i4 = int_and(i3, 1)
+ guard_false(i4) [i98, i3]
+ i5 = int_lt(i3, 20)
+ guard_true(i5) [i99, i3]
+ jump(i3, i1, i2, descr=targettoken)
+ ''', [0, 0, 0])
+ assert self.getint(0) == 0
+ assert self.getint(1) == 1
+ ops = '''
+ [i97, i3]
+ jump(i3, 0, 1, descr=targettoken)
+ '''
+ bridge = self.attach_bridge(ops, loop, 5)
+ self.run(loop, 0, 0, 0)
+ assert self.getint(0) == 1
+ assert self.getint(1) == 20
+
diff --git a/pypy/jit/backend/arm/test/test_recursive.py b/pypy/jit/backend/arm/test/test_recursive.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/test/test_recursive.py
@@ -0,0 +1,8 @@
+
+from pypy.jit.metainterp.test.test_recursive import RecursiveTests
+from pypy.jit.backend.arm.test.support import JitARMMixin
+
+class TestRecursive(JitARMMixin, RecursiveTests):
+ # for the individual tests see
+ # ====> ../../../metainterp/test/test_recursive.py
+ pass
diff --git a/pypy/jit/backend/arm/test/test_regalloc.py b/pypy/jit/backend/arm/test/test_regalloc.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/test/test_regalloc.py
@@ -0,0 +1,863 @@
+
+""" Tests for register allocation for common constructs
+"""
+
+import py
+from pypy.jit.metainterp.history import BasicFailDescr, \
+ JitCellToken, \
+ TargetToken
+from pypy.jit.metainterp.resoperation import rop
+from pypy.jit.backend.llsupport.descr import GcCache
+from pypy.jit.backend.detect_cpu import getcpuclass
+from pypy.jit.backend.arm.regalloc import Regalloc, ARMFrameManager
+from pypy.jit.backend.llsupport.regalloc import is_comparison_or_ovf_op
+from pypy.jit.tool.oparser import parse
+from pypy.rpython.lltypesystem import lltype, llmemory
+from pypy.rpython.annlowlevel import llhelper
+from pypy.rpython.lltypesystem import rclass, rstr
+from pypy.jit.codewriter.effectinfo import EffectInfo
+from pypy.jit.codewriter import longlong
+from pypy.rpython.llinterp import LLException
+
+
+def test_is_comparison_or_ovf_op():
+ assert not is_comparison_or_ovf_op(rop.INT_ADD)
+ assert is_comparison_or_ovf_op(rop.INT_ADD_OVF)
+ assert is_comparison_or_ovf_op(rop.INT_EQ)
+
+CPU = getcpuclass()
+
+
+class MockGcDescr(GcCache):
+ def get_funcptr_for_new(self):
+ return 123
+ get_funcptr_for_newarray = get_funcptr_for_new
+ get_funcptr_for_newstr = get_funcptr_for_new
+ get_funcptr_for_newunicode = get_funcptr_for_new
+
+ def rewrite_assembler(self, cpu, operations):
+ pass
+
+
+class MockAssembler(object):
+ gcrefs = None
+ _float_constants = None
+
+ def __init__(self, cpu=None, gc_ll_descr=None):
+ self.movs = []
+ self.performs = []
+ self.lea = []
+ if cpu is None:
+ cpu = CPU(None, None)
+ cpu.setup_once()
+ self.cpu = cpu
+ if gc_ll_descr is None:
+ gc_ll_descr = MockGcDescr(False)
+ self.cpu.gc_ll_descr = gc_ll_descr
+
+ def dump(self, *args):
+ pass
+
+ def regalloc_mov(self, from_loc, to_loc):
+ self.movs.append((from_loc, to_loc))
+
+ def regalloc_perform(self, op, arglocs, resloc):
+ self.performs.append((op, arglocs, resloc))
+
+ def regalloc_perform_discard(self, op, arglocs):
+ self.performs.append((op, arglocs))
+
+ def load_effective_addr(self, *args):
+ self.lea.append(args)
+
+
+class RegAllocForTests(Regalloc):
+ position = 0
+
+ def _compute_next_usage(self, v, _):
+ return -1
+
+
+class BaseTestRegalloc(object):
+ cpu = CPU(None, None)
+ cpu.setup_once()
+
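+    # Called through raising_fptr below: raises a zero-division LLException
+    # when its argument is true, letting tests exercise guard_exception
+    # bridges; the tests here only ever pass 0.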
+ def raising_func(i):
+ if i:
+ raise LLException(zero_division_error,
+ zero_division_value)
+ FPTR = lltype.Ptr(lltype.FuncType([lltype.Signed], lltype.Void))
+ raising_fptr = llhelper(FPTR, raising_func)
+
+ def f(a):
+ return 23
+
+ FPTR = lltype.Ptr(lltype.FuncType([lltype.Signed], lltype.Signed))
+ f_fptr = llhelper(FPTR, f)
+ f_calldescr = cpu.calldescrof(FPTR.TO, FPTR.TO.ARGS, FPTR.TO.RESULT,
+ EffectInfo.MOST_GENERAL)
+
+ zero_division_tp, zero_division_value = cpu.get_zero_division_error()
+ zd_addr = cpu.cast_int_to_adr(zero_division_tp)
+ zero_division_error = llmemory.cast_adr_to_ptr(zd_addr,
+ lltype.Ptr(rclass.OBJECT_VTABLE))
+ raising_calldescr = cpu.calldescrof(FPTR.TO, FPTR.TO.ARGS, FPTR.TO.RESULT,
+ EffectInfo.MOST_GENERAL)
+
+ targettoken = TargetToken()
+ targettoken2 = TargetToken()
+ fdescr1 = BasicFailDescr(1)
+ fdescr2 = BasicFailDescr(2)
+ fdescr3 = BasicFailDescr(3)
+
+ def setup_method(self, meth):
+ self.targettoken._arm_loop_code = 0
+ self.targettoken2._arm_loop_code = 0
+
+ def f1(x):
+ return x + 1
+
+ def f2(x, y):
+ return x * y
+
+ def f10(*args):
+ assert len(args) == 10
+ return sum(args)
+
+ F1PTR = lltype.Ptr(lltype.FuncType([lltype.Signed], lltype.Signed))
+ F2PTR = lltype.Ptr(lltype.FuncType([lltype.Signed] * 2, lltype.Signed))
+ F10PTR = lltype.Ptr(lltype.FuncType([lltype.Signed] * 10, lltype.Signed))
+ f1ptr = llhelper(F1PTR, f1)
+ f2ptr = llhelper(F2PTR, f2)
+ f10ptr = llhelper(F10PTR, f10)
+
+ f1_calldescr = cpu.calldescrof(F1PTR.TO, F1PTR.TO.ARGS, F1PTR.TO.RESULT,
+ EffectInfo.MOST_GENERAL)
+ f2_calldescr = cpu.calldescrof(F2PTR.TO, F2PTR.TO.ARGS, F2PTR.TO.RESULT,
+ EffectInfo.MOST_GENERAL)
+ f10_calldescr = cpu.calldescrof(F10PTR.TO, F10PTR.TO.ARGS,
+ F10PTR.TO.RESULT, EffectInfo.MOST_GENERAL)
+
+ namespace = locals().copy()
+ type_system = 'lltype'
+
+ def parse(self, s, boxkinds=None):
+ return parse(s, self.cpu, self.namespace,
+ type_system=self.type_system,
+ boxkinds=boxkinds)
+
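+    # parse the resoperation source, compile it into a fresh JitCellToken
+    # and, unless run=False, execute it with the given arguments, converting
+    # floats and GC pointers into the representation the CPU expects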
+ def interpret(self, ops, args, run=True):
+ loop = self.parse(ops)
+ looptoken = JitCellToken()
+ self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
+ arguments = []
+ for arg in args:
+ if isinstance(arg, int):
+ arguments.append(arg)
+ elif isinstance(arg, float):
+ arg = longlong.getfloatstorage(arg)
+ arguments.append(arg)
+ else:
+ assert isinstance(lltype.typeOf(arg), lltype.Ptr)
+ llgcref = lltype.cast_opaque_ptr(llmemory.GCREF, arg)
+ arguments.append(llgcref)
+ loop._jitcelltoken = looptoken
+ if run:
+ self.cpu.execute_token(looptoken, *arguments)
+ return loop
+
+ def prepare_loop(self, ops):
+ loop = self.parse(ops)
+ regalloc = Regalloc(assembler=self.cpu.assembler,
+ frame_manager=ARMFrameManager())
+ regalloc.prepare_loop(loop.inputargs, loop.operations)
+ return regalloc
+
+ def getint(self, index):
+ return self.cpu.get_latest_value_int(index)
+
+ def getfloat(self, index):
+ v = self.cpu.get_latest_value_float(index)
+ return longlong.getrealfloat(v)
+
+ def getints(self, end):
+ return [self.cpu.get_latest_value_int(index) for
+ index in range(0, end)]
+
+ def getfloats(self, end):
+ return [self.getfloat(index) for
+ index in range(0, end)]
+
+ def getptr(self, index, T):
+ gcref = self.cpu.get_latest_value_ref(index)
+ return lltype.cast_opaque_ptr(T, gcref)
+
+ def attach_bridge(self, ops, loop, guard_op_index, **kwds):
+ guard_op = loop.operations[guard_op_index]
+ assert guard_op.is_guard()
+ bridge = self.parse(ops, **kwds)
+ assert ([box.type for box in bridge.inputargs] ==
+ [box.type for box in guard_op.getfailargs()])
+ faildescr = guard_op.getdescr()
+ self.cpu.compile_bridge(faildescr, bridge.inputargs, bridge.operations,
+ loop._jitcelltoken)
+ return bridge
+
+ def run(self, loop, *args):
+ return self.cpu.execute_token(loop._jitcelltoken, *args)
+
+
+class TestRegallocSimple(BaseTestRegalloc):
+ def test_simple_loop(self):
+ ops = '''
+ [i0]
+ label(i0, descr=targettoken)
+ i1 = int_add(i0, 1)
+ i2 = int_lt(i1, 20)
+ guard_true(i2) [i1]
+ jump(i1, descr=targettoken)
+ '''
+ self.interpret(ops, [0])
+ assert self.getint(0) == 20
+
+ def test_two_loops_and_a_bridge(self):
+ ops = '''
+ [i0, i1, i2, i3]
+ label(i0, i1, i2, i3, descr=targettoken)
+ i4 = int_add(i0, 1)
+ i5 = int_lt(i4, 20)
+ guard_true(i5) [i4, i1, i2, i3]
+ jump(i4, i1, i2, i3, descr=targettoken)
+ '''
+ loop = self.interpret(ops, [0, 0, 0, 0])
+ ops2 = '''
+ [i5, i6, i7, i8]
+ label(i5, descr=targettoken2)
+ i1 = int_add(i5, 1)
+ i3 = int_add(i1, 1)
+ i4 = int_add(i3, 1)
+ i2 = int_lt(i4, 30)
+ guard_true(i2) [i4]
+ jump(i4, descr=targettoken2)
+ '''
+ loop2 = self.interpret(ops2, [0, 0, 0, 0])
+ bridge_ops = '''
+ [i4]
+ jump(i4, i4, i4, i4, descr=targettoken)
+ '''
+ bridge = self.attach_bridge(bridge_ops, loop2, 5)
+ self.run(loop2, 0, 0, 0, 0)
+ assert self.getint(0) == 31
+ assert self.getint(1) == 30
+ assert self.getint(2) == 30
+ assert self.getint(3) == 30
+
+ def test_pointer_arg(self):
+ ops = '''
+ [i0, p0]
+ label(i0, p0, descr=targettoken)
+ i1 = int_add(i0, 1)
+ i2 = int_lt(i1, 10)
+ guard_true(i2) [p0]
+ jump(i1, p0, descr=targettoken)
+ '''
+ S = lltype.GcStruct('S')
+ ptr = lltype.malloc(S)
+ self.cpu.clear_latest_values(2)
+ self.interpret(ops, [0, ptr])
+ assert self.getptr(0, lltype.Ptr(S)) == ptr
+
+ def test_exception_bridge_no_exception(self):
+ ops = '''
+ [i0]
+ i1 = same_as(1)
+ call(ConstClass(raising_fptr), i0, descr=raising_calldescr)
+ guard_exception(ConstClass(zero_division_error)) [i1]
+ finish(0)
+ '''
+ bridge_ops = '''
+ [i3]
+ i2 = same_as(2)
+ guard_no_exception() [i2]
+ finish(1)
+ '''
+ loop = self.interpret(ops, [0])
+ assert self.getint(0) == 1
+ bridge = self.attach_bridge(bridge_ops, loop, 2)
+ self.run(loop, 0)
+ assert self.getint(0) == 1
+
+ def test_inputarg_unused(self):
+ ops = '''
+ [i0]
+ finish(1)
+ '''
+ self.interpret(ops, [0])
+ # assert did not explode
+
+ def test_nested_guards(self):
+ ops = '''
+ [i0, i1]
+ guard_true(i0) [i0, i1]
+ finish(4)
+ '''
+ bridge_ops = '''
+ [i0, i1]
+ guard_true(i0) [i0, i1]
+ finish(3)
+ '''
+ loop = self.interpret(ops, [0, 10])
+ assert self.getint(0) == 0
+ assert self.getint(1) == 10
+ bridge = self.attach_bridge(bridge_ops, loop, 0)
+ self.run(loop, 0, 10)
+ assert self.getint(0) == 0
+ assert self.getint(1) == 10
+
+ def test_nested_unused_arg(self):
+ ops = '''
+ [i0, i1]
+ guard_true(i0) [i0, i1]
+ finish(1)
+ '''
+ loop = self.interpret(ops, [0, 1])
+ assert self.getint(0) == 0
+ bridge_ops = '''
+ [i0, i1]
+ finish(1, 2)
+ '''
+ self.attach_bridge(bridge_ops, loop, 0)
+ self.run(loop, 0, 1)
+
+ def test_spill_for_constant(self):
+ ops = '''
+ [i0, i1, i2, i3]
+ label(i0, i1, i2, i3, descr=targettoken)
+ i4 = int_add(3, i1)
+ i5 = int_lt(i4, 30)
+ guard_true(i5) [i0, i4, i2, i3]
+ jump(1, i4, 3, 4, descr=targettoken)
+ '''
+ self.interpret(ops, [0, 0, 0, 0])
+ assert self.getints(4) == [1, 30, 3, 4]
+
+ def test_spill_for_constant_lshift(self):
+ ops = '''
+ [i0, i2, i1, i3]
+ label(i0, i2, i1, i3, descr=targettoken)
+ i4 = int_lshift(1, i1)
+ i5 = int_add(1, i1)
+ i6 = int_lt(i5, 30)
+ guard_true(i6) [i4, i5, i2, i3]
+ jump(i4, 3, i5, 4, descr=targettoken)
+ '''
+ self.interpret(ops, [0, 0, 0, 0])
+ assert self.getints(4) == [1<<29, 30, 3, 4]
+ ops = '''
+ [i0, i1, i2, i3]
+ label(i0, i1, i2, i3, descr=targettoken)
+ i4 = int_lshift(1, i1)
+ i5 = int_add(1, i1)
+ i6 = int_lt(i5, 30)
+ guard_true(i6) [i4, i5, i2, i3]
+ jump(i4, i5, 3, 4, descr=targettoken)
+ '''
+ self.interpret(ops, [0, 0, 0, 0])
+ assert self.getints(4) == [1<<29, 30, 3, 4]
+ ops = '''
+ [i0, i3, i1, i2]
+ label(i0, i3, i1, i2, descr=targettoken)
+ i4 = int_lshift(1, i1)
+ i5 = int_add(1, i1)
+ i6 = int_lt(i5, 30)
+ guard_true(i6) [i4, i5, i2, i3]
+ jump(i4, 4, i5, 3, descr=targettoken)
+ '''
+ self.interpret(ops, [0, 0, 0, 0])
+ assert self.getints(4) == [1<<29, 30, 3, 4]
+
+ def test_result_selected_reg_via_neg(self):
+ ops = '''
+ [i0, i1, i2, i3]
+ label(i0, i1, i2, i3, descr=targettoken)
+ i6 = int_neg(i2)
+ i7 = int_add(1, i1)
+ i4 = int_lt(i7, 10)
+ guard_true(i4) [i0, i6, i7]
+ jump(1, i7, i2, i6, descr=targettoken)
+ '''
+ self.interpret(ops, [0, 0, 3, 0])
+ assert self.getints(3) == [1, -3, 10]
+
+ def test_compare_memory_result_survives(self):
+ ops = '''
+ [i0, i1, i2, i3]
+ label(i0, i1, i2, i3, descr=targettoken)
+ i4 = int_lt(i0, i1)
+ i5 = int_add(i3, 1)
+ i6 = int_lt(i5, 30)
+ guard_true(i6) [i4]
+ jump(i0, i1, i4, i5, descr=targettoken)
+ '''
+ self.interpret(ops, [0, 10, 0, 0])
+ assert self.getint(0) == 1
+
+ def test_jump_different_args(self):
+ ops = '''
+ [i0, i15, i16, i18, i1, i2, i3]
+ label(i0, i15, i16, i18, i1, i2, i3, descr=targettoken)
+ i4 = int_add(i3, 1)
+ i5 = int_lt(i4, 20)
+ guard_true(i5) [i2, i1]
+ jump(i0, i18, i15, i16, i2, i1, i4, descr=targettoken)
+ '''
+ self.interpret(ops, [0, 1, 2, 3, 0, 0, 0])
+
+ def test_op_result_unused(self):
+ ops = '''
+ [i0, i1]
+ i2 = int_add(i0, i1)
+ finish(0)
+ '''
+ self.interpret(ops, [0, 0])
+
+ def test_guard_value_two_boxes(self):
+ ops = '''
+ [i0, i1, i2, i3, i4, i5, i6, i7]
+ guard_value(i6, i1) [i0, i2, i3, i4, i5, i6]
+ finish(i0, i2, i3, i4, i5, i6)
+ '''
+ self.interpret(ops, [0, 0, 0, 0, 0, 0, 0, 0])
+ assert self.getint(0) == 0
+
+ def test_bug_wrong_stack_adj(self):
+ ops = '''
+ [i0, i1, i2, i3, i4, i5, i6, i7, i8]
+ i9 = same_as(0)
+ guard_true(i0) [i9, i0, i1, i2, i3, i4, i5, i6, i7, i8]
+ finish(1, i0, i1, i2, i3, i4, i5, i6, i7, i8)
+ '''
+ loop = self.interpret(ops, [0, 1, 2, 3, 4, 5, 6, 7, 8])
+ assert self.getint(0) == 0
+ bridge_ops = '''
+ [i9, i0, i1, i2, i3, i4, i5, i6, i7, i8]
+ call(ConstClass(raising_fptr), 0, descr=raising_calldescr)
+ finish(i0, i1, i2, i3, i4, i5, i6, i7, i8)
+ '''
+ self.attach_bridge(bridge_ops, loop, 1)
+ self.run(loop, 0, 1, 2, 3, 4, 5, 6, 7, 8)
+ assert self.getints(9) == range(9)
+
+ def test_loopargs(self):
+ ops = """
+ [i0, i1, i2, i3]
+ i4 = int_add(i0, i1)
+ jump(i4, i1, i2, i3)
+ """
+ regalloc = self.prepare_loop(ops)
+ assert len(regalloc.rm.reg_bindings) == 4
+ assert len(regalloc.frame_manager.bindings) == 0
+
+ def test_loopargs_2(self):
+ ops = """
+ [i0, i1, i2, i3]
+ i4 = int_add(i0, i1)
+ finish(i4, i1, i2, i3)
+ """
+ regalloc = self.prepare_loop(ops)
+ assert len(regalloc.rm.reg_bindings) == 4
+
+ def test_loopargs_3(self):
+ ops = """
+ [i0, i1, i2, i3]
+ i4 = int_add(i0, i1)
+ guard_true(i4) [i0, i1, i2, i3, i4]
+ jump(i4, i1, i2, i3)
+ """
+ regalloc = self.prepare_loop(ops)
+ assert len(regalloc.rm.reg_bindings) == 4
+
+
+class TestRegallocCompOps(BaseTestRegalloc):
+
+ def test_cmp_op_0(self):
+ ops = '''
+ [i0, i3]
+ i1 = same_as(1)
+ i2 = int_lt(i0, 100)
+ guard_true(i3) [i1, i2]
+ finish(0, i2)
+ '''
+ self.interpret(ops, [0, 1])
+ assert self.getint(0) == 0
+
+
+class TestRegallocMoreRegisters(BaseTestRegalloc):
+
+ cpu = BaseTestRegalloc.cpu
+ targettoken = TargetToken()
+
+ S = lltype.GcStruct('S', ('field', lltype.Char))
+ fielddescr = cpu.fielddescrof(S, 'field')
+
+ A = lltype.GcArray(lltype.Char)
+ I = lltype.GcArray(lltype.Signed)
+ arraydescr = cpu.arraydescrof(A)
+ arraydescr_i = cpu.arraydescrof(I)
+
+ namespace = locals().copy()
+
+ def test_int_is_true(self):
+ ops = '''
+ [i0, i1, i2, i3, i4, i5, i6, i7]
+ i10 = int_is_true(i0)
+ i11 = int_is_true(i1)
+ i12 = int_is_true(i2)
+ i13 = int_is_true(i3)
+ i14 = int_is_true(i4)
+ i15 = int_is_true(i5)
+ i16 = int_is_true(i6)
+ i17 = int_is_true(i7)
+ finish(i10, i11, i12, i13, i14, i15, i16, i17)
+ '''
+ self.interpret(ops, [0, 42, 12, 0, 13, 0, 0, 3333])
+ assert self.getints(8) == [0, 1, 1, 0, 1, 0, 0, 1]
+
+ def test_comparison_ops(self):
+ ops = '''
+ [i0, i1, i2, i3, i4, i5, i6]
+ i10 = int_lt(i0, i1)
+ i11 = int_le(i2, i3)
+ i12 = int_ge(i4, i5)
+ i13 = int_eq(i5, i6)
+ i14 = int_gt(i6, i2)
+ i15 = int_ne(i2, i6)
+ finish(i10, i11, i12, i13, i14, i15)
+ '''
+ self.interpret(ops, [0, 1, 2, 3, 4, 5, 6])
+ assert self.getints(6) == [1, 1, 0, 0, 1, 1]
+
+ def test_strsetitem(self):
+ ops = '''
+ [p0, i]
+ strsetitem(p0, 1, i)
+ finish()
+ '''
+ llstr = rstr.mallocstr(10)
+ self.interpret(ops, [llstr, ord('a')])
+ assert llstr.chars[1] == 'a'
+
+ def test_setfield_char(self):
+ ops = '''
+ [p0, i]
+ setfield_gc(p0, i, descr=fielddescr)
+ finish()
+ '''
+ s = lltype.malloc(self.S)
+ self.interpret(ops, [s, ord('a')])
+ assert s.field == 'a'
+
+ def test_setarrayitem_gc(self):
+ ops = '''
+ [p0, i]
+ setarrayitem_gc(p0, 1, i, descr=arraydescr)
+ finish()
+ '''
+ s = lltype.malloc(self.A, 3)
+ self.interpret(ops, [s, ord('a')])
+ assert s[1] == 'a'
+
+ def test_setarrayitem2_gc(self):
+ ops = '''
+ [p0, i, i1]
+ setarrayitem_gc(p0, i1, i, descr=arraydescr)
+ finish()
+ '''
+ s = lltype.malloc(self.A, 3)
+ self.interpret(ops, [s, ord('a'), 1])
+ assert s[1] == 'a'
+
+ def test_setarrayitem3_gc(self):
+ ops = '''
+ [p0, i0, i1]
+ setarrayitem_gc(p0, i1, i0, descr=arraydescr_i)
+ finish()
+ '''
+ s = lltype.malloc(self.I, 3)
+ self.interpret(ops, [s, 1234567890, 1])
+ assert s[1] == 1234567890
+
+ def test_setarrayitem4_gc(self):
+ ops = '''
+ [p0, i0]
+ setarrayitem_gc(p0, 1, i0, descr=arraydescr_i)
+ finish()
+ '''
+ s = lltype.malloc(self.I, 3)
+ self.interpret(ops, [s, 1234567890])
+ assert s[1] == 1234567890
+
+ def test_division_optimized(self):
+ ops = '''
+ [i7, i6]
+ label(i7, i6, descr=targettoken)
+ i18 = int_floordiv(i7, i6)
+ i19 = int_xor(i7, i6)
+ i21 = int_lt(i19, 0)
+ i22 = int_mod(i7, i6)
+ i23 = int_is_true(i22)
+ i24 = int_eq(i6, 4)
+ guard_false(i24) [i18]
+ jump(i18, i6, descr=targettoken)
+ '''
+ self.interpret(ops, [10, 4])
+ assert self.getint(0) == 2
+ # FIXME: Verify that i19 - i23 are removed
+
+
+class TestRegallocFloats(BaseTestRegalloc):
+ def test_float_add(self):
+ if not self.cpu.supports_floats:
+ py.test.skip("requires floats")
+ ops = '''
+ [f0, f1]
+ f2 = float_add(f0, f1)
+ finish(f2, f0, f1)
+ '''
+ self.interpret(ops, [3.0, 1.5])
+ assert self.getfloats(3) == [4.5, 3.0, 1.5]
+
+ def test_float_adds_stack(self):
+ if not self.cpu.supports_floats:
+ py.test.skip("requires floats")
+ ops = '''
+ [f0, f1, f2, f3, f4, f5, f6, f7, f8]
+ f9 = float_add(f0, f1)
+ f10 = float_add(f8, 3.5)
+ finish(f9, f10, f2, f3, f4, f5, f6, f7, f8)
+ '''
+ self.interpret(ops, [0.1, .2, .3, .4, .5, .6, .7, .8, .9])
+ assert self.getfloats(9) == [.1 + .2, .9 + 3.5, .3,
+ .4, .5, .6, .7, .8, .9]
+
+ def test_lt_const(self):
+ if not self.cpu.supports_floats:
+ py.test.skip("requires floats")
+ ops = '''
+ [f0]
+ i1 = float_lt(3.5, f0)
+ finish(i1)
+ '''
+ self.interpret(ops, [0.1])
+ assert self.getint(0) == 0
+
+ def test_bug_float_is_true_stack(self):
+ if not self.cpu.supports_floats:
+ py.test.skip("requires floats")
+ # NB. float_is_true no longer exists. Unsure if keeping this test
+ # makes sense any more.
+ ops = '''
+ [f0, f1, f2, f3, f4, f5, f6, f7, f8, f9]
+ i0 = float_ne(f0, 0.0)
+ i1 = float_ne(f1, 0.0)
+ i2 = float_ne(f2, 0.0)
+ i3 = float_ne(f3, 0.0)
+ i4 = float_ne(f4, 0.0)
+ i5 = float_ne(f5, 0.0)
+ i6 = float_ne(f6, 0.0)
+ i7 = float_ne(f7, 0.0)
+ i8 = float_ne(f8, 0.0)
+ i9 = float_ne(f9, 0.0)
+ finish(i0, i1, i2, i3, i4, i5, i6, i7, i8, i9)
+ '''
+ self.interpret(ops, [0.0, .1, .2, .3, .4, .5, .6, .7, .8, .9])
+ assert self.getints(9) == [0, 1, 1, 1, 1, 1, 1, 1, 1]
+
+
+class TestRegAllocCallAndStackDepth(BaseTestRegalloc):
+ def expected_param_depth(self, num_args):
+ # Assumes the arguments are all non-float
+ return num_args
+
+ def test_one_call(self):
+ ops = '''
+ [i0, i1, i2, i3, i4, i5, i6, i7, i8, i9]
+ i10 = call(ConstClass(f1ptr), i0, descr=f1_calldescr)
+ finish(i10, i1, i2, i3, i4, i5, i6, i7, i8, i9)
+ '''
+ self.interpret(ops, [4, 7, 9, 9, 9, 9, 9, 9, 9, 9])
+ assert self.getints(10) == [5, 7, 9, 9, 9, 9, 9, 9, 9, 9]
+
+ def test_two_calls(self):
+ ops = '''
+ [i0, i1, i2, i3, i4, i5, i6, i7, i8, i9]
+ i10 = call(ConstClass(f1ptr), i0, descr=f1_calldescr)
+ i11 = call(ConstClass(f2ptr), i10, i1, descr=f2_calldescr)
+ finish(i11, i1, i2, i3, i4, i5, i6, i7, i8, i9)
+ '''
+ self.interpret(ops, [4, 7, 9, 9, 9, 9, 9, 9, 9, 9])
+ assert self.getints(10) == [5 * 7, 7, 9, 9, 9, 9, 9, 9, 9, 9]
+
+ def test_call_many_arguments(self):
+ ops = '''
+ [i0, i1, i2, i3, i4, i5, i6, i7]
+ i8 = call(ConstClass(f10ptr), 1, i0, i1, i2, i3, i4, i5, i6, i7, 10, descr=f10_calldescr)
+ finish(i8)
+ '''
+ self.interpret(ops, [2, 3, 4, 5, 6, 7, 8, 9])
+ assert self.getint(0) == 55
+
+ def test_bridge_calls_1(self):
+ ops = '''
+ [i0, i1]
+ i2 = call(ConstClass(f1ptr), i0, descr=f1_calldescr)
+ guard_value(i2, 0, descr=fdescr1) [i2, i1]
+ finish(i1)
+ '''
+ loop = self.interpret(ops, [4, 7])
+ assert self.getint(0) == 5
+ ops = '''
+ [i2, i1]
+ i3 = call(ConstClass(f2ptr), i2, i1, descr=f2_calldescr)
+ finish(i3, descr=fdescr2)
+ '''
+ self.attach_bridge(ops, loop, -2)
+
+ self.run(loop, 4, 7)
+ assert self.getint(0) == 5 * 7
+
+ def test_bridge_calls_2(self):
+ ops = '''
+ [i0, i1]
+ i2 = call(ConstClass(f2ptr), i0, i1, descr=f2_calldescr)
+ guard_value(i2, 0, descr=fdescr1) [i2]
+ finish(i1)
+ '''
+ loop = self.interpret(ops, [4, 7])
+ assert self.getint(0) == 4 * 7
+ ops = '''
+ [i2]
+ i3 = call(ConstClass(f1ptr), i2, descr=f1_calldescr)
+ finish(i3, descr=fdescr2)
+ '''
+ self.attach_bridge(ops, loop, -2)
+
+ self.run(loop, 4, 7)
+ assert self.getint(0) == 29
+
+
+class TestJumps(BaseTestRegalloc):
+ def test_jump_with_consts(self):
+ loop = """
+ [i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12, i13, i14]
+ label(i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12, i13, i14, descr=targettoken)
+ jump(i1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, descr=targettoken)
+ """
+ self.interpret(loop, range(15), run=False)
+ # ensure compiling this loop works
+ assert 1
+
+ def test_from_loop_to_loop(self):
+ def assembler_helper(failindex, virtualizable):
+ return 3
+
+ FUNCPTR = lltype.Ptr(lltype.FuncType([lltype.Signed, llmemory.GCREF],
+ lltype.Signed))
+
+ class FakeJitDriverSD:
+ index_of_virtualizable = -1
+ _assembler_helper_ptr = llhelper(FUNCPTR, assembler_helper)
+ assembler_helper_adr = llmemory.cast_ptr_to_adr(
+ _assembler_helper_ptr)
+
+ FakeJitDriverSD.portal_calldescr = self.cpu.calldescrof(
+            lltype.Ptr(lltype.FuncType([lltype.Signed], lltype.Signed)),
+ [lltype.Signed], lltype.Signed, EffectInfo.MOST_GENERAL)
+ loop1 = """
+ [i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10]
+ i11 = int_add(i0, i1)
+ finish(i11, i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10)
+ """
+ large = self.interpret(loop1, range(11), run=False)
+ large._jitcelltoken.outermost_jitdriver_sd = FakeJitDriverSD()
+ self.namespace['looptoken'] = large._jitcelltoken
+ assert self.namespace['looptoken']._arm_func_addr != 0
+ loop2 = """
+ [i0]
+ i1 = force_token()
+ i2 = call_assembler(1,2,3,4,5,6,7,8,9,10,11, descr=looptoken)
+ guard_not_forced() [i0]
+ finish(i0, i2)
+ """
+
+ self.interpret(loop2, [110])
+ assert self.getint(0) == 110
+ assert self.getint(1) == 3
+
+ def test_far_far_jump(self):
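+        # the long chain of calls below pads the loop body so that the
+        # backward jump to the label must span a large distance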
+ ops = """
+ [i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10]
+ label(i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, descr=targettoken)
+ i11 = int_add(i0, 1)
+ i12 = int_lt(i11, 2)
+ i13 = call(ConstClass(f_fptr), i12, descr=f_calldescr)
+ i14 = call(ConstClass(f_fptr), i12, descr=f_calldescr)
+ i15 = call(ConstClass(f_fptr), i12, descr=f_calldescr)
+ i16 = call(ConstClass(f_fptr), i12, descr=f_calldescr)
+ i17 = call(ConstClass(f_fptr), i12, descr=f_calldescr)
+ i18 = call(ConstClass(f_fptr), i12, descr=f_calldescr)
+ i19 = call(ConstClass(f_fptr), i12, descr=f_calldescr)
+ i20 = call(ConstClass(f_fptr), i12, descr=f_calldescr)
+ i21 = call(ConstClass(f_fptr), i12, descr=f_calldescr)
+ i22 = call(ConstClass(f_fptr), i12, descr=f_calldescr)
+ i23 = call(ConstClass(f_fptr), i12, descr=f_calldescr)
+ i24 = call(ConstClass(f_fptr), i12, descr=f_calldescr)
+ i26 = call(ConstClass(f_fptr), i12, descr=f_calldescr)
+ i27 = call(ConstClass(f_fptr), i12, descr=f_calldescr)
+ i28 = call(ConstClass(f_fptr), i12, descr=f_calldescr)
+ i29 = call(ConstClass(f_fptr), i12, descr=f_calldescr)
+ i30 = call(ConstClass(f_fptr), i12, descr=f_calldescr)
+ guard_true(i12) [i11, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10]
+ jump(i11, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, descr=targettoken)
+ """
+ self.interpret(ops, range(11))
+ assert self.getint(0) == 2 # and not segfault()
+
+
+class TestStrOps(BaseTestRegalloc):
+ def test_newstr(self):
+ ops = """
+ [i0]
+ p1 = newstr(300)
+ i2 = strlen(p1)
+ finish(i2)
+ """
+ self.interpret(ops, [0])
+ assert self.getint(0) == 300
+ ops = """
+ [i0]
+ p1 = newstr(i0)
+ i2 = strlen(p1)
+ finish(i2)
+ """
+ self.interpret(ops, [300])
+ assert self.getint(0) == 300
+
+ def test_strlen(self):
+ s = rstr.mallocstr(300)
+ ops = """
+ [p0]
+ i1 = strlen(p0)
+ finish(i1)
+ """
+ self.interpret(ops, [s])
+ assert self.getint(0) == 300
+
+ def test_len_of_newstr(self):
+ ops = """
+ []
+ p0 = newstr(300)
+ finish(p0)
+ """
+ self.interpret(ops, [])
+ string = self.getptr(0, lltype.Ptr(rstr.STR))
+ assert len(string.chars) == 300
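
The tests in this file all follow one pattern: a small trace is written as a
string, compiled and executed through the backend via interpret(), and the
results are read back with getint()/getints()/getfloats(). As a rough,
self-contained illustration of that flow, here is a toy evaluator in plain
Python; run_trace and its single supported operation are invented for this
sketch and are not the real BaseTestRegalloc API:

    def run_trace(ops_text, args):
        # Toy stand-in for BaseTestRegalloc.interpret(): bind the input
        # arguments, evaluate int_add operations, return the finish() values.
        env = {}
        lines = [l.strip() for l in ops_text.strip().splitlines()]
        for name, value in zip(lines[0].strip('[]').split(', '), args):
            env[name] = value
        for line in lines[1:]:
            if line.startswith('finish('):
                return [env[v] for v in line[len('finish('):-1].split(', ')]
            target, expr = line.split(' = ')
            opname, argstr = expr.rstrip(')').split('(')
            vals = [env[a] for a in argstr.split(', ')]
            assert opname == 'int_add'  # the only operation this toy knows
            env[target] = vals[0] + vals[1]

    results = run_trace('''
    [i0, i1]
    i2 = int_add(i0, i1)
    finish(i2, i0)
    ''', [4, 7])
    assert results == [11, 4]  # cf. self.getints(2) in the real tests

The real harness compiles the trace to machine code and runs it rather than
interpreting the strings, but the shape of the tests above is the same.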
diff --git a/pypy/jit/backend/arm/test/test_regalloc2.py b/pypy/jit/backend/arm/test/test_regalloc2.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/test/test_regalloc2.py
@@ -0,0 +1,275 @@
+import py
+from pypy.jit.metainterp.history import ResOperation, BoxInt, ConstInt,\
+ BoxPtr, ConstPtr, BasicFailDescr
+from pypy.jit.metainterp.history import JitCellToken
+from pypy.jit.metainterp.resoperation import rop
+from pypy.jit.backend.detect_cpu import getcpuclass
+from pypy.jit.backend.arm.arch import WORD
+CPU = getcpuclass()
+
+def test_bug_rshift():
+ v1 = BoxInt()
+ v2 = BoxInt()
+ v3 = BoxInt()
+ v4 = BoxInt()
+ inputargs = [v1]
+ operations = [
+ ResOperation(rop.INT_ADD, [v1, v1], v2),
+ ResOperation(rop.INT_INVERT, [v2], v3),
+ ResOperation(rop.UINT_RSHIFT, [v1, ConstInt(3)], v4),
+ ResOperation(rop.FINISH, [v4, v3], None, descr=BasicFailDescr()),
+ ]
+ cpu = CPU(None, None)
+ cpu.setup_once()
+ looptoken = JitCellToken()
+ cpu.compile_loop(inputargs, operations, looptoken)
+ cpu.execute_token(looptoken, 9)
+ assert cpu.get_latest_value_int(0) == (9 >> 3)
+ assert cpu.get_latest_value_int(1) == (~18)
+
+def test_bug_int_is_true_1():
+ v1 = BoxInt()
+ v2 = BoxInt()
+ v3 = BoxInt()
+ v4 = BoxInt()
+ tmp5 = BoxInt()
+ inputargs = [v1]
+ operations = [
+ ResOperation(rop.INT_MUL, [v1, v1], v2),
+ ResOperation(rop.INT_MUL, [v2, v1], v3),
+ ResOperation(rop.INT_IS_TRUE, [v2], tmp5),
+ ResOperation(rop.INT_IS_ZERO, [tmp5], v4),
+ ResOperation(rop.FINISH, [v4, v3, tmp5], None, descr=BasicFailDescr()),
+ ]
+ cpu = CPU(None, None)
+ cpu.setup_once()
+ looptoken = JitCellToken()
+ cpu.compile_loop(inputargs, operations, looptoken)
+ cpu.execute_token(looptoken, -10)
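+    # with v1 == -10: v2 == 100, v3 == -1000, tmp5 == 1, v4 == 0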
+ assert cpu.get_latest_value_int(0) == 0
+ assert cpu.get_latest_value_int(1) == -1000
+ assert cpu.get_latest_value_int(2) == 1
+
+def test_bug_0():
+ v1 = BoxInt()
+ v2 = BoxInt()
+ v3 = BoxInt()
+ v4 = BoxInt()
+ v5 = BoxInt()
+ v6 = BoxInt()
+ v7 = BoxInt()
+ v8 = BoxInt()
+ v9 = BoxInt()
+ v10 = BoxInt()
+ v11 = BoxInt()
+ v12 = BoxInt()
+ v13 = BoxInt()
+ v14 = BoxInt()
+ v15 = BoxInt()
+ v16 = BoxInt()
+ v17 = BoxInt()
+ v18 = BoxInt()
+ v19 = BoxInt()
+ v20 = BoxInt()
+ v21 = BoxInt()
+ v22 = BoxInt()
+ v23 = BoxInt()
+ v24 = BoxInt()
+ v25 = BoxInt()
+ v26 = BoxInt()
+ v27 = BoxInt()
+ v28 = BoxInt()
+ v29 = BoxInt()
+ v30 = BoxInt()
+ v31 = BoxInt()
+ v32 = BoxInt()
+ v33 = BoxInt()
+ v34 = BoxInt()
+ v35 = BoxInt()
+ v36 = BoxInt()
+ v37 = BoxInt()
+ v38 = BoxInt()
+ v39 = BoxInt()
+ v40 = BoxInt()
+ tmp41 = BoxInt()
+ tmp42 = BoxInt()
+ tmp43 = BoxInt()
+ tmp44 = BoxInt()
+ tmp45 = BoxInt()
+ tmp46 = BoxInt()
+ inputargs = [v1, v2, v3, v4, v5, v6, v7, v8, v9, v10]
+ operations = [
+ ResOperation(rop.UINT_GT, [v3, ConstInt(-48)], v11),
+ ResOperation(rop.INT_XOR, [v8, v1], v12),
+ ResOperation(rop.INT_GT, [v6, ConstInt(-9)], v13),
+ ResOperation(rop.INT_LE, [v13, v2], v14),
+ ResOperation(rop.INT_LE, [v11, v5], v15),
+ ResOperation(rop.UINT_GE, [v13, v13], v16),
+ ResOperation(rop.INT_OR, [v9, ConstInt(-23)], v17),
+ ResOperation(rop.INT_LT, [v10, v13], v18),
+ ResOperation(rop.INT_OR, [v15, v5], v19),
+ ResOperation(rop.INT_XOR, [v17, ConstInt(54)], v20),
+ ResOperation(rop.INT_MUL, [v8, v10], v21),
+ ResOperation(rop.INT_OR, [v3, v9], v22),
+ ResOperation(rop.INT_AND, [v11, ConstInt(-4)], tmp41),
+ ResOperation(rop.INT_OR, [tmp41, ConstInt(1)], tmp42),
+ ResOperation(rop.INT_MOD, [v12, tmp42], v23),
+ ResOperation(rop.INT_IS_TRUE, [v6], v24),
+ ResOperation(rop.UINT_RSHIFT, [v15, ConstInt(6)], v25),
+ ResOperation(rop.INT_OR, [ConstInt(-4), v25], v26),
+ ResOperation(rop.INT_INVERT, [v8], v27),
+ ResOperation(rop.INT_SUB, [ConstInt(-113), v11], v28),
+ ResOperation(rop.INT_NEG, [v7], v29),
+ ResOperation(rop.INT_NEG, [v24], v30),
+ ResOperation(rop.INT_FLOORDIV, [v3, ConstInt(53)], v31),
+ ResOperation(rop.INT_MUL, [v28, v27], v32),
+ ResOperation(rop.INT_AND, [v18, ConstInt(-4)], tmp43),
+ ResOperation(rop.INT_OR, [tmp43, ConstInt(1)], tmp44),
+ ResOperation(rop.INT_MOD, [v26, tmp44], v33),
+ ResOperation(rop.INT_OR, [v27, v19], v34),
+ ResOperation(rop.UINT_LT, [v13, ConstInt(1)], v35),
+ ResOperation(rop.INT_AND, [v21, ConstInt(31)], tmp45),
+ ResOperation(rop.INT_RSHIFT, [v21, tmp45], v36),
+ ResOperation(rop.INT_AND, [v20, ConstInt(31)], tmp46),
+ ResOperation(rop.UINT_RSHIFT, [v4, tmp46], v37),
+ ResOperation(rop.UINT_GT, [v33, ConstInt(-11)], v38),
+ ResOperation(rop.INT_NEG, [v7], v39),
+ ResOperation(rop.INT_GT, [v24, v32], v40),
+ ResOperation(rop.FINISH, [v40, v36, v37, v31, v16, v34, v35, v23, v22, v29, v14, v39, v30, v38], None, descr=BasicFailDescr()),
+ ]
+ cpu = CPU(None, None)
+ cpu.setup_once()
+ looptoken = JitCellToken()
+ cpu.compile_loop(inputargs, operations, looptoken)
+    args = [-13, 10, 10, 8, -8, -16, -18, 46, -12, 26]
+ cpu.execute_token(looptoken, *args)
+ assert cpu.get_latest_value_int(0) == 0
+ assert cpu.get_latest_value_int(1) == 0
+ assert cpu.get_latest_value_int(2) == 0
+ assert cpu.get_latest_value_int(3) == 0
+ assert cpu.get_latest_value_int(4) == 1
+ assert cpu.get_latest_value_int(5) == -7
+ assert cpu.get_latest_value_int(6) == 1
+ assert cpu.get_latest_value_int(7) == 0
+ assert cpu.get_latest_value_int(8) == -2
+ assert cpu.get_latest_value_int(9) == 18
+ assert cpu.get_latest_value_int(10) == 1
+ assert cpu.get_latest_value_int(11) == 18
+ assert cpu.get_latest_value_int(12) == -1
+ assert cpu.get_latest_value_int(13) == 0
+
+def test_bug_1():
+ v1 = BoxInt()
+ v2 = BoxInt()
+ v3 = BoxInt()
+ v4 = BoxInt()
+ v5 = BoxInt()
+ v6 = BoxInt()
+ v7 = BoxInt()
+ v8 = BoxInt()
+ v9 = BoxInt()
+ v10 = BoxInt()
+ v11 = BoxInt()
+ v12 = BoxInt()
+ v13 = BoxInt()
+ v14 = BoxInt()
+ v15 = BoxInt()
+ v16 = BoxInt()
+ v17 = BoxInt()
+ v18 = BoxInt()
+ v19 = BoxInt()
+ v20 = BoxInt()
+ v21 = BoxInt()
+ v22 = BoxInt()
+ v23 = BoxInt()
+ v24 = BoxInt()
+ v25 = BoxInt()
+ v26 = BoxInt()
+ v27 = BoxInt()
+ v28 = BoxInt()
+ v29 = BoxInt()
+ v30 = BoxInt()
+ v31 = BoxInt()
+ v32 = BoxInt()
+ v33 = BoxInt()
+ v34 = BoxInt()
+ v35 = BoxInt()
+ v36 = BoxInt()
+ v37 = BoxInt()
+ v38 = BoxInt()
+ v39 = BoxInt()
+ v40 = BoxInt()
+ tmp41 = BoxInt()
+ tmp42 = BoxInt()
+ tmp43 = BoxInt()
+ tmp44 = BoxInt()
+ tmp45 = BoxInt()
+ inputargs = [v1, v2, v3, v4, v5, v6, v7, v8, v9, v10]
+ operations = [
+ ResOperation(rop.UINT_LT, [v6, ConstInt(0)], v11),
+ ResOperation(rop.INT_AND, [v3, ConstInt(31)], tmp41),
+ ResOperation(rop.INT_RSHIFT, [v3, tmp41], v12),
+ ResOperation(rop.INT_NEG, [v2], v13),
+ ResOperation(rop.INT_ADD, [v11, v7], v14),
+ ResOperation(rop.INT_OR, [v3, v2], v15),
+ ResOperation(rop.INT_OR, [v12, v12], v16),
+ ResOperation(rop.INT_NE, [v2, v5], v17),
+ ResOperation(rop.INT_AND, [v5, ConstInt(31)], tmp42),
+ ResOperation(rop.UINT_RSHIFT, [v14, tmp42], v18),
+ ResOperation(rop.INT_AND, [v14, ConstInt(31)], tmp43),
+ ResOperation(rop.INT_LSHIFT, [ConstInt(7), tmp43], v19),
+ ResOperation(rop.INT_NEG, [v19], v20),
+ ResOperation(rop.INT_MOD, [v3, ConstInt(1)], v21),
+ ResOperation(rop.UINT_GE, [v15, v1], v22),
+ ResOperation(rop.INT_AND, [v16, ConstInt(31)], tmp44),
+ ResOperation(rop.INT_LSHIFT, [v8, tmp44], v23),
+ ResOperation(rop.INT_IS_TRUE, [v17], v24),
+ ResOperation(rop.INT_AND, [v5, ConstInt(31)], tmp45),
+ ResOperation(rop.INT_LSHIFT, [v14, tmp45], v25),
+ ResOperation(rop.INT_LSHIFT, [v5, ConstInt(17)], v26),
+ ResOperation(rop.INT_EQ, [v9, v15], v27),
+ ResOperation(rop.INT_GE, [ConstInt(0), v6], v28),
+ ResOperation(rop.INT_NEG, [v15], v29),
+ ResOperation(rop.INT_NEG, [v22], v30),
+ ResOperation(rop.INT_ADD, [v7, v16], v31),
+ ResOperation(rop.UINT_LT, [v19, v19], v32),
+ ResOperation(rop.INT_ADD, [v2, ConstInt(1)], v33),
+ ResOperation(rop.INT_NEG, [v5], v34),
+ ResOperation(rop.INT_ADD, [v17, v24], v35),
+ ResOperation(rop.UINT_LT, [ConstInt(2), v16], v36),
+ ResOperation(rop.INT_NEG, [v9], v37),
+ ResOperation(rop.INT_GT, [v4, v11], v38),
+ ResOperation(rop.INT_LT, [v27, v22], v39),
+ ResOperation(rop.INT_NEG, [v27], v40),
+ ResOperation(rop.FINISH, [v40, v10, v36, v26, v13, v30, v21, v33, v18, v25, v31, v32, v28, v29, v35, v38, v20, v39, v34, v23, v37], None, descr=BasicFailDescr()),
+ ]
+ cpu = CPU(None, None)
+ cpu.setup_once()
+ looptoken = JitCellToken()
+ cpu.compile_loop(inputargs, operations, looptoken)
+    args = [17, -20, -6, 6, 1, 13, 13, 9, 49, 8]
+ cpu.execute_token(looptoken, *args)
+ assert cpu.get_latest_value_int(0) == 0
+ assert cpu.get_latest_value_int(1) == 8
+ assert cpu.get_latest_value_int(2) == 1
+ assert cpu.get_latest_value_int(3) == 131072
+ assert cpu.get_latest_value_int(4) == 20
+ assert cpu.get_latest_value_int(5) == -1
+ assert cpu.get_latest_value_int(6) == 0
+ assert cpu.get_latest_value_int(7) == -19
+ assert cpu.get_latest_value_int(8) == 6
+ assert cpu.get_latest_value_int(9) == 26
+ assert cpu.get_latest_value_int(10) == 12
+ assert cpu.get_latest_value_int(11) == 0
+ assert cpu.get_latest_value_int(12) == 0
+ assert cpu.get_latest_value_int(13) == 2
+ assert cpu.get_latest_value_int(14) == 2
+ assert cpu.get_latest_value_int(15) == 1
+ assert cpu.get_latest_value_int(16) == -57344
+ assert cpu.get_latest_value_int(17) == 1
+ assert cpu.get_latest_value_int(18) == -1
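+    # result 19 is v23 = v8 << (v16 & 31) = 9 << 31; that fits in 64 bits
+    # but truncates to -2**31 in a 32-bit word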
+ if WORD == 4:
+ assert cpu.get_latest_value_int(19) == -2147483648
+ elif WORD == 8:
+ assert cpu.get_latest_value_int(19) == 19327352832
+ assert cpu.get_latest_value_int(20) == -49
diff --git a/pypy/jit/backend/arm/test/test_regalloc_mov.py b/pypy/jit/backend/arm/test/test_regalloc_mov.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/test/test_regalloc_mov.py
@@ -0,0 +1,522 @@
+from pypy.rlib.objectmodel import instantiate
+from pypy.jit.backend.arm.assembler import AssemblerARM
+from pypy.jit.backend.arm.locations import imm, ConstFloatLoc,\
+ RegisterLocation, StackLocation, \
+ VFPRegisterLocation, get_fp_offset
+from pypy.jit.backend.arm.registers import lr, ip, fp, vfp_ip
+from pypy.jit.backend.arm.conditions import AL
+from pypy.jit.backend.arm.arch import WORD
+from pypy.jit.metainterp.history import FLOAT
+import py
+
+
+class MockInstr(object):
+ def __init__(self, name, *args, **kwargs):
+ self.name = name
+ self.args = args
+ self.kwargs = kwargs
+
+ def __call__(self, *args, **kwargs):
+ self.args = args
+ self.kwargs = kwargs
+
+ def __repr__(self):
+ return "%s %r %r" % (self.name, self.args, self.kwargs)
+
+ __str__ = __repr__
+
+ def __eq__(self, other):
+ return (self.__class__ == other.__class__
+ and self.name == other.name
+ and self.args == other.args
+ and self.kwargs == other.kwargs)
+mi = MockInstr
+
+
+# helper method for tests
+def r(i):
+ return RegisterLocation(i)
+
+
+def vfp(i):
+ return VFPRegisterLocation(i)
+
+
+def stack(i, **kwargs):
+ return StackLocation(i, get_fp_offset(i), **kwargs)
+
+
+def stack_float(i, **kwargs):
+ return StackLocation(i, get_fp_offset(i + 1), type=FLOAT)
+
+
+def imm_float(value):
+ addr = int(value) # whatever
+ return ConstFloatLoc(addr)
+
+
+class MockBuilder(object):
+ def __init__(self):
+ self.instrs = []
+
+ def __getattr__(self, name):
+ i = MockInstr(name)
+ self.instrs.append(i)
+ return i
+
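+# Accessing any attribute of a MockBuilder records a MockInstr, and calling
+# it captures the arguments, so the tests below can compare the recorded
+# instruction stream against an expected list of mi(...) instances.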
+
+class BaseMovTest(object):
+ def setup_method(self, method):
+ self.builder = MockBuilder()
+ self.asm = instantiate(AssemblerARM)
+ self.asm.mc = self.builder
+
+ def validate(self, expected):
+ result = self.builder.instrs
+ assert result == expected
+
+
+class TestRegallocMov(BaseMovTest):
+
+ def mov(self, a, b, expected=None):
+ self.asm.regalloc_mov(a, b)
+ self.validate(expected)
+
+ def test_mov_imm_to_reg(self):
+ val = imm(123)
+ reg = r(7)
+ expected = [mi('gen_load_int', 7, 123, cond=AL)]
+ self.mov(val, reg, expected)
+
+ def test_mov_large_imm_to_reg(self):
+ val = imm(65536)
+ reg = r(7)
+ expected = [mi('gen_load_int', 7, 65536, cond=AL)]
+ self.mov(val, reg, expected)
+
+    def test_mov_imm_to_stackloc(self):
+ val = imm(100)
+ s = stack(7)
+ expected = [
+ mi('PUSH', [lr.value], cond=AL),
+ mi('gen_load_int', lr.value, 100, cond=AL),
+ mi('STR_ri', lr.value, fp.value, imm=-s.value, cond=AL),
+ mi('POP', [lr.value], cond=AL)]
+ self.mov(val, s, expected)
+
+    def test_mov_big_imm_to_stackloc(self):
+ val = imm(65536)
+ s = stack(7)
+ expected = [
+ mi('PUSH', [lr.value], cond=AL),
+ mi('gen_load_int', lr.value, 65536, cond=AL),
+ mi('STR_ri', lr.value, fp.value, imm=-s.value, cond=AL),
+ mi('POP', [lr.value], cond=AL)]
+
+ self.mov(val, s, expected)
+
+    def test_mov_imm_to_big_stackloc(self):
+ val = imm(100)
+ s = stack(8191)
+ expected = [mi('PUSH', [lr.value], cond=AL),
+ mi('gen_load_int', lr.value, 100, cond=AL),
+ mi('PUSH', [ip.value], cond=AL),
+ mi('gen_load_int', ip.value, -s.value, cond=AL),
+ mi('STR_rr', lr.value, fp.value, ip.value, cond=AL),
+ mi('POP', [ip.value], cond=AL),
+ mi('POP', [lr.value], cond=AL)]
+ self.mov(val, s, expected)
+
+    def test_mov_big_imm_to_big_stackloc(self):
+ val = imm(65536)
+ s = stack(8191)
+ expected = [mi('PUSH', [lr.value], cond=AL),
+ mi('gen_load_int', lr.value, 65536, cond=AL),
+ mi('PUSH', [ip.value], cond=AL),
+ mi('gen_load_int', ip.value, -s.value, cond=AL),
+ mi('STR_rr', lr.value, fp.value, ip.value, cond=AL),
+ mi('POP', [ip.value], cond=AL),
+ mi('POP', [lr.value], cond=AL)]
+ self.mov(val, s, expected)
+
+ def test_mov_reg_to_reg(self):
+ r1 = r(1)
+ r9 = r(9)
+ expected = [mi('MOV_rr', r9.value, r1.value, cond=AL)]
+ self.mov(r1, r9, expected)
+
+ def test_mov_reg_to_stack(self):
+ s = stack(10)
+ r6 = r(6)
+ expected = [mi('STR_ri', r6.value, fp.value, imm=-s.value, cond=AL)]
+ self.mov(r6, s, expected)
+
+ def test_mov_reg_to_big_stackloc(self):
+ s = stack(8191)
+ r6 = r(6)
+ expected = [mi('PUSH', [ip.value], cond=AL),
+ mi('gen_load_int', ip.value, -s.value, cond=AL),
+ mi('STR_rr', r6.value, fp.value, ip.value, cond=AL),
+ mi('POP', [ip.value], cond=AL)]
+ self.mov(r6, s, expected)
+
+ def test_mov_stack_to_reg(self):
+ s = stack(10)
+ r6 = r(6)
+ expected = [mi('LDR_ri', r6.value, fp.value, imm=-s.value, cond=AL)]
+ self.mov(s, r6, expected)
+
+ def test_mov_big_stackloc_to_reg(self):
+ s = stack(8191)
+ r6 = r(6)
+ expected = [
+ mi('PUSH', [lr.value], cond=AL),
+ mi('gen_load_int', lr.value, -s.value, cond=AL),
+ mi('LDR_rr', r6.value, fp.value, lr.value, cond=AL),
+ mi('POP', [lr.value], cond=AL)]
+ self.mov(s, r6, expected)
+
+ def test_mov_float_imm_to_vfp_reg(self):
+ f = imm_float(3.5)
+ reg = vfp(5)
+ expected = [
+ mi('PUSH', [ip.value], cond=AL),
+ mi('gen_load_int', ip.value, f.value, cond=AL),
+ mi('VLDR', 5, ip.value, cond=AL),
+ mi('POP', [ip.value], cond=AL)]
+ self.mov(f, reg, expected)
+
+ def test_mov_vfp_reg_to_vfp_reg(self):
+ reg1 = vfp(5)
+ reg2 = vfp(14)
+ expected = [mi('VMOV_cc', reg2.value, reg1.value, cond=AL)]
+ self.mov(reg1, reg2, expected)
+
+ def test_mov_vfp_reg_to_stack(self):
+ reg = vfp(7)
+ s = stack_float(3)
+ expected = [mi('PUSH', [ip.value], cond=AL),
+ mi('SUB_ri', ip.value, fp.value, s.value, cond=AL),
+ mi('VSTR', reg.value, ip.value, cond=AL),
+ mi('POP', [ip.value], cond=AL)]
+ self.mov(reg, s, expected)
+
+ def test_mov_vfp_reg_to_large_stackloc(self):
+ reg = vfp(7)
+ s = stack_float(800)
+ expected = [mi('PUSH', [ip.value], cond=AL),
+ mi('gen_load_int', ip.value, s.value, cond=AL),
+ mi('SUB_rr', ip.value, fp.value, ip.value, cond=AL),
+ mi('VSTR', reg.value, ip.value, cond=AL),
+ mi('POP', [ip.value], cond=AL)]
+ self.mov(reg, s, expected)
+
+ def test_mov_stack_to_vfp_reg(self):
+ reg = vfp(7)
+ s = stack_float(3)
+ expected = [mi('PUSH', [ip.value], cond=AL),
+ mi('SUB_ri', ip.value, fp.value, s.value, cond=AL),
+ mi('VLDR', reg.value, ip.value, cond=AL),
+ mi('POP', [ip.value], cond=AL)]
+ self.mov(s, reg, expected)
+
+ def test_mov_big_stackloc_to_vfp_reg(self):
+ reg = vfp(7)
+ s = stack_float(800)
+ expected = [mi('PUSH', [ip.value], cond=AL),
+ mi('gen_load_int', ip.value, s.value, cond=AL),
+ mi('SUB_rr', ip.value, fp.value, ip.value, cond=AL),
+                    mi('VLDR', reg.value, ip.value, cond=AL),
+ mi('POP', [ip.value], cond=AL)]
+        self.mov(s, reg, expected)
+
+    def test_unsupported_cases(self):
+ py.test.raises(AssertionError,
+ 'self.asm.regalloc_mov(imm(1), imm(2))')
+ py.test.raises(AssertionError,
+ 'self.asm.regalloc_mov(imm(1), imm_float(2))')
+ py.test.raises(AssertionError,
+ 'self.asm.regalloc_mov(imm(1), vfp(2))')
+ py.test.raises(AssertionError,
+ 'self.asm.regalloc_mov(imm(1), stack_float(2))')
+ py.test.raises(AssertionError,
+ 'self.asm.regalloc_mov(imm_float(1), imm(2))')
+ py.test.raises(AssertionError,
+ 'self.asm.regalloc_mov(imm_float(1), imm_float(2))')
+ py.test.raises(AssertionError,
+ 'self.asm.regalloc_mov(imm_float(1), r(2))')
+ py.test.raises(AssertionError,
+ 'self.asm.regalloc_mov(imm_float(1), stack(2))')
+ py.test.raises(AssertionError,
+ 'self.asm.regalloc_mov(r(1), imm(2))')
+ py.test.raises(AssertionError,
+ 'self.asm.regalloc_mov(r(1), imm_float(2))')
+ py.test.raises(AssertionError,
+ 'self.asm.regalloc_mov(r(1), stack_float(2))')
+ py.test.raises(AssertionError,
+ 'self.asm.regalloc_mov(r(1), vfp(2))')
+ py.test.raises(AssertionError,
+ 'self.asm.regalloc_mov(stack(1), imm(2))')
+ py.test.raises(AssertionError,
+ 'self.asm.regalloc_mov(stack(1), imm_float(2))')
+ py.test.raises(AssertionError,
+ 'self.asm.regalloc_mov(stack(1), stack(2))')
+ py.test.raises(AssertionError,
+ 'self.asm.regalloc_mov(stack(1), stack_float(2))')
+ py.test.raises(AssertionError,
+ 'self.asm.regalloc_mov(stack(1), vfp(2))')
+ py.test.raises(AssertionError,
+ 'self.asm.regalloc_mov(stack(1), lr)')
+ py.test.raises(AssertionError,
+ 'self.asm.regalloc_mov(stack_float(1), imm(2))')
+ py.test.raises(AssertionError,
+ 'self.asm.regalloc_mov(stack_float(1), imm_float(2))')
+ py.test.raises(AssertionError,
+ 'self.asm.regalloc_mov(stack_float(1), r(2))')
+ py.test.raises(AssertionError,
+ 'self.asm.regalloc_mov(stack_float(1), stack(2))')
+ py.test.raises(AssertionError,
+ 'self.asm.regalloc_mov(stack_float(1), stack_float(2))')
+ py.test.raises(AssertionError,
+ 'self.asm.regalloc_mov(vfp(1), imm(2))')
+ py.test.raises(AssertionError,
+ 'self.asm.regalloc_mov(vfp(1), imm_float(2))')
+ py.test.raises(AssertionError,
+ 'self.asm.regalloc_mov(vfp(1), r(2))')
+ py.test.raises(AssertionError,
+ 'self.asm.regalloc_mov(vfp(1), stack(2))')
+
+
+class TestMovFromVFPLoc(BaseMovTest):
+ def mov(self, a, b, c, expected=None):
+ self.asm.mov_from_vfp_loc(a, b, c)
+ self.validate(expected)
+
+ def test_from_vfp(self):
+ vr = vfp(10)
+ r1 = r(1)
+ r2 = r(2)
+ e = [mi('VMOV_rc', r1.value, r2.value, vr.value, cond=AL)]
+ self.mov(vr, r1, r2, e)
+
+ def test_from_vfp_stack(self):
+ s = stack_float(4)
+ r1 = r(1)
+ r2 = r(2)
+ e = [
+ mi('LDR_ri', r1.value, fp.value, imm=-s.value, cond=AL),
+ mi('LDR_ri', r2.value, fp.value, imm=-s.value + WORD, cond=AL)]
+ self.mov(s, r1, r2, e)
+
+ def test_from_big_vfp_stack(self):
+ s = stack_float(2049)
+ r1 = r(1)
+ r2 = r(2)
+ e = [
+ mi('PUSH', [ip.value], cond=AL),
+ mi('gen_load_int', ip.value, -s.value, cond=AL),
+ mi('LDR_rr', r1.value, fp.value, ip.value, cond=AL),
+ mi('ADD_ri', ip.value, ip.value, imm=WORD, cond=AL),
+ mi('LDR_rr', r2.value, fp.value, ip.value, cond=AL),
+ mi('POP', [ip.value], cond=AL)]
+ self.mov(s, r1, r2, e)
+
+ def test_from_imm_float(self):
+ i = imm_float(4)
+ r1 = r(1)
+ r2 = r(2)
+ e = [
+ mi('PUSH', [ip.value], cond=AL),
+ mi('gen_load_int', ip.value, i.value, cond=AL),
+ mi('LDR_ri', r1.value, ip.value, cond=AL),
+ mi('LDR_ri', r2.value, ip.value, imm=4, cond=AL),
+ mi('POP', [ip.value], cond=AL)]
+ self.mov(i, r1, r2, e)
+
+ def test_unsupported(self):
+ py.test.raises(AssertionError,
+ 'self.asm.mov_from_vfp_loc(vfp(1), r(5), r(2))')
+ py.test.raises(AssertionError,
+ 'self.asm.mov_from_vfp_loc(stack(1), r(1), r(2))')
+ py.test.raises(AssertionError,
+ 'self.asm.mov_from_vfp_loc(imm(1), r(1), r(2))')
+ py.test.raises(AssertionError,
+ 'self.asm.mov_from_vfp_loc(r(1), r(1), r(2))')
+
+
+class TestMoveToVFPLoc(BaseMovTest):
+ def mov(self, r1, r2, vfp, expected):
+ self.asm.mov_to_vfp_loc(r1, r2, vfp)
+ self.validate(expected)
+
+    def test_mov_to_vfp_reg(self):
+ vr = vfp(10)
+ r1 = r(1)
+ r2 = r(2)
+ e = [mi('VMOV_cr', vr.value, r1.value, r2.value, cond=AL)]
+        self.mov(r1, r2, vr, e)
+
+ def test_to_vfp_stack(self):
+ s = stack_float(4)
+ r1 = r(1)
+ r2 = r(2)
+ e = [
+ mi('STR_ri', r1.value, fp.value, imm=-s.value, cond=AL),
+ mi('STR_ri', r2.value, fp.value, imm=-s.value + WORD, cond=AL)]
+ self.mov(r1, r2, s, e)
+
+    def test_to_big_vfp_stack(self):
+ s = stack_float(2049)
+ r1 = r(1)
+ r2 = r(2)
+ e = [
+ mi('PUSH', [ip.value], cond=AL),
+ mi('gen_load_int', ip.value, -s.value, cond=AL),
+ mi('STR_rr', r1.value, fp.value, ip.value, cond=AL),
+ mi('ADD_ri', ip.value, ip.value, imm=4, cond=AL),
+ mi('STR_rr', r2.value, fp.value, ip.value, cond=AL),
+ mi('POP', [ip.value], cond=AL)]
+ self.mov(r1, r2, s, e)
+
+    def test_unsupported(self):
+ py.test.raises(AssertionError,
+ 'self.asm.mov_from_vfp_loc(r(5), r(2), vfp(4))')
+ py.test.raises(AssertionError,
+ 'self.asm.mov_from_vfp_loc(r(1), r(2), stack(2))')
+ py.test.raises(AssertionError,
+ 'self.asm.mov_from_vfp_loc(r(1), r(2), imm(2))')
+ py.test.raises(AssertionError,
+ 'self.asm.mov_from_vfp_loc(r(1), r(2), imm_float(2))')
+ py.test.raises(AssertionError,
+ 'self.asm.mov_from_vfp_loc(r(1), r(1), r(2))')
+
+
+class TestRegallocPush(BaseMovTest):
+ def push(self, v, e):
+ self.asm.regalloc_push(v)
+ self.validate(e)
+
+ def test_push_imm(self):
+ i = imm(12)
+ e = [mi('gen_load_int', ip.value, 12, cond=AL),
+ mi('PUSH', [ip.value], cond=AL)]
+ self.push(i, e)
+
+ def test_push_reg(self):
+ r7 = r(7)
+ e = [mi('PUSH', [r7.value], cond=AL)]
+ self.push(r7, e)
+
+ def test_push_imm_float(self):
+ f = imm_float(7)
+ e = [mi('PUSH', [ip.value], cond=AL),
+ mi('gen_load_int', ip.value, 7, cond=AL),
+ mi('VLDR', vfp_ip.value, ip.value, cond=AL),
+ mi('POP', [ip.value], cond=AL),
+ mi('VPUSH', [vfp_ip.value], cond=AL)
+ ]
+ self.push(f, e)
+
+ def test_push_stack(self):
+ s = stack(7)
+ e = [mi('LDR_ri', ip.value, fp.value, imm=-s.value, cond=AL),
+ mi('PUSH', [ip.value], cond=AL)
+ ]
+ self.push(s, e)
+
+ def test_push_big_stack(self):
+ s = stack(1025)
+ e = [mi('PUSH', [lr.value], cond=AL),
+ mi('gen_load_int', lr.value, -s.value, cond=AL),
+ mi('LDR_rr', ip.value, fp.value, lr.value, cond=AL),
+ mi('POP', [lr.value], cond=AL),
+ mi('PUSH', [ip.value], cond=AL)
+ ]
+ self.push(s, e)
+
+ def test_push_vfp_reg(self):
+ v1 = vfp(1)
+ e = [mi('VPUSH', [v1.value], cond=AL)]
+ self.push(v1, e)
+
+ def test_push_stack_float(self):
+ sf = stack_float(4)
+ e = [
+ mi('PUSH', [ip.value], cond=AL),
+ mi('SUB_ri', ip.value, fp.value, sf.value, cond=AL),
+ mi('VLDR', vfp_ip.value, ip.value, cond=AL),
+ mi('POP', [ip.value], cond=AL),
+ mi('VPUSH', [vfp_ip.value], cond=AL),
+ ]
+ self.push(sf, e)
+
+ def test_push_large_stackfloat(self):
+ sf = stack_float(100)
+ e = [
+ mi('PUSH', [ip.value], cond=AL),
+ mi('gen_load_int', ip.value, sf.value, cond=AL),
+ mi('SUB_rr', ip.value, fp.value, ip.value, cond=AL),
+ mi('VLDR', vfp_ip.value, ip.value, cond=AL),
+ mi('POP', [ip.value], cond=AL),
+ mi('VPUSH', [vfp_ip.value], cond=AL),
+ ]
+ self.push(sf, e)
+
+
+class TestRegallocPop(BaseMovTest):
+ def pop(self, loc, e):
+ self.asm.regalloc_pop(loc)
+ self.validate(e)
+
+ def test_pop_reg(self):
+ r1 = r(1)
+ e = [mi('POP', [r1.value], cond=AL)]
+ self.pop(r1, e)
+
+ def test_pop_vfp_reg(self):
+ vr1 = vfp(1)
+ e = [mi('VPOP', [vr1.value], cond=AL)]
+ self.pop(vr1, e)
+
+ def test_pop_stackloc(self):
+ s = stack(12)
+ e = [
+ mi('POP', [ip.value], cond=AL),
+ mi('STR_ri', ip.value, fp.value, imm=-s.value, cond=AL)]
+ self.pop(s, e)
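
All of these classes rest on the same recording-mock trick: attribute lookup
on the fake builder returns a callable that logs the would-be instruction,
and the log is then compared against an expected list. A minimal,
self-contained version of the technique in plain Python (RecordingMock and
the example values are invented for this sketch):

    class RecordingMock(object):
        # Minimal stand-in for the MockBuilder idea above: every attribute
        # access yields a callable that logs its name and arguments.
        def __init__(self):
            self.calls = []

        def __getattr__(self, name):
            def record(*args, **kwargs):
                self.calls.append((name, args, kwargs))
            return record

    mc = RecordingMock()
    mc.PUSH([14], cond=0xE)  # 0xE is the ARM 'always' (AL) condition code
    mc.POP([14], cond=0xE)
    assert mc.calls == [('PUSH', ([14],), {'cond': 0xE}),
                        ('POP', ([14],), {'cond': 0xE})]

Checking the recorded call sequence keeps these tests independent of actual
code emission: only the order and operands of the builder calls matter.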