[pypy-commit] pypy jitframe-on-heap: merge

fijal noreply at buildbot.pypy.org
Mon Jan 28 17:17:00 CET 2013


Author: Maciej Fijalkowski <fijall at gmail.com>
Branch: jitframe-on-heap
Changeset: r60619:aef40e6e1dc8
Date: 2013-01-28 18:16 +0200
http://bitbucket.org/pypy/pypy/changeset/aef40e6e1dc8/

Log:	merge

diff --git a/rpython/jit/backend/arm/arch.py b/rpython/jit/backend/arm/arch.py
--- a/rpython/jit/backend/arm/arch.py
+++ b/rpython/jit/backend/arm/arch.py
@@ -1,7 +1,3 @@
-from rpython.rtyper.lltypesystem import lltype, rffi
-from rpython.rlib.rarithmetic import r_uint
-
-
 FUNC_ALIGN = 8
 WORD = 4
 DOUBLE_WORD = 8
@@ -14,54 +10,13 @@
 PC_OFFSET = 8
 FORCE_INDEX_OFS = 0
 
-from rpython.translator.tool.cbuild import ExternalCompilationInfo
-eci = ExternalCompilationInfo(post_include_bits=["""
-static int pypy__arm_int_div(int a, int b) {
-    return a/b;
-}
-static unsigned int pypy__arm_uint_div(unsigned int a, unsigned int b) {
-    return a/b;
-}
-static int pypy__arm_int_mod(int a, int b) {
-    return a % b;
-}
-"""])
+# The stack contains the force_index, the callee-saved registers and
+# ABI-required information
+# All the rest of the data is in a GC-managed variable-size "frame".
+# This jitframe object's address is always stored in the register FP
+# A jitframe is a jit.backend.llsupport.llmodel.JITFRAME = GcArray(Signed).
+# Stack frame fixed area
+# Currently only the force_index
+FRAME_FIXED_SIZE = 1
+JITFRAME_FIXED_SIZE = 16 + 16 * 2 # 16 GPR + 16 VFP Regs (64bit)
 
-
-def arm_int_div_emulator(a, b):
-    return int(a / float(b))
-arm_int_div_sign = lltype.Ptr(
-        lltype.FuncType([lltype.Signed, lltype.Signed], lltype.Signed))
-arm_int_div = rffi.llexternal(
-    "pypy__arm_int_div", [lltype.Signed, lltype.Signed], lltype.Signed,
-                        _callable=arm_int_div_emulator,
-                        compilation_info=eci,
-                        _nowrapper=True, elidable_function=True)
-
-
-def arm_uint_div_emulator(a, b):
-    return r_uint(a) / r_uint(b)
-arm_uint_div_sign = lltype.Ptr(
-        lltype.FuncType([lltype.Unsigned, lltype.Unsigned], lltype.Unsigned))
-arm_uint_div = rffi.llexternal(
-    "pypy__arm_uint_div", [lltype.Unsigned, lltype.Unsigned], lltype.Unsigned,
-                        _callable=arm_uint_div_emulator,
-                        compilation_info=eci,
-                        _nowrapper=True, elidable_function=True)
-
-
-def arm_int_mod_emulator(a, b):
-    sign = 1
-    if a < 0:
-        a = -1 * a
-        sign = -1
-    if b < 0:
-        b = -1 * b
-    res = a % b
-    return sign * res
-arm_int_mod_sign = arm_int_div_sign
-arm_int_mod = rffi.llexternal(
-    "pypy__arm_int_mod", [lltype.Signed, lltype.Signed], lltype.Signed,
-                        _callable=arm_int_mod_emulator,
-                        compilation_info=eci,
-                        _nowrapper=True, elidable_function=True)
diff --git a/rpython/jit/backend/arm/assembler.py b/rpython/jit/backend/arm/assembler.py
--- a/rpython/jit/backend/arm/assembler.py
+++ b/rpython/jit/backend/arm/assembler.py
@@ -5,7 +5,8 @@
 from rpython.jit.backend.arm import conditions as c
 from rpython.jit.backend.arm import registers as r
 from rpython.jit.backend.arm.arch import WORD, DOUBLE_WORD, FUNC_ALIGN, \
-                                    N_REGISTERS_SAVED_BY_MALLOC
+                                    N_REGISTERS_SAVED_BY_MALLOC, \
+                                    JITFRAME_FIXED_SIZE, FRAME_FIXED_SIZE
 from rpython.jit.backend.arm.codebuilder import ARMv7Builder, OverwritingBuilder
 from rpython.jit.backend.arm.locations import get_fp_offset
 from rpython.jit.backend.arm.regalloc import (Regalloc, ARMFrameManager,
@@ -21,7 +22,7 @@
 from rpython.jit.metainterp.resoperation import rop, ResOperation
 from rpython.rlib import rgc
 from rpython.rlib.objectmodel import we_are_translated, specialize
-from rpython.rtyper.annlowlevel import llhelper
+from rpython.rtyper.annlowlevel import llhelper, cast_instance_to_gcref
 from rpython.rtyper.lltypesystem import lltype, rffi, llmemory
 from rpython.rtyper.lltypesystem.lloperation import llop
 from rpython.jit.backend.arm.opassembler import ResOpAssembler
@@ -29,9 +30,9 @@
                              have_debug_prints, fatalerror)
 from rpython.rlib.jit import AsmInfo
 from rpython.rlib.objectmodel import compute_unique_id
+from rpython.rlib.rarithmetic import intmask, r_uint
 
-# XXX Move to llsupport
-from rpython.jit.backend.x86.support import memcpy_fn
+from rpython.jit.backend.arm.support import memcpy_fn
 
 DEBUG_COUNTER = lltype.Struct('DEBUG_COUNTER', ('i', lltype.Signed),
                               ('type', lltype.Char),  # 'b'ridge, 'l'abel or
@@ -41,8 +42,6 @@
 
 class AssemblerARM(ResOpAssembler):
 
-    STACK_FIXED_AREA = -1
-
     debug = True
 
     def __init__(self, cpu, translate_support_code=False):
@@ -59,33 +58,19 @@
         self.datablockwrapper = None
         self.propagate_exception_path = 0
         self.stack_check_slowpath = 0
-        self._compute_stack_size()
         self._debug = False
         self.loop_run_counters = []
         self.debug_counter_descr = cpu.fielddescrof(DEBUG_COUNTER, 'i')
-        self.force_token_to_dead_frame = {}    # XXX temporary hack
+        self.gcrootmap_retaddr_forced = 0
 
     def set_debug(self, v):
         r = self._debug
         self._debug = v
         return r
 
-    def _compute_stack_size(self):
-        self.STACK_FIXED_AREA = len(r.callee_saved_registers) * WORD
-        self.STACK_FIXED_AREA += WORD  # FORCE_TOKEN
-        self.STACK_FIXED_AREA += N_REGISTERS_SAVED_BY_MALLOC * WORD
-        if self.cpu.supports_floats:
-            self.STACK_FIXED_AREA += (len(r.callee_saved_vfp_registers)
-                                        * DOUBLE_WORD)
-        if self.STACK_FIXED_AREA % 8 != 0:
-            self.STACK_FIXED_AREA += WORD  # Stack alignment
-        assert self.STACK_FIXED_AREA % 8 == 0
-
-    def setup(self, looptoken, operations):
+    def setup(self, looptoken):
+        assert self.memcpy_addr != 0, 'setup_once() not called?'
         self.current_clt = looptoken.compiled_loop_token
-        operations = self.cpu.gc_ll_descr.rewrite_assembler(self.cpu,
-                        operations, self.current_clt.allgcrefs)
-        assert self.memcpy_addr != 0, 'setup_once() not called?'
         self.mc = ARMv7Builder()
         self.pending_guards = []
         assert self.datablockwrapper is None
@@ -93,7 +78,6 @@
         self.datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr,
                                                         allblocks)
         self.target_tokens_currently_compiling = {}
-        return operations
 
     def teardown(self):
         self.current_clt = None
@@ -106,10 +90,11 @@
         # Addresses of functions called by new_xxx operations
         gc_ll_descr = self.cpu.gc_ll_descr
         gc_ll_descr.initialize()
+        self.memcpy_addr = self.cpu.cast_ptr_to_int(memcpy_fn)
+        self._build_failure_recovery(exc=True, withfloats=False)
+        self._build_failure_recovery(exc=False, withfloats=False)
         self._build_wb_slowpath(False)
         self._build_wb_slowpath(True)
-        self._build_failure_recovery(exc=True, withfloats=False)
-        self._build_failure_recovery(exc=False, withfloats=False)
         if self.cpu.supports_floats:
             self._build_wb_slowpath(False, withfloats=True)
             self._build_wb_slowpath(True, withfloats=True)
@@ -121,7 +106,6 @@
         self._build_stack_check_slowpath()
         if gc_ll_descr.gcrootmap and gc_ll_descr.gcrootmap.is_shadow_stack:
             self._build_release_gil(gc_ll_descr.gcrootmap)
-        self.memcpy_addr = self.cpu.cast_ptr_to_int(memcpy_fn)
 
         if not self._debug:
             # if self._debug is already set it means that someone called
@@ -130,6 +114,9 @@
             debug_start('jit-backend-counts')
             self.set_debug(have_debug_prints())
             debug_stop('jit-backend-counts')
+        # when finishing, we only have one value at [0], the rest dies
+        self.gcmap_for_finish = lltype.malloc(jitframe.GCMAP, 1, zero=True)
+        self.gcmap_for_finish[0] = r_uint(1)
 
     def finish_once(self):
         if self._debug:
@@ -218,18 +205,50 @@
         self.reacqgil_addr = rffi.cast(lltype.Signed, reacqgil_func)
 
     def _build_propagate_exception_path(self):
-        if self.cpu.propagate_exception_v < 0:
+        if not self.cpu.propagate_exception_descr:
             return      # not supported (for tests, or non-translated)
         #
         mc = ARMv7Builder()
         #
-        # Call the helper, which will return a dead frame object with
-        # the correct exception set, or MemoryError by default
-        # XXX make sure we return the correct value here
+        # read and reset the current exception
         addr = rffi.cast(lltype.Signed, self.cpu.get_propagate_exception())
         mc.BL(addr)
         self.gen_func_epilog(mc=mc)
         self.propagate_exception_path = mc.materialize(self.cpu.asmmemmgr, [])
+        #
+        self._store_and_reset_exception(r.r0)
+        ofs = self.cpu.get_ofs_of_frame_field('jf_guard_exc')
+        # make sure ofs fits into an immediate operand
+        assert check_imm_arg(ofs)
+        self.mc.STR_ri(r.r0.value, r.fp.value, imm=ofs)
+        propagate_exception_descr = rffi.cast(lltype.Signed,
+                  cast_instance_to_gcref(self.cpu.propagate_exception_descr))
+        ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
+        # make sure ofs fits into an immediate operand
+        assert check_imm_arg(ofs)
+        self.mc.BKPT()
+        #base_ofs = self.cpu.get_baseofs_of_frame_field()
+        #self.mc.MOV_bi(ofs, propagate_exception_descr)
+        #self.mc.LEA_rb(eax.value, -base_ofs)
+        #
+        self._call_footer()
+        rawstart = self.mc.materialize(self.cpu.asmmemmgr, [])
+        self.propagate_exception_path = rawstart
+        self.mc = None
+
+    def _store_and_reset_exception(self, resloc=None):
+        assert resloc is not r.ip
+        if resloc is not None:
+            self.mc.gen_load_int(resloc.value, self.cpu.pos_exc_value())
+            self.mc.LDR_ri(resloc.value, resloc.value)
+            self.mc.MOV(resloc, heap(self.cpu.pos_exc_value()))
+
+        with saved_registers(self.mc, [r.r0]):
+            self.mc.gen_load_int(r.r0.value, self.cpu.pos_exc_value())
+            self.mc.gen_load_int(r.ip.value, 0)
+            self.mc.STR_ri(r.ip.value, r.r0.value)
+            self.mc.gen_load_int(r.r0.value, self.cpu.pos_exception())
+            self.mc.STR_ri(r.ip.value, r.r0.value)
 
     def _build_stack_check_slowpath(self):
         _, _, slowpathaddr = self.cpu.insert_stack_check()
@@ -558,7 +577,7 @@
                 # We might have an exception pending.  Load it into r4
                 # (this is a register saved across calls)
                 mc.gen_load_int(r.r5.value, self.cpu.pos_exc_value())
-                mc.LDR_ri(r.r4.value, self.cpu.pos_exc_value())
+                mc.LDR_ri(r.r4.value, r.r5.value)
                 # clear the exc flags
                 mc.gen_load_int(r.r6.value, 0)
                 mc.STR_ri(r.r6.value, r.r5.value)
@@ -661,37 +680,35 @@
             self.mc.writechar(chr(0))
 
     def gen_func_epilog(self, mc=None, cond=c.AL):
-        stack_size = self.STACK_FIXED_AREA
-        stack_size -= len(r.callee_saved_registers) * WORD
-        if self.cpu.supports_floats:
-            stack_size -= len(r.callee_saved_vfp_registers) * 2 * WORD
-
         gcrootmap = self.cpu.gc_ll_descr.gcrootmap
         if mc is None:
             mc = self.mc
         if gcrootmap and gcrootmap.is_shadow_stack:
             self.gen_footer_shadowstack(gcrootmap, mc)
-        mc.MOV_rr(r.sp.value, r.fp.value, cond=cond)
-        mc.ADD_ri(r.sp.value, r.sp.value, stack_size, cond=cond)
+        mc.ADD_ri(r.sp.value, r.sp.value, WORD, cond=cond) # for the force index
         if self.cpu.supports_floats:
             mc.VPOP([reg.value for reg in r.callee_saved_vfp_registers],
                                                                     cond=cond)
         mc.POP([reg.value for reg in r.callee_restored_registers], cond=cond)
+        mc.BKPT()
 
     def gen_func_prolog(self):
-        stack_size = self.STACK_FIXED_AREA
-        stack_size -= len(r.callee_saved_registers) * WORD
+        stack_size = FRAME_FIXED_SIZE * WORD
+        stack_size += len(r.callee_saved_registers) * WORD
         if self.cpu.supports_floats:
-            stack_size -= len(r.callee_saved_vfp_registers) * 2 * WORD
+            stack_size += len(r.callee_saved_vfp_registers) * 2 * WORD
 
         self.mc.PUSH([reg.value for reg in r.callee_saved_registers])
         if self.cpu.supports_floats:
             self.mc.VPUSH([reg.value for reg in r.callee_saved_vfp_registers])
-        # here we modify the stack pointer to leave room for the 9 registers
-        # that are going to be saved here around malloc calls and one word to
-        # store the force index
-        self.mc.SUB_ri(r.sp.value, r.sp.value, stack_size)
-        self.mc.MOV_rr(r.fp.value, r.sp.value)
+        self.mc.SUB_ri(r.sp.value, r.sp.value, WORD) # for the force index
+        assert stack_size % 8 == 0 # ensure we keep alignment
+
+        # set fp to point to the JITFRAME + ofs
+        ofs = self.cpu.get_baseofs_of_frame_field()
+        assert check_imm_arg(ofs)
+        self.mc.ADD_ri(r.fp.value, r.r0.value, imm=ofs)
+        #
         gcrootmap = self.cpu.gc_ll_descr.gcrootmap
         if gcrootmap and gcrootmap.is_shadow_stack:
             self.gen_shadowstack_header(gcrootmap)
@@ -754,7 +771,9 @@
     # cpu interface
     def assemble_loop(self, loopname, inputargs, operations, looptoken, log):
         clt = CompiledLoopToken(self.cpu, looptoken.number)
+        clt.frame_info = lltype.malloc(jitframe.JITFRAMEINFO)
         clt.allgcrefs = []
+        clt.frame_info.jfi_frame_depth = 0 # for now
         looptoken.compiled_loop_token = clt
         clt._debug_nbargs = len(inputargs)
 
@@ -762,38 +781,40 @@
             # Arguments should be unique
             assert len(set(inputargs)) == len(inputargs)
 
-        operations = self.setup(looptoken, operations)
-        if log:
+        self.setup(looptoken)
+        if False and log:
             operations = self._inject_debugging_code(looptoken, operations,
                                                      'e', looptoken.number)
 
         self._call_header_with_stack_check()
-        sp_patch_location = self._prepare_sp_patch_position()
+        #sp_patch_location = self._prepare_sp_patch_position()
 
-        regalloc = Regalloc(assembler=self, frame_manager=ARMFrameManager())
-        regalloc.prepare_loop(inputargs, operations)
+        regalloc = Regalloc(assembler=self)
+        operations = regalloc.prepare_loop(inputargs, operations, looptoken,
+                                           clt.allgcrefs)
+        rgc._make_sure_does_not_move(lltype.cast_opaque_ptr(llmemory.GCREF,
+                                                            clt.frame_info))
 
         loop_head = self.mc.get_relative_pos()
         looptoken._arm_loop_code = loop_head
         #
-        clt.frame_depth = -1
-        frame_depth = self._assemble(operations, regalloc)
-        clt.frame_depth = frame_depth
+        frame_depth = self._assemble(regalloc, inputargs, operations)
+        self.update_frame_depth(frame_depth + JITFRAME_FIXED_SIZE)
         #
         size_excluding_failure_stuff = self.mc.get_relative_pos()
 
-        self._patch_sp_offset(sp_patch_location, frame_depth)
+        #self._patch_sp_offset(sp_patch_location, frame_depth)
         self.write_pending_failure_recoveries()
 
         rawstart = self.materialize_loop(looptoken)
-        looptoken._arm_func_addr = rawstart
+        looptoken._function_addr = looptoken._arm_func_addr = rawstart
 
         self.process_pending_guards(rawstart)
         self.fixup_target_tokens(rawstart)
 
         if log and not we_are_translated():
             self.mc._dump_trace(rawstart,
-                    'loop_%s.asm' % self.cpu.total_compiled_loops)
+                    'loop.asm')
 
         ops_offset = self.mc.ops_offset
         self.teardown()
@@ -809,18 +830,20 @@
         return AsmInfo(ops_offset, rawstart + loop_head,
                        size_excluding_failure_stuff - loop_head)
 
-    def _assemble(self, operations, regalloc):
+    def _assemble(self, regalloc, inputargs, operations):
         regalloc.compute_hint_frame_locations(operations)
-        self._walk_operations(operations, regalloc)
-        frame_depth = regalloc.frame_manager.get_frame_depth()
+        self._walk_operations(inputargs, operations, regalloc)
+        frame_depth = regalloc.get_final_frame_depth()
         jump_target_descr = regalloc.jump_target_descr
         if jump_target_descr is not None:
-            frame_depth = max(frame_depth,
-                                jump_target_descr._arm_clt.frame_depth)
+            tgt_depth = jump_target_descr._arm_clt.frame_info.jfi_frame_depth
+            target_frame_depth = tgt_depth - JITFRAME_FIXED_SIZE
+            frame_depth = max(frame_depth, target_frame_depth)
         return frame_depth
 
     def assemble_bridge(self, faildescr, inputargs, operations,
                                                     original_loop_token, log):
+        assert 0
         operations = self.setup(original_loop_token, operations)
         descr_number = self.cpu.get_fail_descr_number(faildescr)
         if log:
@@ -899,6 +922,17 @@
         return self.mc.materialize(self.cpu.asmmemmgr, allblocks,
                                    self.cpu.gc_ll_descr.gcrootmap)
 
+    def update_frame_depth(self, frame_depth):
+        self.current_clt.frame_info.jfi_frame_depth = frame_depth
+        new_jumping_to = []
+        for wref in self.current_clt.jumping_to:
+            clt = wref()
+            if clt is not None:
+                clt.frame_info.jfi_frame_depth = max(frame_depth,
+                    clt.frame_info.jfi_frame_depth)
+                new_jumping_to.append(weakref.ref(clt))
+        self.current_clt.jumping_to = new_jumping_to
+
     def write_pending_failure_recoveries(self):
         for tok in self.pending_guards:
             #generate the exit stub and the encoded representation
@@ -972,7 +1006,7 @@
             else:
                 cb.SUB_rr(r.sp.value, base_reg.value, r.ip.value, cond=fcond)
 
-    def _walk_operations(self, operations, regalloc):
+    def _walk_operations(self, inputargs, operations, regalloc):
         fcond = c.AL
         self._regalloc = regalloc
         while regalloc.position() < len(operations) - 1:
@@ -1141,10 +1175,10 @@
             if not check_imm_arg(offset, size=0xFFF):
                 self.mc.PUSH([r.lr.value], cond=cond)
                 pushed = True
-                self.mc.gen_load_int(r.lr.value, -offset, cond=cond)
+                self.mc.gen_load_int(r.lr.value, offset, cond=cond)
                 self.mc.LDR_rr(loc.value, r.fp.value, r.lr.value, cond=cond)
             else:
-                self.mc.LDR_ri(loc.value, r.fp.value, imm=-offset, cond=cond)
+                self.mc.LDR_ri(loc.value, r.fp.value, imm=offset, cond=cond)
             if pushed:
                 self.mc.POP([r.lr.value], cond=cond)
         elif loc.is_vfp_reg():
@@ -1364,6 +1398,26 @@
         else:
             return 0
 
+    def push_gcmap(self, mc, gcmap, push=False, mov=False, store=False):
+        gcmapref = lltype.cast_opaque_ptr(llmemory.GCREF, gcmap)
+        # keep the ref alive
+        self.current_clt.allgcrefs.append(gcmapref)
+        rgc._make_sure_does_not_move(gcmapref)
+        pass
+        #if push:
+        #    mc.PUSH(imm(rffi.cast(lltype.Signed, gcmapref)))
+        #elif mov:
+        #    mc.MOV(RawEspLoc(0, REF),
+        #           imm(rffi.cast(lltype.Signed, gcmapref)))
+        #else:
+        #    assert store
+        #    ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap')
+        #    mc.MOV(raw_stack(ofs), imm(rffi.cast(lltype.Signed, gcmapref)))
+
+    def pop_gcmap(self, mc):
+        ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap')
+        mc.MOV_bi(ofs, 0)
+
 
 def not_implemented(msg):
     os.write(2, '[ARM/asm] %s\n' % msg)
diff --git a/rpython/jit/backend/arm/codebuilder.py b/rpython/jit/backend/arm/codebuilder.py
--- a/rpython/jit/backend/arm/codebuilder.py
+++ b/rpython/jit/backend/arm/codebuilder.py
@@ -1,6 +1,6 @@
-from rpython.jit.backend.arm import arch
 from rpython.jit.backend.arm import conditions as cond
 from rpython.jit.backend.arm import registers as reg
+from rpython.jit.backend.arm import support
 from rpython.jit.backend.arm.arch import (WORD, FUNC_ALIGN)
 from rpython.jit.backend.arm.instruction_builder import define_instructions
 from rpython.jit.backend.llsupport.asmmemmgr import BlockBuilderMixin
@@ -17,7 +17,7 @@
 
 
 def binary_helper_call(name):
-    function = getattr(arch, 'arm_%s' % name)
+    function = getattr(support, 'arm_%s' % name)
 
     def f(self, c=cond.AL):
         """Generates a call to a helper function, takes its
diff --git a/rpython/jit/backend/arm/locations.py b/rpython/jit/backend/arm/locations.py
--- a/rpython/jit/backend/arm/locations.py
+++ b/rpython/jit/backend/arm/locations.py
@@ -1,5 +1,5 @@
 from rpython.jit.metainterp.history import INT, FLOAT
-from rpython.jit.backend.arm.arch import WORD, DOUBLE_WORD
+from rpython.jit.backend.arm.arch import WORD, DOUBLE_WORD, JITFRAME_FIXED_SIZE
 
 
 class AssemblerLocation(object):
@@ -136,9 +136,5 @@
     return ImmLocation(i)
 
 
-def get_fp_offset(i):
-    if i >= 0:
-        # Take the FORCE_TOKEN into account
-        return (1 + i) * WORD
-    else:
-        return i * WORD
+def get_fp_offset(position):
+    return WORD * (position + JITFRAME_FIXED_SIZE)
diff --git a/rpython/jit/backend/arm/opassembler.py b/rpython/jit/backend/arm/opassembler.py
--- a/rpython/jit/backend/arm/opassembler.py
+++ b/rpython/jit/backend/arm/opassembler.py
@@ -317,9 +317,27 @@
         return fcond
 
     def emit_op_finish(self, op, arglocs, regalloc, fcond):
-        [argloc] = arglocs
-        if argloc is not r.r0: #XXX verify this
-            self.mov_loc_loc(argloc, r.r0, fcond)
+        base_ofs = self.cpu.get_baseofs_of_frame_field() - WORD
+        if len(arglocs) == 2:
+            [return_val, fail_descr_loc] = arglocs
+            if op.getarg(0).type == FLOAT and not IS_X86_64:
+                XXX
+                size = WORD * 2
+            else:
+                size = WORD
+            self.mc.STR_ri(return_val.value, r.fp.value)#, imm=-base_ofs)
+            #self.save_into_mem(raw_stack(0), return_val, imm(size))
+        else:
+            [fail_descr_loc] = arglocs
+        ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
+        base_ofs = self.cpu.get_baseofs_of_frame_field()
+
+        self.mc.gen_load_int(r.ip.value, fail_descr_loc.value)
+        # XXX self.mov(fail_descr_loc, RawStackLoc(ofs))
+        self.mc.STR_ri(r.ip.value, r.fp.value, imm=ofs)
+        gcmap = self.gcmap_for_finish
+        self.push_gcmap(self.mc, gcmap, store=True)
+        self.mc.SUB_ri(r.r0.value, r.fp.value, base_ofs)
         # exit function
         self.gen_func_epilog()
         return fcond
diff --git a/rpython/jit/backend/arm/regalloc.py b/rpython/jit/backend/arm/regalloc.py
--- a/rpython/jit/backend/arm/regalloc.py
+++ b/rpython/jit/backend/arm/regalloc.py
@@ -1,3 +1,5 @@
+from rpython.rtyper.annlowlevel import llhelper, cast_instance_to_gcref
+from rpython.rlib import rgc
 from rpython.jit.backend.llsupport.regalloc import FrameManager, \
         RegisterManager, TempBox, compute_vars_longevity
 from rpython.jit.backend.arm import registers as r
@@ -180,10 +182,10 @@
 
 class Regalloc(object):
 
-    def __init__(self, frame_manager=None, assembler=None):
+    def __init__(self, assembler=None):
         self.cpu = assembler.cpu
         self.assembler = assembler
-        self.frame_manager = frame_manager
+        self.frame_manager = None
         self.jump_target_descr = None
         self.final_jump_op = None
 
@@ -282,7 +284,12 @@
             assert isinstance(value, ConstFloat)
             return self.vfprm.convert_to_imm(value)
 
-    def _prepare(self,  inputargs, operations):
+    def _prepare(self, inputargs, operations, allgcrefs):
+        self.frame_manager = self.fm = ARMFrameManager()
+        cpu = self.assembler.cpu
+        operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations,
+                                                       allgcrefs)
+        # compute longevity of variables
         longevity, last_real_usage = compute_vars_longevity(
                                                     inputargs, operations)
         self.longevity = longevity
@@ -291,92 +298,27 @@
         asm = self.assembler
         self.vfprm = VFPRegisterManager(longevity, fm, asm)
         self.rm = CoreRegisterManager(longevity, fm, asm)
+        return operations
 
-    def prepare_loop(self, inputargs, operations):
-        self._prepare(inputargs, operations)
+    def prepare_loop(self, inputargs, operations, looptoken, allgcrefs):
+        operations = self._prepare(inputargs, operations, allgcrefs)
         self._set_initial_bindings(inputargs)
-        self.possibly_free_vars(inputargs)
+        self.possibly_free_vars(list(inputargs))
+        return operations
 
     def prepare_bridge(self, inputargs, arglocs, ops):
         self._prepare(inputargs, ops)
         self._update_bindings(arglocs, inputargs)
 
+    def get_final_frame_depth(self):
+        return self.frame_manager.get_frame_depth()
+
     def _set_initial_bindings(self, inputargs):
-        # The first inputargs are passed in registers r0-r3
-        # we relly on the soft-float calling convention so we need to move
-        # float params to the coprocessor.
-        if self.cpu.use_hf_abi:
-            self._set_initial_bindings_hf(inputargs)
-        else:
-            self._set_initial_bindings_sf(inputargs)
-
-    def _set_initial_bindings_sf(self, inputargs):
-
-        arg_index = 0
-        count = 0
-        n_register_args = len(r.argument_regs)
-        cur_frame_pos = 1 - (self.assembler.STACK_FIXED_AREA // WORD)
+        # the input args are passed in the jitframe
         for box in inputargs:
             assert isinstance(box, Box)
-            # handle inputargs in argument registers
-            if box.type == FLOAT and arg_index % 2 != 0:
-                arg_index += 1  # align argument index for float passed
-                                # in register
-            if arg_index < n_register_args:
-                if box.type == FLOAT:
-                    loc = r.argument_regs[arg_index]
-                    loc2 = r.argument_regs[arg_index + 1]
-                    vfpreg = self.try_allocate_reg(box)
-                    # move soft-float argument to vfp
-                    self.assembler.mov_to_vfp_loc(loc, loc2, vfpreg)
-                    arg_index += 2  # this argument used two argument registers
-                else:
-                    loc = r.argument_regs[arg_index]
-                    self.try_allocate_reg(box, selected_reg=loc)
-                    arg_index += 1
-            else:
-                # treat stack args as stack locations with a negative offset
-                if box.type == FLOAT:
-                    cur_frame_pos -= 2
-                    if count % 2 != 0: # Stack argument alignment
-                        cur_frame_pos -= 1
-                        count = 0
-                else:
-                    cur_frame_pos -= 1
-                    count += 1
-                loc = self.frame_manager.frame_pos(cur_frame_pos, box.type)
-                self.frame_manager.set_binding(box, loc)
-
-    def _set_initial_bindings_hf(self, inputargs):
-
-        arg_index = vfp_arg_index = 0
-        count = 0
-        n_reg_args = len(r.argument_regs)
-        n_vfp_reg_args = len(r.vfp_argument_regs)
-        cur_frame_pos = 1 - (self.assembler.STACK_FIXED_AREA // WORD)
-        for box in inputargs:
-            assert isinstance(box, Box)
-            # handle inputargs in argument registers
-            if box.type != FLOAT and arg_index < n_reg_args:
-                reg = r.argument_regs[arg_index]
-                self.try_allocate_reg(box, selected_reg=reg)
-                arg_index += 1
-            elif box.type == FLOAT and vfp_arg_index < n_vfp_reg_args:
-                reg = r.vfp_argument_regs[vfp_arg_index]
-                self.try_allocate_reg(box, selected_reg=reg)
-                vfp_arg_index += 1
-            else:
-                # treat stack args as stack locations with a negative offset
-                if box.type == FLOAT:
-                    cur_frame_pos -= 2
-                    if count % 2 != 0: # Stack argument alignment
-                        cur_frame_pos -= 1
-                        count = 0
-                else:
-                    cur_frame_pos -= 1
-                    count += 1
-                loc = self.frame_manager.frame_pos(cur_frame_pos, box.type)
-                self.frame_manager.set_binding(box, loc)
+            assert box.type != FLOAT
+            self.fm.get_new_loc(box)
 
     def _update_bindings(self, locs, inputargs):
         used = {}
@@ -644,9 +586,19 @@
         return args
 
     def prepare_op_finish(self, op, fcond):
-        loc = self.loc(op.getarg(0))
-        self.possibly_free_var(op.getarg(0))
-        return [loc]
+        # the frame is in fp, but we have to point to where in the frame
+        # the potential argument to FINISH is stored
+        descr = op.getdescr()
+        fail_descr = cast_instance_to_gcref(descr)
+        # we know it does not move, but make sure anyway
+        rgc._make_sure_does_not_move(fail_descr)
+        fail_descr = rffi.cast(lltype.Signed, fail_descr)
+        if op.numargs() == 1:
+            loc = self.make_sure_var_in_reg(op.getarg(0))
+            locs = [loc, imm(fail_descr)]
+        else:
+            locs = [imm(fail_descr)]
+        return locs
 
     def prepare_op_guard_true(self, op, fcond):
         l0 = self.make_sure_var_in_reg(op.getarg(0))
diff --git a/rpython/jit/backend/arm/runner.py b/rpython/jit/backend/arm/runner.py
--- a/rpython/jit/backend/arm/runner.py
+++ b/rpython/jit/backend/arm/runner.py
@@ -1,12 +1,18 @@
+from rpython.jit.backend.arm.arch import JITFRAME_FIXED_SIZE
 from rpython.jit.backend.arm.assembler import AssemblerARM
 from rpython.jit.backend.arm.registers import all_regs, all_vfp_regs
+from rpython.jit.backend.llsupport import jitframe
+from rpython.jit.backend.llsupport.symbolic import WORD
 from rpython.jit.backend.llsupport.llmodel import AbstractLLCPU
+from rpython.jit.metainterp import history
+from rpython.rlib.jit_hooks import LOOP_RUN_CONTAINER
+from rpython.rlib.unroll import unrolling_iterable
 from rpython.rtyper.llinterp import LLInterpreter
 from rpython.rtyper.lltypesystem import lltype, rffi, llmemory
-from rpython.rlib.jit_hooks import LOOP_RUN_CONTAINER
-from rpython.jit.backend.arm.arch import FORCE_INDEX_OFS
 
 
+jitframe.STATICSIZE = JITFRAME_FIXED_SIZE
+
 class AbstractARMCPU(AbstractLLCPU):
 
     supports_floats = True
@@ -18,14 +24,9 @@
 
     def __init__(self, rtyper, stats, opts=None, translate_support_code=False,
                  gcdescr=None):
-        if gcdescr is not None:
-            gcdescr.force_index_ofs = FORCE_INDEX_OFS
         AbstractLLCPU.__init__(self, rtyper, stats, opts,
                                translate_support_code, gcdescr)
 
-        from rpython.jit.backend.llsupport import jitframe
-        self.deadframe_size_max = llmemory.sizeof(jitframe.DEADFRAME,
-                                                  self.get_failargs_limit())
 
     def set_debug(self, flag):
         return self.assembler.set_debug(flag)
@@ -64,7 +65,11 @@
             setitem(index, null)
 
     def make_execute_token(self, *ARGS):
-        FUNCPTR = lltype.Ptr(lltype.FuncType(ARGS, llmemory.GCREF))
+        FUNCPTR = lltype.Ptr(lltype.FuncType([llmemory.GCREF],
+                                             llmemory.GCREF))
+
+        lst = [(i, history.getkind(ARG)[0]) for i, ARG in enumerate(ARGS)]
+        kinds = unrolling_iterable(lst)
 
         def execute_token(executable_token, *args):
             clt = executable_token.compiled_loop_token
@@ -74,18 +79,32 @@
             assert addr % 8 == 0
             func = rffi.cast(FUNCPTR, addr)
             #llop.debug_print(lltype.Void, ">>>> Entering", addr)
+            frame_info = clt.frame_info
+            frame = self.gc_ll_descr.malloc_jitframe(frame_info)
+            ll_frame = lltype.cast_opaque_ptr(llmemory.GCREF, frame)
             prev_interpreter = None   # help flow space
             if not self.translate_support_code:
                 prev_interpreter = LLInterpreter.current_interpreter
                 LLInterpreter.current_interpreter = self.debug_ll_interpreter
             try:
-                deadframe = func(*args)
+                num = JITFRAME_FIXED_SIZE * WORD
+                for i, kind in kinds:
+                    arg = args[i]
+                    if kind == history.INT:
+                        self.set_int_value(ll_frame, num, arg)
+                    elif kind == history.FLOAT:
+                        self.set_float_value(ll_frame, num, arg)
+                        num += WORD # on ARM(32 bit) a FLOAT needs two words
+                    else:
+                        assert kind == history.REF
+                        self.set_ref_value(ll_frame, num, arg)
+                    num += WORD
+                ll_frame = func(ll_frame)
             finally:
                 if not self.translate_support_code:
                     LLInterpreter.current_interpreter = prev_interpreter
             #llop.debug_print(lltype.Void, "<<<< Back")
-            self.gc_set_extra_threshold()
-            return deadframe
+            return ll_frame
         return execute_token
 
     def cast_ptr_to_int(x):
diff --git a/rpython/jit/backend/arm/support.py b/rpython/jit/backend/arm/support.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/arm/support.py
@@ -0,0 +1,61 @@
+from rpython.rtyper.lltypesystem import lltype, rffi, llmemory
+from rpython.rlib.rarithmetic import r_uint
+from rpython.translator.tool.cbuild import ExternalCompilationInfo
+
+eci = ExternalCompilationInfo(post_include_bits=["""
+static int pypy__arm_int_div(int a, int b) {
+    return a/b;
+}
+static unsigned int pypy__arm_uint_div(unsigned int a, unsigned int b) {
+    return a/b;
+}
+static int pypy__arm_int_mod(int a, int b) {
+    return a % b;
+}
+"""])
+
+
+def arm_int_div_emulator(a, b):
+    return int(a / float(b))
+arm_int_div_sign = lltype.Ptr(
+        lltype.FuncType([lltype.Signed, lltype.Signed], lltype.Signed))
+arm_int_div = rffi.llexternal(
+    "pypy__arm_int_div", [lltype.Signed, lltype.Signed], lltype.Signed,
+                        _callable=arm_int_div_emulator,
+                        compilation_info=eci,
+                        _nowrapper=True, elidable_function=True)
+
+
+def arm_uint_div_emulator(a, b):
+    return r_uint(a) / r_uint(b)
+arm_uint_div_sign = lltype.Ptr(
+        lltype.FuncType([lltype.Unsigned, lltype.Unsigned], lltype.Unsigned))
+arm_uint_div = rffi.llexternal(
+    "pypy__arm_uint_div", [lltype.Unsigned, lltype.Unsigned], lltype.Unsigned,
+                        _callable=arm_uint_div_emulator,
+                        compilation_info=eci,
+                        _nowrapper=True, elidable_function=True)
+
+
+def arm_int_mod_emulator(a, b):
+    sign = 1
+    if a < 0:
+        a = -1 * a
+        sign = -1
+    if b < 0:
+        b = -1 * b
+    res = a % b
+    return sign * res
+arm_int_mod_sign = arm_int_div_sign
+arm_int_mod = rffi.llexternal(
+    "pypy__arm_int_mod", [lltype.Signed, lltype.Signed], lltype.Signed,
+                        _callable=arm_int_mod_emulator,
+                        compilation_info=eci,
+                        _nowrapper=True, elidable_function=True)
+# ____________________________________________________________
+
+memcpy_fn = rffi.llexternal('memcpy', [llmemory.Address, llmemory.Address,
+                                       rffi.SIZE_T], lltype.Void,
+                            sandboxsafe=True, _nowrapper=True)
+
+# ____________________________________________________________
diff --git a/rpython/jit/backend/arm/test/test_assembler.py b/rpython/jit/backend/arm/test/test_assembler.py
--- a/rpython/jit/backend/arm/test/test_assembler.py
+++ b/rpython/jit/backend/arm/test/test_assembler.py
@@ -1,6 +1,6 @@
 from rpython.jit.backend.arm import conditions as c
 from rpython.jit.backend.arm import registers as r
-from rpython.jit.backend.arm.arch import arm_int_div
+from rpython.jit.backend.arm.support import arm_int_div
 from rpython.jit.backend.arm.assembler import AssemblerARM
 from rpython.jit.backend.arm.locations import imm
 from rpython.jit.backend.arm.test.support import run_asm
diff --git a/rpython/jit/backend/llsupport/llmodel.py b/rpython/jit/backend/llsupport/llmodel.py
--- a/rpython/jit/backend/llsupport/llmodel.py
+++ b/rpython/jit/backend/llsupport/llmodel.py
@@ -335,6 +335,40 @@
 
     # ____________________________________________________________
 
+    def set_int_value(self, newframe, index, value):
+        """ Note that index is a byte offset (already multiplied by WORD),
+        mostly for consistency with get_int_value and friends
+        """
+        descr = self.gc_ll_descr.getframedescrs(self).arraydescr
+        ofs = self.unpack_arraydescr(descr)
+        self.write_int_at_mem(newframe, ofs + index, WORD, 1, value)
+
+    def set_ref_value(self, newframe, index, value):
+        descr = self.gc_ll_descr.getframedescrs(self).arraydescr
+        ofs = self.unpack_arraydescr(descr)
+        self.write_ref_at_mem(newframe, ofs + index, value)
+
+    def set_float_value(self, newframe, index, value):
+        descr = self.gc_ll_descr.getframedescrs(self).arraydescr
+        ofs = self.unpack_arraydescr(descr)
+        self.write_float_at_mem(newframe, ofs + index, value)
+
+    @specialize.arg(1)
+    def get_ofs_of_frame_field(self, name):
+        descrs = self.gc_ll_descr.getframedescrs(self)
+        if name.startswith('jfi_'):
+            base_ofs = 0 # not relative to frame
+        else:
+            base_ofs = self.unpack_arraydescr(descrs.arraydescr)
+        ofs = self.unpack_fielddescr(getattr(descrs, name))
+        return ofs - base_ofs
+
+    def get_baseofs_of_frame_field(self):
+        descrs = self.gc_ll_descr.getframedescrs(self)
+        base_ofs = self.unpack_arraydescr(descrs.arraydescr)
+        return base_ofs
+    # ____________________________________________________________
+
 
     def bh_arraylen_gc(self, array, arraydescr):
         assert isinstance(arraydescr, ArrayDescr)
diff --git a/rpython/jit/backend/test/runner_test.py b/rpython/jit/backend/test/runner_test.py
--- a/rpython/jit/backend/test/runner_test.py
+++ b/rpython/jit/backend/test/runner_test.py
@@ -129,6 +129,8 @@
         assert fail.identifier == 1
 
     def test_compile_linear_float_loop(self):
+        if not self.cpu.supports_floats:
+            py.test.skip("requires floats")
         i0 = BoxFloat()
         i1 = BoxFloat()
         operations = [
diff --git a/rpython/jit/backend/x86/runner.py b/rpython/jit/backend/x86/runner.py
--- a/rpython/jit/backend/x86/runner.py
+++ b/rpython/jit/backend/x86/runner.py
@@ -107,7 +107,7 @@
 
         lst = [(i, history.getkind(ARG)[0]) for i, ARG in enumerate(ARGS)]
         kinds = unrolling_iterable(lst)
-        
+
         def execute_token(executable_token, *args):
             clt = executable_token.compiled_loop_token
             assert len(args) == clt._debug_nbargs
@@ -160,7 +160,7 @@
 
     def invalidate_loop(self, looptoken):
         from rpython.jit.backend.x86 import codebuf
-        
+
         for addr, tgt in looptoken.compiled_loop_token.invalidate_positions:
             mc = codebuf.MachineCodeBlockWrapper()
             mc.JMP_l(tgt)
@@ -178,38 +178,6 @@
             l[i].counter = ll_s.i
         return l
 
-    def set_int_value(self, newframe, index, value):
-        """ Note that we keep index multiplied by WORD here mostly
-        for completeness with get_int_value and friends
-        """
-        descr = self.gc_ll_descr.getframedescrs(self).arraydescr
-        ofs = self.unpack_arraydescr(descr)
-        self.write_int_at_mem(newframe, ofs + index, WORD, 1, value)
-
-    def set_ref_value(self, newframe, index, value):
-        descr = self.gc_ll_descr.getframedescrs(self).arraydescr
-        ofs = self.unpack_arraydescr(descr)
-        self.write_ref_at_mem(newframe, ofs + index, value)
-
-    def set_float_value(self, newframe, index, value):
-        descr = self.gc_ll_descr.getframedescrs(self).arraydescr
-        ofs = self.unpack_arraydescr(descr)
-        self.write_float_at_mem(newframe, ofs + index, value)
-
-    @specialize.arg(1)
-    def get_ofs_of_frame_field(self, name):
-        descrs = self.gc_ll_descr.getframedescrs(self)
-        if name.startswith('jfi_'):
-            base_ofs = 0 # not relative to frame
-        else:
-            base_ofs = self.unpack_arraydescr(descrs.arraydescr)
-        ofs = self.unpack_fielddescr(getattr(descrs, name))
-        return ofs - base_ofs
-
-    def get_baseofs_of_frame_field(self):
-        descrs = self.gc_ll_descr.getframedescrs(self)
-        base_ofs = self.unpack_arraydescr(descrs.arraydescr)
-        return base_ofs
 
 class CPU386(AbstractX86CPU):
     backend_name = 'x86'


More information about the pypy-commit mailing list