[pypy-svn] r79234 - pypy/branch/arm-backend/pypy/jit/backend/arm

david at codespeak.net david at codespeak.net
Thu Nov 18 11:49:53 CET 2010


Author: david
Date: Thu Nov 18 11:49:51 2010
New Revision: 79234

Modified:
   pypy/branch/arm-backend/pypy/jit/backend/arm/arch.py
   pypy/branch/arm-backend/pypy/jit/backend/arm/assembler.py
   pypy/branch/arm-backend/pypy/jit/backend/arm/codebuilder.py
   pypy/branch/arm-backend/pypy/jit/backend/arm/opassembler.py
Log:
Implement the call_assembler operation, for now without resetting the vable
token. Generate a second entry point to a loop that receives its arguments in
registers and on the stack, following the ARM calling convention; this entry
point is used by the call_assembler operation.


Modified: pypy/branch/arm-backend/pypy/jit/backend/arm/arch.py
==============================================================================
--- pypy/branch/arm-backend/pypy/jit/backend/arm/arch.py	(original)
+++ pypy/branch/arm-backend/pypy/jit/backend/arm/arch.py	Thu Nov 18 11:49:51 2010
@@ -3,6 +3,8 @@
 
 FUNC_ALIGN=8
 WORD=4
+# The address in the PC points two words ahead of the current instruction
+PC_OFFSET = 8
 
 arm_int_div_sign = lltype.Ptr(lltype.FuncType([lltype.Signed, lltype.Signed], lltype.Signed))
 def arm_int_div(a, b):

Modified: pypy/branch/arm-backend/pypy/jit/backend/arm/assembler.py
==============================================================================
--- pypy/branch/arm-backend/pypy/jit/backend/arm/assembler.py	(original)
+++ pypy/branch/arm-backend/pypy/jit/backend/arm/assembler.py	Thu Nov 18 11:49:51 2010
@@ -223,6 +223,38 @@
             regs.append(reg)
             regalloc.possibly_free_var(reg)
         looptoken._arm_arglocs = regs
+        return regs
+
+    direct_bootstrap_code_size=100*WORD
+    def gen_direct_bootstrap_code(self, arglocs, loop_head, regalloc):
+        self.mc.ensure_can_fit(self.direct_bootstrap_code_size)
+        self.gen_func_prolog()
+        if len(arglocs) > 4:
+            reg_args = 4
+        else:
+            reg_args = len(arglocs)
+
+        stack_locs = len(arglocs) - reg_args
+
+        for i in range(reg_args):
+            loc = arglocs[i]
+            self.mc.MOV_rr(loc.value, i)
+
+        for i in range(stack_locs):
+            loc = arglocs[reg_args + i]
+            stack_position = (len(r.callee_saved_registers) + 1 +i)*WORD
+            if loc.is_reg():
+                self.mc.LDR_ri(loc.value, r.fp.value, stack_position)
+            elif loc.is_stack():
+                self.mc.PUSH([r.r0.value])
+                self.mc.LDR_ri(r.ip, r.fp.value, stack_position)
+                self.mov_loc_loc(r.ip, loc)
+                self.mc.POP([r.r0.value])
+            else:
+                assert 0, 'invalid location'
+        sp_patch_location = self._prepare_sp_patch_location()
+        self.mc.B(loop_head)
+        self._patch_sp_offset(sp_patch_location, regalloc)
 
     # cpu interface
     def assemble_loop(self, inputargs, operations, looptoken):
@@ -232,9 +264,10 @@
         loop_start=self.mc.curraddr()
         self.gen_func_prolog()
 
+
+        arglocs = self.gen_bootstrap_code(inputargs, regalloc, looptoken)
         sp_patch_location = self._prepare_sp_patch_location()
 
-        self.gen_bootstrap_code(inputargs, regalloc, looptoken)
         loop_head=self.mc.curraddr()
         looptoken._arm_bootstrap_code = loop_start
         looptoken._arm_loop_code = loop_head
@@ -243,6 +276,11 @@
 
         self._patch_sp_offset(sp_patch_location, regalloc)
 
+        self.align()
+
+        looptoken._arm_direct_bootstrap_code = self.mc.curraddr()
+        self.gen_direct_bootstrap_code(arglocs, loop_head, regalloc)
+
         if self._debug_asm:
             self._dump_trace('loop.asm')
         print 'Done assembling'

Modified: pypy/branch/arm-backend/pypy/jit/backend/arm/codebuilder.py
==============================================================================
--- pypy/branch/arm-backend/pypy/jit/backend/arm/codebuilder.py	(original)
+++ pypy/branch/arm-backend/pypy/jit/backend/arm/codebuilder.py	Thu Nov 18 11:49:51 2010
@@ -39,6 +39,9 @@
     def ensure_can_fit(self, n):
         raise NotImplentedError
 
+    def NOP(self):
+        self.MOV_rr(0, 0)
+
     def PUSH(self, regs, cond=cond.AL):
         assert reg.sp.value not in regs
         instr = self._encode_reg_list(cond << 28 | 0x92D << 16, regs)
@@ -55,7 +58,7 @@
     def B(self, target, c=cond.AL, some_reg=None):
         if c == cond.AL:
             self.ensure_can_fit(2*WORD)
-            self.LDR_ri(reg.pc.value, reg.pc.value, -4)
+            self.LDR_ri(reg.pc.value, reg.pc.value, -arch.PC_OFFSET/2)
             self.write32(target)
         else:
             assert some_reg is not None
@@ -66,8 +69,8 @@
     def BL(self, target, c=cond.AL, some_reg=None):
         if c == cond.AL:
             self.ensure_can_fit(3*WORD)
-            self.ADD_ri(reg.lr.value, reg.pc.value, 4)
-            self.LDR_ri(reg.pc.value, reg.pc.value, imm=-4)
+            self.ADD_ri(reg.lr.value, reg.pc.value, arch.PC_OFFSET/2)
+            self.LDR_ri(reg.pc.value, reg.pc.value, imm=-arch.PC_OFFSET/2)
             self.write32(target)
         else:
             assert some_reg is not None
@@ -108,6 +111,9 @@
     def curraddr(self):
         return self.baseaddr() + self._pos
 
+    def currpos(self):
+        return self._pos
+
     size_of_gen_load_int = 7 * WORD
     def gen_load_int(self, r, value, cond=cond.AL):
         """r is the register number, value is the value to be loaded to the

Modified: pypy/branch/arm-backend/pypy/jit/backend/arm/opassembler.py
==============================================================================
--- pypy/branch/arm-backend/pypy/jit/backend/arm/opassembler.py	(original)
+++ pypy/branch/arm-backend/pypy/jit/backend/arm/opassembler.py	Thu Nov 18 11:49:51 2010
@@ -3,7 +3,8 @@
 from pypy.jit.backend.arm import registers as r
 from pypy.jit.backend.arm import shift
 from pypy.jit.backend.arm.arch import (WORD, FUNC_ALIGN, arm_int_div,
-                                        arm_int_div_sign, arm_int_mod_sign, arm_int_mod)
+                                        arm_int_div_sign, arm_int_mod_sign,
+                                        arm_int_mod, PC_OFFSET)
 
 from pypy.jit.backend.arm.helper.assembler import (gen_emit_op_by_helper_call,
                                                     gen_emit_op_unary_cmp,
@@ -13,7 +14,8 @@
 from pypy.jit.backend.llsupport import symbolic
 from pypy.jit.backend.llsupport.descr import BaseFieldDescr, BaseArrayDescr
 from pypy.jit.backend.llsupport.regalloc import compute_vars_longevity, TempBox
-from pypy.jit.metainterp.history import Const, ConstInt, BoxInt, BasicFailDescr
+from pypy.jit.metainterp.history import (Const, ConstInt, BoxInt,
+                                        BasicFailDescr, LoopToken, INT, REF)
 from pypy.jit.metainterp.resoperation import rop
 from pypy.rlib import rgc
 from pypy.rlib.objectmodel import we_are_translated
@@ -232,24 +234,44 @@
         return fcond
 
     def emit_op_call(self, op, regalloc, fcond, save_all_regs=False):
+        adr = self.cpu.cast_adr_to_int(op.getarg(0).getint())
+        args = op.getarglist()[1:]
+        cond =  self._emit_call(adr, args, regalloc, fcond, save_all_regs, op.result)
+
+        descr = op.getdescr()
+        #XXX Hack, Hack, Hack
+        if not we_are_translated() and not isinstance(descr, LoopToken):
+            l = regalloc.loc(op.result)
+            # XXX we need descr.get_result_sign here!!!!
+            size = descr.get_result_size(False)
+            # for now just check the size of the value
+            if size == 1: #unsigned char
+                self.mc.AND_ri(l.value, l.value, 255)
+            elif size == 2: # signed short
+                self.mc.LSL_ri(l.value, l.value, 16)
+                self.mc.ASR_ri(l.value, l.value, 16)
+        return cond
+
+    def _emit_call(self, adr, args, regalloc, fcond=c.AL, save_all_regs=False, result=None):
         locs = []
         # all arguments past the 4th go on the stack
         # XXX support types other than int (one word types)
-        if op.numargs() > 5:
-            stack_args = op.numargs() - 5
+        n = 0
+        n_args = len(args)
+        if n_args > 4:
+            stack_args = n_args - 4
             n = stack_args*WORD
             self._adjust_sp(n, regalloc, fcond=fcond)
-            for i in range(5, op.numargs()):
-                reg = regalloc.make_sure_var_in_reg(op.getarg(i))
-                self.mc.STR_ri(reg.value, r.sp.value, (i-5)*WORD)
+            for i in range(4, n_args):
+                reg = regalloc.make_sure_var_in_reg(args[i])
+                self.mc.STR_ri(reg.value, r.sp.value, (i-4)*WORD)
                 regalloc.possibly_free_var(reg)
 
-        adr = self.cpu.cast_adr_to_int(op.getarg(0).getint())
 
-        reg_args = min(op.numargs()-1, 4)
-        for i in range(1, reg_args+1):
-            l = regalloc.make_sure_var_in_reg(op.getarg(i),
-                                            selected_reg=r.all_regs[i-1])
+        reg_args = min(n_args, 4)
+        for i in range(0, reg_args):
+            l = regalloc.make_sure_var_in_reg(args[i],
+                                            selected_reg=r.all_regs[i])
             locs.append(l)
         # XXX use PUSH here instead of spilling every reg for itself
         if save_all_regs:
@@ -257,23 +279,15 @@
         else:
             regalloc.before_call()
         self.mc.BL(adr)
-        #XXX Hack, Hack, Hack
-        if not we_are_translated():
-            descr = op.getdescr()
-            # XXX we need descr.get_result_sign here!!!!
-            size = descr.get_result_size(False)
-            # for now just check the size of the value
-            if size == 1: #unsigned char
-                self.mc.AND_ri(r.r0.value, r.r0.value, 255)
-            elif size == 2: # signed short
-                self.mc.LSL_ri(r.r0.value, r.r0.value, 16)
-                self.mc.ASR_ri(r.r0.value, r.r0.value, 16)
 
-        regalloc.after_call(op.result)
+        if result:
+            regalloc.after_call(result)
         # readjust the sp in case we passed some args on the stack
-        if op.numargs() > 5:
+        if n_args > 4:
+            assert n > 0
             self._adjust_sp(-n, regalloc, fcond=fcond)
         regalloc.possibly_free_vars(locs)
+        return fcond
 
     def emit_op_same_as(self, op, regalloc, fcond):
         resloc = regalloc.force_allocate_reg(op.result)
@@ -401,6 +415,7 @@
         if scale > 0:
             self.mc.LSL_ri(ofs_loc.value, ofs_loc.value, scale)
         f(res.value, base_loc.value, ofs_loc.value, cond=fcond)
+        return fcond
 
     emit_op_getarrayitem_raw = emit_op_getarrayitem_gc
     emit_op_getarrayitem_gc_pure = emit_op_getarrayitem_gc
@@ -543,25 +558,132 @@
         self.mc.MOV_rr(res_loc.value, r.fp.value)
         return fcond
 
-    def emit_guard_call_may_force(self, op, guard_op, regalloc, fcond):
+    # from: ../x86/assembler.py:1668
+    def emit_guard_call_assembler(self, op, guard_op, regalloc, fcond):
         faildescr = guard_op.getdescr()
         fail_index = self.cpu.get_fail_descr_number(faildescr)
+        self._write_fail_index(fail_index, regalloc)
+
+        descr = op.getdescr()
+        assert isinstance(descr, LoopToken)
+
+        resbox = TempBox()
+        self._emit_call(descr._arm_direct_bootstrap_code, op.getarglist(), regalloc, fcond, resbox)
+        #self.mc.ensure_bytes_available(256)
+        if op.result is None:
+            value = self.cpu.done_with_this_frame_void_v
+        else:
+            kind = op.result.type
+            if kind == INT:
+                value = self.cpu.done_with_this_frame_int_v
+            elif kind == REF:
+                value = self.cpu.done_with_this_frame_ref_v
+            elif kind == FLOAT:
+                value = self.cpu.done_with_this_frame_float_v
+            else:
+                raise AssertionError(kind)
+        assert value <= 0xff
+
+        # check value
+        t = TempBox()
+        resloc = regalloc.force_allocate_reg(resbox)
+        loc = regalloc.force_allocate_reg(t, [r.r0])
+        self.mc.gen_load_int(loc.value, value)
+        self.mc.CMP_rr(resloc.value, loc.value)
+        regalloc.possibly_free_var(resbox)
+
+        fast_jmp_pos = self.mc.currpos()
+        fast_jmp_location = self.mc.curraddr()
+        self.mc.NOP()
+
+        # if the values are equal we take the fast path
+        # Slow path, calling helper
+        # jump to merge point
+        jd = descr.outermost_jitdriver_sd
+        assert jd is not None
+        asm_helper_adr = self.cpu.cast_adr_to_int(jd.assembler_helper_adr)
+        self._emit_call(asm_helper_adr, [t, op.getarg(0)], regalloc, fcond, False, op.result)
+        regalloc.possibly_free_var(t)
+
+        # jump to merge point
+        jmp_pos = self.mc.currpos()
+        jmp_location = self.mc.curraddr()
+        self.mc.NOP()
+
+        # Fast Path using result boxes
+        # patch the jump to the fast path
+        offset = self.mc.currpos() - fast_jmp_pos
+        pmc = ARMv7InMemoryBuilder(fast_jmp_location, WORD)
+        pmc.ADD_ri(r.pc.value, r.pc.value, offset - PC_OFFSET, cond=c.EQ)
+
+        # Reset the vable token --- XXX really too much special logic here:-(
+        # XXX Enable and fix this once the strange errors produced by its
+        # presence are fixed
+        #if jd.index_of_virtualizable >= 0:
+        #    from pypy.jit.backend.llsupport.descr import BaseFieldDescr
+        #    size = jd.portal_calldescr.get_result_size(self.cpu.translate_support_code)
+        #    vable_index = jd.index_of_virtualizable
+        #    regalloc._sync_var(op.getarg(vable_index))
+        #    vable = regalloc.frame_manager.loc(op.getarg(vable_index))
+        #    fielddescr = jd.vable_token_descr
+        #    assert isinstance(fielddescr, BaseFieldDescr)
+        #    ofs = fielddescr.offset
+        #    self.mc.MOV(eax, arglocs[1])
+        #    self.mc.MOV_mi((eax.value, ofs), 0)
+        #    # in the line above, TOKEN_NONE = 0
+
+        if op.result is not None:
+            # load the return value from fail_boxes_xxx[0]
+            loc = regalloc.force_allocate_reg(t)
+            resloc = regalloc.force_allocate_reg(op.result, [t])
+            kind = op.result.type
+            if kind == INT:
+                adr = self.fail_boxes_int.get_addr_for_num(0)
+            elif kind == REF:
+                adr = self.fail_boxes_ptr.get_addr_for_num(0)
+            else:
+                raise AssertionError(kind)
+            self.mc.gen_load_int(loc.value, adr)
+            self.mc.LDR_ri(resloc.value, loc.value)
+            regalloc.possibly_free_var(t)
+
+        offset = self.mc.currpos() - jmp_pos
+        pmc = ARMv7InMemoryBuilder(jmp_location, WORD)
+        pmc.ADD_ri(r.pc.value, r.pc.value, offset - PC_OFFSET)
         t = TempBox()
         l0 = regalloc.force_allocate_reg(t)
-        self.mc.gen_load_int(l0.value, fail_index)
-        self.mc.STR_ri(l0.value, r.fp.value)
+        self.mc.LDR_ri(l0.value, r.fp.value)
+        self.mc.CMP_ri(l0.value, 0)
+        regalloc.possibly_free_var(t)
+        regalloc.possibly_free_vars_for_op(op)
+
+        self._emit_guard(guard_op, regalloc, c.LT)
+        return fcond
+
+    def emit_guard_call_may_force(self, op, guard_op, regalloc, fcond):
+        faildescr = guard_op.getdescr()
+        fail_index = self.cpu.get_fail_descr_number(faildescr)
+        self._write_fail_index(fail_index, regalloc)
 
         # force all reg values to be spilled when calling
         fcond = self.emit_op_call(op, regalloc, fcond, save_all_regs=True)
 
+        t = TempBox()
+        l0 = regalloc.force_allocate_reg(t)
         self.mc.LDR_ri(l0.value, r.fp.value)
         self.mc.CMP_ri(l0.value, 0)
-
         regalloc.possibly_free_var(t)
 
         self._emit_guard(guard_op, regalloc, c.LT)
         return fcond
 
+    def _write_fail_index(self, fail_index, regalloc):
+        t = TempBox()
+        l0 = regalloc.force_allocate_reg(t)
+        self.mc.gen_load_int(l0.value, fail_index)
+        self.mc.STR_ri(l0.value, r.fp.value)
+        regalloc.possibly_free_var(t)
+
 class ResOpAssembler(GuardOpAssembler, IntOpAsslember,
                     OpAssembler, UnaryIntOpAssembler,
                     FieldOpAssembler, ArrayOpAssember,



More information about the Pypy-commit mailing list