[pypy-commit] pypy ppc-jit-backend: Started implementation of CALL_ASSEMBLER

hager noreply at buildbot.pypy.org
Tue Nov 22 19:09:51 CET 2011


Author: hager <sven.hager at uni-duesseldorf.de>
Branch: ppc-jit-backend
Changeset: r49669:c489a73eaf5d
Date: 2011-11-22 19:09 +0100
http://bitbucket.org/pypy/pypy/changeset/c489a73eaf5d/

Log:	Started implementation of CALL_ASSEMBLER

diff --git a/pypy/jit/backend/ppc/ppcgen/codebuilder.py b/pypy/jit/backend/ppc/ppcgen/codebuilder.py
--- a/pypy/jit/backend/ppc/ppcgen/codebuilder.py
+++ b/pypy/jit/backend/ppc/ppcgen/codebuilder.py
@@ -928,6 +928,21 @@
         self.faillocs = faillocs
         self.save_exc = save_exc
 
+class OverwritingBuilder(PPCAssembler):  # Assembler whose output overwrites existing positions of `cb` via cb.overwrite().
+    def __init__(self, cb, start, size):  # cb: object providing overwrite(index, char); start: first position to patch; size: length of the patch window
+        PPCAssembler.__init__(self)
+        self.cb = cb
+        self.index = start  # next position that writechar() will overwrite
+        self.end = start + size  # bound of the patch window, checked in writechar()
+
+    def currpos(self):  # Return the position the next written char will go to.
+        return self.index
+
+    def writechar(self, char):  # Overwrite one char of cb at the current position, then advance by one.
+        assert self.index <= self.end  # NOTE(review): `<=` still permits a write at index == start + size -- confirm `<` is not intended
+        self.cb.overwrite(self.index, char)
+        self.index += 1
+
 class PPCBuilder(BlockBuilderMixin, PPCAssembler):
     def __init__(self, failargs_limit=1000, r0_in_use=False):
         PPCAssembler.__init__(self)
@@ -964,6 +979,12 @@
         else:
             self.stdx(source_reg.value, 0, r.r0.value)
 
+    def b_offset(self, offset):  # Emit `b` with the displacement from the current position to position `offset`.
+        curpos = self.currpos()
+        target_ofs = offset - curpos  # displacement relative to the current write position
+        assert target_ofs < (1 << 24)  # NOTE(review): only an upper bound is checked; negative displacements are not range-checked
+        self.b(target_ofs)
+
     def b_cond_offset(self, offset, condition):
         pos = self.currpos()
         target_ofs = offset - pos
diff --git a/pypy/jit/backend/ppc/ppcgen/helper/assembler.py b/pypy/jit/backend/ppc/ppcgen/helper/assembler.py
--- a/pypy/jit/backend/ppc/ppcgen/helper/assembler.py
+++ b/pypy/jit/backend/ppc/ppcgen/helper/assembler.py
@@ -1,8 +1,9 @@
 import pypy.jit.backend.ppc.ppcgen.condition as c
 from pypy.rlib.rarithmetic import r_uint, r_longlong, intmask
-from pypy.jit.backend.ppc.ppcgen.arch import MAX_REG_PARAMS, IS_PPC_32
+from pypy.jit.backend.ppc.ppcgen.arch import MAX_REG_PARAMS, IS_PPC_32, WORD
 from pypy.jit.metainterp.history import FLOAT
 from pypy.rlib.unroll import unrolling_iterable
+import pypy.jit.backend.ppc.ppcgen.register as r
 
 def gen_emit_cmp_op(condition, signed=True):
     def f(self, op, arglocs, regalloc):
@@ -86,20 +87,28 @@
 
 class saved_registers(object):
     def __init__(self, assembler, regs_to_save, regalloc=None):
-        self.assembler = assembler
+        self.mc = assembler  # object used by __enter__/__exit__ to emit the store/load instructions
         self.regalloc = regalloc
         if self.regalloc:
-            self._filter_regs(regs_to_save, vfp_regs_to_save)
+            assert 0, "not implemented yet"  # a truthy regalloc is accepted by the signature but aborts here
         else:
             self.regs = regs_to_save
 
     def __enter__(self):
         if len(self.regs) > 0:
-            self.assembler.PUSH([r.value for r in self.regs])
+            space = WORD * len(self.regs)  # one WORD-sized slot per register in self.regs
+            self.mc.addi(r.SP.value, r.SP.value, -space)  # lower SP by `space` before storing
+            for i, reg in enumerate(self.regs):
+                if IS_PPC_32:
+                    self.mc.stw(reg.value, r.SP.value, i * WORD)  # store register i at SP + i * WORD
+                else:
+                    self.mc.std(reg.value, r.SP.value, i * WORD)  # same slot, emitted with std when not IS_PPC_32
 
-    def _filter_regs(self, regs_to_save, vfp_regs_to_save):
-        regs = []
-        for box, reg in self.regalloc.rm.reg_bindings.iteritems():
-            if reg is r.ip or (reg in regs_to_save and self.regalloc.stays_alive(box)):
-                regs.append(reg)
-        self.regs = regs
+    def __exit__(self, *args):  # Emit loads that reload every register in self.regs from SP + i * WORD.
+        if len(self.regs) > 0:
+            space = WORD * len(self.regs)  # NOTE(review): computed but never used; __enter__ lowers SP by this amount and nothing here raises it again -- confirm
+            for i, reg in enumerate(self.regs):
+                if IS_PPC_32:
+                    self.mc.lwz(reg.value, r.SP.value, i * WORD)  # reload register i from SP + i * WORD
+                else:
+                    self.mc.ld(reg.value, r.SP.value, i * WORD)  # same slot, emitted with ld when not IS_PPC_32
diff --git a/pypy/jit/backend/ppc/ppcgen/opassembler.py b/pypy/jit/backend/ppc/ppcgen/opassembler.py
--- a/pypy/jit/backend/ppc/ppcgen/opassembler.py
+++ b/pypy/jit/backend/ppc/ppcgen/opassembler.py
@@ -6,11 +6,14 @@
                                               GPR_SAVE_AREA, BACKCHAIN_SIZE,
                                               MAX_REG_PARAMS)
 
-from pypy.jit.metainterp.history import LoopToken, AbstractFailDescr, FLOAT
+from pypy.jit.metainterp.history import (LoopToken, AbstractFailDescr, FLOAT,
+                                         INT)
 from pypy.rlib.objectmodel import we_are_translated
-from pypy.jit.backend.ppc.ppcgen.helper.assembler import count_reg_args 
+from pypy.jit.backend.ppc.ppcgen.helper.assembler import (count_reg_args,
+                                                          saved_registers)
 from pypy.jit.backend.ppc.ppcgen.jump import remap_frame_layout
-from pypy.jit.backend.ppc.ppcgen.regalloc import TempPtr
+from pypy.jit.backend.ppc.ppcgen.codebuilder import OverwritingBuilder
+from pypy.jit.backend.ppc.ppcgen.regalloc import TempPtr, TempInt
 from pypy.jit.backend.llsupport import symbolic
 from pypy.rpython.lltypesystem import rstr, rffi, lltype
 
@@ -840,6 +843,128 @@
 
     _mixin_ = True
 
+    # from: ../x86/assembler.py:1668
+    # XXX Split into some helper methods
+    def emit_guard_call_assembler(self, op, guard_op, arglocs, regalloc):  # Emit code for the call operation `op` (descr must be a LoopToken) together with its guard `guard_op`.
+        faildescr = guard_op.getdescr()
+        fail_index = self.cpu.get_fail_descr_number(faildescr)
+        self._write_fail_index(fail_index)
+
+        descr = op.getdescr()
+        assert isinstance(descr, LoopToken)
+        # XXX check this
+        assert op.numargs() == len(descr._ppc_arglocs[0])
+        resbox = TempInt()  # temporary box receiving the result of the call emitted below
+        self._emit_call(fail_index, descr._ppc_direct_bootstrap_code, op.getarglist(),
+                                regalloc, result=resbox)
+        if op.result is None:  # pick the cpu.done_with_this_frame_*_v value matching the result kind
+            value = self.cpu.done_with_this_frame_void_v
+        else:
+            kind = op.result.type
+            if kind == INT:
+                value = self.cpu.done_with_this_frame_int_v
+            elif kind == REF:  # NOTE(review): this diff only adds INT to the history import of this module -- confirm REF is in scope
+                value = self.cpu.done_with_this_frame_ref_v
+            elif kind == FLOAT:
+                assert 0, "not implemented yet"
+            else:
+                raise AssertionError(kind)
+        # compare the call result (expected in r3) against `value`
+        resloc = regalloc.try_allocate_reg(resbox)
+        assert resloc is r.r3
+        self.mc.alloc_scratch_reg(value)  # presumably loads `value` into the scratch register, r0 being compared below -- TODO confirm
+        if IS_PPC_32:
+            self.mc.cmpw(0, resloc.value, r.r0.value)
+        else:
+            self.mc.cmpd(0, resloc.value, r.r0.value)
+        self.mc.free_scratch_reg()
+        regalloc.possibly_free_var(resbox)
+
+        fast_jmp_pos = self.mc.currpos()  # position of the placeholder patched further down
+        self.mc.nop()  # placeholder, overwritten below through OverwritingBuilder
+
+        # Path A: use assembler helper
+        # if values are equal we take the fast path
+        # Slow path, calling helper
+        # jump to merge point
+        jd = descr.outermost_jitdriver_sd
+        assert jd is not None
+        asm_helper_adr = self.cpu.cast_adr_to_int(jd.assembler_helper_adr)
+        with saved_registers(self.mc, r.NONVOLATILES + [r.r3]):
+            # resbox is already in r3
+            self.mov_loc_loc(arglocs[1], r.r4)  # second helper argument comes from arglocs[1]
+            self.mc.bl_abs(asm_helper_adr)
+            if op.result:
+                resloc = regalloc.after_call(op.result)
+                if resloc.is_vfp_reg():
+                    assert 0, "not implemented yet"
+
+        # jump to merge point
+        jmp_pos = self.mc.currpos()
+        self.mc.nop()  # NOTE(review): second placeholder; nothing in this function overwrites it, so Path A appears to fall through into Path B -- confirm
+
+        # Path B: load return value and reset token
+        # Fast Path using result boxes
+        # patch the jump to the fast path
+        offset = self.mc.currpos() - fast_jmp_pos  # distance from the first placeholder to the start of Path B
+        pmc = OverwritingBuilder(self.mc, fast_jmp_pos, WORD)
+        pmc.b(offset)  # NOTE(review): emitted as a plain `b`; no condition from the compare above is passed here -- confirm
+
+        # Reset the vable token --- XXX really too much special logic here:-(
+        if jd.index_of_virtualizable >= 0:
+            from pypy.jit.backend.llsupport.descr import BaseFieldDescr
+            fielddescr = jd.vable_token_descr
+            assert isinstance(fielddescr, BaseFieldDescr)
+            ofs = fielddescr.offset  # NOTE(review): `ofs` is not used by the store below -- confirm
+            resloc = regalloc.force_allocate_reg(resbox)
+            self.alloc_scratch_reg()  # NOTE(review): called on self here but on self.mc elsewhere in this function -- confirm
+            self.mov_loc_loc(arglocs[1], r.r0)
+            self.mc.li(resloc.value, 0)  # the value stored below is 0
+            if IS_PPC_32:
+                self.mc.stwx(resloc.value, 0, r.r0.value)
+            else:
+                self.mc.stdx(resloc.value, 0, r.r0.value)
+            self.free_scratch_reg()  # NOTE(review): same self vs. self.mc question as for alloc_scratch_reg above
+            regalloc.possibly_free_var(resbox)
+
+        if op.result is not None:
+            # load the return value from fail_boxes_xxx[0]
+            kind = op.result.type
+            if kind == INT:
+                adr = self.fail_boxes_int.get_addr_for_num(0)
+            elif kind == REF:
+                adr = self.fail_boxes_ptr.get_addr_for_num(0)
+            elif kind == FLOAT:
+                assert 0, "not implemented yet"
+            else:
+                raise AssertionError(kind)
+            resloc = regalloc.force_allocate_reg(op.result)
+            regalloc.possibly_free_var(resbox)
+            self.mc.alloc_scratch_reg(adr)  # presumably loads `adr` into the scratch register, r0 being used as the address below -- TODO confirm
+            if op.result.type == FLOAT:
+                assert 0, "not implemented yet"
+            else:
+                if IS_PPC_32:
+                    self.mc.lwzx(resloc.value, 0, r.r0.value)
+                else:
+                    self.mc.ldx(resloc.value, 0, r.r0.value)
+            self.mc.free_scratch_reg()
+
+        # merge point
+        offset = self.mc.currpos() - jmp_pos  # NOTE(review): computed but never used to patch the placeholder at jmp_pos
+
+        self.mc.alloc_scratch_reg()
+        if IS_PPC_32:
+            self.mc.cmpwi(0, r.r0.value, 0)  # NOTE(review): r0 is compared before the load on the next line fills it -- confirm the intended order
+            self.mc.lwz(r.r0.value, r.SPP.value, 0)
+        else:
+            self.mc.cmpdi(0, r.r0.value, 0)  # NOTE(review): same ordering question as in the 32-bit branch
+            self.mc.ld(r.r0.value, r.SPP.value, 0)
+        self.mc.cror(2, 1, 2)
+        self.mc.free_scratch_reg()
+
+        self._emit_guard(guard_op, regalloc._prepare_guard(guard_op), c.EQ)
+
     def emit_guard_call_may_force(self, op, guard_op, arglocs, regalloc):
         self.mc.mr(r.r0.value, r.SP.value)
         if IS_PPC_32:
diff --git a/pypy/jit/backend/ppc/ppcgen/ppc_assembler.py b/pypy/jit/backend/ppc/ppcgen/ppc_assembler.py
--- a/pypy/jit/backend/ppc/ppcgen/ppc_assembler.py
+++ b/pypy/jit/backend/ppc/ppcgen/ppc_assembler.py
@@ -8,12 +8,14 @@
 from pypy.jit.backend.ppc.ppcgen.opassembler import OpAssembler
 from pypy.jit.backend.ppc.ppcgen.symbol_lookup import lookup
 from pypy.jit.backend.ppc.ppcgen.codebuilder import PPCBuilder
+from pypy.jit.backend.ppc.ppcgen.jump import remap_frame_layout
 from pypy.jit.backend.ppc.ppcgen.arch import (IS_PPC_32, IS_PPC_64, WORD,
                                               NONVOLATILES,
                                               GPR_SAVE_AREA, BACKCHAIN_SIZE)
 from pypy.jit.backend.ppc.ppcgen.helper.assembler import (gen_emit_cmp_op, 
                                                           encode32, decode32,
-                                                          decode64)
+                                                          decode64,
+                                                          count_reg_args)
 import pypy.jit.backend.ppc.ppcgen.register as r
 import pypy.jit.backend.ppc.ppcgen.condition as c
 from pypy.jit.metainterp.history import (Const, ConstPtr, LoopToken,
@@ -164,9 +166,15 @@
             clt.asmmemmgr = []
         return clt.asmmemmgr_blocks
 
+    def _make_prologue(self, target_pos, frame_depth):  # Emit the frame setup via _make_frame, then a `b` to position `target_pos`.
+        self._make_frame(frame_depth)
+        curpos = self.mc.currpos()
+        offset = target_pos - curpos  # displacement relative to the position of the branch itself
+        self.mc.b(offset)
+
     # The code generated here allocates a new stackframe 
     # and is the first machine code to be executed.
-    def _make_prologue(self, target_pos, frame_depth):
+    def _make_frame(self, frame_depth):
         if IS_PPC_32:
             # save it in previous frame (Backchain)
             self.mc.stwu(r.SP.value, r.SP.value, -frame_depth)
@@ -192,10 +200,6 @@
         else:
             self.mc.ld(r.r30.value, r.SP.value, WORD)
             self.mc.std(r.r30.value, r.SPP.value, WORD * len(NONVOLATILES))
-        # branch to loop code
-        curpos = self.mc.currpos()
-        offset = target_pos - curpos
-        self.mc.b(offset)
 
     def setup_failure_recovery(self):
 
@@ -448,6 +452,86 @@
             if loc.is_stack():
                 self.regalloc_mov(r.r0, loc)
 
+    def gen_direct_bootstrap_code(self, loophead, looptoken, inputargs, frame_depth):  # Emit an entry sequence: frame setup, moving incoming arguments to looptoken._ppc_arglocs[0], then a branch to `loophead`.
+        self._make_frame(frame_depth)
+        nonfloatlocs = looptoken._ppc_arglocs[0]  # destination location for each input argument
+
+        reg_args = count_reg_args(inputargs)  # the first reg_args arguments are read from r.PARAM_REGS below
+
+        stack_locs = len(inputargs) - reg_args  # NOTE(review): not used anywhere below
+
+        selected_reg = 0
+        count = 0
+        nonfloat_args = []  # sources handed to remap_frame_layout: the parameter registers
+        nonfloat_regs = []  # destinations handed to remap_frame_layout: entries of nonfloatlocs (NOTE(review): names read as swapped)
+        # load reg args
+        for i in range(reg_args):
+            arg = inputargs[i]
+            if arg.type == FLOAT and count % 2 != 0:
+                assert 0, "not implemented yet"
+            reg = r.PARAM_REGS[selected_reg]
+
+            if arg.type == FLOAT:
+                assert 0, "not implemented yet"
+            else:
+                nonfloat_args.append(reg)
+                nonfloat_regs.append(nonfloatlocs[i])
+
+            if arg.type == FLOAT:
+                assert 0, "not implemented yet"
+            else:
+                selected_reg += 1
+                count += 1
+
+        # remap values stored in core registers
+        self.mc.alloc_scratch_reg()
+        remap_frame_layout(self, nonfloat_args, nonfloat_regs, r.r0)  # r0 is passed as the temporary register
+        self.mc.free_scratch_reg()
+
+        # load values passed on the stack to the corresponding locations
+        stack_position = self.GPR_SAVE_AREA_AND_FORCE_INDEX  # offset from SPP of the first stack-passed argument read below
+
+        count = 0  # incremented below but not read again in this function
+        for i in range(reg_args, len(inputargs)):
+            arg = inputargs[i]
+            if arg.type == FLOAT:
+                assert 0, "not implemented yet"
+            else:
+                loc = nonfloatlocs[i]
+            if loc.is_reg():
+                if IS_PPC_32:
+                    self.mc.lwz(loc.value, r.SPP.value, stack_position)  # load straight into the destination register
+                else:
+                    self.mc.ld(loc.value, r.SPP.value, stack_position)
+                count += 1
+            elif loc.is_vfp_reg():
+                assert 0, "not implemented yet"
+            elif loc.is_stack():
+                if loc.type == FLOAT:
+                    assert 0, "not implemented yet"
+                elif loc.type == INT or loc.type == REF:
+                    count += 1
+                    self.mc.alloc_scratch_reg()
+                    if IS_PPC_32:
+                        self.mc.lwz(r.r0.value, r.SPP.value, stack_position)  # go through r0, then move to the stack location
+                    else:
+                        self.mc.ld(r.r0.value, r.SPP.value, stack_position)
+                    self.mov_loc_loc(r.r0, loc)
+                    self.mc.free_scratch_reg()
+                else:
+                    assert 0, 'invalid location'
+            else:
+                assert 0, 'invalid location'
+            if loc.type == FLOAT:
+                assert 0, "not implemented yet"
+            else:
+                size = 1  # every non-float argument advances stack_position by one WORD
+            stack_position += size * WORD
+
+        #sp_patch_location = self._prepare_sp_patch_position()
+        self.mc.b_offset(loophead)  # branch to position `loophead`
+        #self._patch_sp_offset(sp_patch_location, looptoken._ppc_frame_depth)
+
     def setup(self, looptoken, operations):
         assert self.memcpy_addr != 0
         self.current_clt = looptoken.compiled_loop_token 
@@ -512,9 +596,13 @@
         looptoken._ppc_frame_manager_depth = regalloc.frame_manager.frame_depth
         self._make_prologue(regalloc_head, frame_depth)
      
+        direct_bootstrap_code = self.mc.currpos()
+        self.gen_direct_bootstrap_code(loophead, looptoken, inputargs, frame_depth)
+
         self.write_pending_failure_recoveries()
         loop_start = self.materialize_loop(looptoken, False)
         looptoken._ppc_bootstrap_code = loop_start
+        looptoken._ppc_direct_bootstrap_code = loop_start + direct_bootstrap_code
         real_start = loop_start + start_pos
         if IS_PPC_32:
             looptoken.ppc_code = real_start
@@ -621,7 +709,7 @@
             if op.has_no_side_effect() and op.result not in regalloc.longevity:
                 regalloc.possibly_free_vars_for_op(op)
             elif self.can_merge_with_next_guard(op, pos, operations)\
-                    and opnum == rop.CALL_RELEASE_GIL:  # XXX fix  
+                    and opnum in (rop.CALL_RELEASE_GIL, rop.CALL_ASSEMBLER):  # XXX fix  
                 regalloc.next_instruction()
                 arglocs = regalloc.operations_with_guard[opnum](regalloc, op,
                                         operations[pos+1])
@@ -800,6 +888,7 @@
                 return
             assert 0, "not supported location"
         assert 0, "not supported location"
+    mov_loc_loc = regalloc_mov
 
     def regalloc_push(self, loc):
         """Pushes the value stored in loc to the stack
@@ -894,11 +983,11 @@
             return 0
 
     def _write_fail_index(self, fail_index):
-        self.mc.load_imm(r.r0.value, fail_index)
+        self.mc.load_imm(r.r0, fail_index)  # NOTE(review): passes the register object, while stw/std below pass r.r0.value -- confirm what load_imm expects
         if IS_PPC_32:
-            self.mc.stw(r.r0.value, r.SSP.value, 0)
+            self.mc.stw(r.r0.value, r.SPP.value, 0)  # store r0 (the fail index) at offset 0 from SPP
         else:
-            self.mc.std(r.r0.value, r.SSP.value, 0)
+            self.mc.std(r.r0.value, r.SPP.value, 0)  # same store, emitted with std when not IS_PPC_32
             
     def load(self, loc, value):
         assert loc.is_reg() and value.is_imm()
diff --git a/pypy/jit/backend/ppc/ppcgen/regalloc.py b/pypy/jit/backend/ppc/ppcgen/regalloc.py
--- a/pypy/jit/backend/ppc/ppcgen/regalloc.py
+++ b/pypy/jit/backend/ppc/ppcgen/regalloc.py
@@ -238,6 +238,12 @@
         return self.rm.make_sure_var_in_reg(var, forbidden_vars,
                 selected_reg, need_lower_byte)
 
+    def _sync_var(self, v):  # Forward to self.rm._sync_var(v) for non-float variables; float variables abort.
+        if v.type == FLOAT:
+            assert 0, "not implemented yet"
+        else:
+            self.rm._sync_var(v)
+
     # ******************************************************
     # *         P R E P A R E  O P E R A T I O N S         * 
     # ******************************************************
@@ -715,6 +721,21 @@
     prepare_debug_merge_point = void
     prepare_jit_debug = void
 
+    def prepare_guard_call_assembler(self, op, guard_op):  # Build the location list [imm(result size), virtualizable location or imm(0)] for `op`.
+        descr = op.getdescr()
+        assert isinstance(descr, LoopToken)
+        jd = descr.outermost_jitdriver_sd
+        assert jd is not None
+        size = jd.portal_calldescr.get_result_size(self.cpu.translate_support_code)
+        vable_index = jd.index_of_virtualizable  # values below 0 take the imm(0) branch
+        if vable_index >= 0:
+            self._sync_var(op.getarg(vable_index))  # sync the variable first, then look up its frame_manager location
+            vable = self.frame_manager.loc(op.getarg(vable_index))
+        else:
+            vable = imm(0)
+        self.possibly_free_vars(guard_op.getfailargs())
+        return [imm(size), vable]
+
     def _prepare_args_for_new_op(self, new_args):
         gc_ll_descr = self.cpu.gc_ll_descr
         args = gc_ll_descr.args_for_new(new_args)


More information about the pypy-commit mailing list