[pypy-commit] pypy ppc-jit-backend: Started implementation of CALL_ASSEMBLER
hager
noreply at buildbot.pypy.org
Tue Nov 22 19:09:51 CET 2011
Author: hager <sven.hager at uni-duesseldorf.de>
Branch: ppc-jit-backend
Changeset: r49669:c489a73eaf5d
Date: 2011-11-22 19:09 +0100
http://bitbucket.org/pypy/pypy/changeset/c489a73eaf5d/
Log: Started implementation of CALL_ASSEMBLER
diff --git a/pypy/jit/backend/ppc/ppcgen/codebuilder.py b/pypy/jit/backend/ppc/ppcgen/codebuilder.py
--- a/pypy/jit/backend/ppc/ppcgen/codebuilder.py
+++ b/pypy/jit/backend/ppc/ppcgen/codebuilder.py
@@ -928,6 +928,21 @@
self.faillocs = faillocs
self.save_exc = save_exc
+class OverwritingBuilder(PPCAssembler):
+ def __init__(self, cb, start, size):
+ PPCAssembler.__init__(self)
+ self.cb = cb
+ self.index = start
+ self.end = start + size
+
+ def currpos(self):
+ return self.index
+
+ def writechar(self, char):
+ assert self.index <= self.end
+ self.cb.overwrite(self.index, char)
+ self.index += 1
+
class PPCBuilder(BlockBuilderMixin, PPCAssembler):
def __init__(self, failargs_limit=1000, r0_in_use=False):
PPCAssembler.__init__(self)
@@ -964,6 +979,12 @@
else:
self.stdx(source_reg.value, 0, r.r0.value)
+ def b_offset(self, offset):
+ curpos = self.currpos()
+ target_ofs = offset - curpos
+ assert target_ofs < (1 << 24)
+ self.b(target_ofs)
+
def b_cond_offset(self, offset, condition):
pos = self.currpos()
target_ofs = offset - pos
diff --git a/pypy/jit/backend/ppc/ppcgen/helper/assembler.py b/pypy/jit/backend/ppc/ppcgen/helper/assembler.py
--- a/pypy/jit/backend/ppc/ppcgen/helper/assembler.py
+++ b/pypy/jit/backend/ppc/ppcgen/helper/assembler.py
@@ -1,8 +1,9 @@
import pypy.jit.backend.ppc.ppcgen.condition as c
from pypy.rlib.rarithmetic import r_uint, r_longlong, intmask
-from pypy.jit.backend.ppc.ppcgen.arch import MAX_REG_PARAMS, IS_PPC_32
+from pypy.jit.backend.ppc.ppcgen.arch import MAX_REG_PARAMS, IS_PPC_32, WORD
from pypy.jit.metainterp.history import FLOAT
from pypy.rlib.unroll import unrolling_iterable
+import pypy.jit.backend.ppc.ppcgen.register as r
def gen_emit_cmp_op(condition, signed=True):
def f(self, op, arglocs, regalloc):
@@ -86,20 +87,28 @@
class saved_registers(object):
def __init__(self, assembler, regs_to_save, regalloc=None):
- self.assembler = assembler
+ self.mc = assembler
self.regalloc = regalloc
if self.regalloc:
- self._filter_regs(regs_to_save, vfp_regs_to_save)
+ assert 0, "not implemented yet"
else:
self.regs = regs_to_save
def __enter__(self):
if len(self.regs) > 0:
- self.assembler.PUSH([r.value for r in self.regs])
+ space = WORD * len(self.regs)
+ self.mc.addi(r.SP.value, r.SP.value, -space)
+ for i, reg in enumerate(self.regs):
+ if IS_PPC_32:
+ self.mc.stw(reg.value, r.SP.value, i * WORD)
+ else:
+ self.mc.std(reg.value, r.SP.value, i * WORD)
- def _filter_regs(self, regs_to_save, vfp_regs_to_save):
- regs = []
- for box, reg in self.regalloc.rm.reg_bindings.iteritems():
- if reg is r.ip or (reg in regs_to_save and self.regalloc.stays_alive(box)):
- regs.append(reg)
- self.regs = regs
+ def __exit__(self, *args):
+ if len(self.regs) > 0:
+ space = WORD * len(self.regs)
+ for i, reg in enumerate(self.regs):
+ if IS_PPC_32:
+ self.mc.lwz(reg.value, r.SP.value, i * WORD)
+ else:
+ self.mc.ld(reg.value, r.SP.value, i * WORD)
diff --git a/pypy/jit/backend/ppc/ppcgen/opassembler.py b/pypy/jit/backend/ppc/ppcgen/opassembler.py
--- a/pypy/jit/backend/ppc/ppcgen/opassembler.py
+++ b/pypy/jit/backend/ppc/ppcgen/opassembler.py
@@ -6,11 +6,14 @@
GPR_SAVE_AREA, BACKCHAIN_SIZE,
MAX_REG_PARAMS)
-from pypy.jit.metainterp.history import LoopToken, AbstractFailDescr, FLOAT
+from pypy.jit.metainterp.history import (LoopToken, AbstractFailDescr, FLOAT,
+ INT)
from pypy.rlib.objectmodel import we_are_translated
-from pypy.jit.backend.ppc.ppcgen.helper.assembler import count_reg_args
+from pypy.jit.backend.ppc.ppcgen.helper.assembler import (count_reg_args,
+ saved_registers)
from pypy.jit.backend.ppc.ppcgen.jump import remap_frame_layout
-from pypy.jit.backend.ppc.ppcgen.regalloc import TempPtr
+from pypy.jit.backend.ppc.ppcgen.codebuilder import OverwritingBuilder
+from pypy.jit.backend.ppc.ppcgen.regalloc import TempPtr, TempInt
from pypy.jit.backend.llsupport import symbolic
from pypy.rpython.lltypesystem import rstr, rffi, lltype
@@ -840,6 +843,128 @@
_mixin_ = True
+ # from: ../x86/assembler.py:1668
+ # XXX Split into some helper methods
+ def emit_guard_call_assembler(self, op, guard_op, arglocs, regalloc):
+ faildescr = guard_op.getdescr()
+ fail_index = self.cpu.get_fail_descr_number(faildescr)
+ self._write_fail_index(fail_index)
+
+ descr = op.getdescr()
+ assert isinstance(descr, LoopToken)
+ # XXX check this
+ assert op.numargs() == len(descr._ppc_arglocs[0])
+ resbox = TempInt()
+ self._emit_call(fail_index, descr._ppc_direct_bootstrap_code, op.getarglist(),
+ regalloc, result=resbox)
+ if op.result is None:
+ value = self.cpu.done_with_this_frame_void_v
+ else:
+ kind = op.result.type
+ if kind == INT:
+ value = self.cpu.done_with_this_frame_int_v
+ elif kind == REF:
+ value = self.cpu.done_with_this_frame_ref_v
+ elif kind == FLOAT:
+ assert 0, "not implemented yet"
+ else:
+ raise AssertionError(kind)
+ # check value
+ resloc = regalloc.try_allocate_reg(resbox)
+ assert resloc is r.r3
+ self.mc.alloc_scratch_reg(value)
+ if IS_PPC_32:
+ self.mc.cmpw(0, resloc.value, r.r0.value)
+ else:
+ self.mc.cmpd(0, resloc.value, r.r0.value)
+ self.mc.free_scratch_reg()
+ regalloc.possibly_free_var(resbox)
+
+ fast_jmp_pos = self.mc.currpos()
+ self.mc.nop()
+
+ # Path A: use assembler helper
+ # if values are equal we take the fast path
+ # Slow path, calling helper
+ # jump to merge point
+ jd = descr.outermost_jitdriver_sd
+ assert jd is not None
+ asm_helper_adr = self.cpu.cast_adr_to_int(jd.assembler_helper_adr)
+ with saved_registers(self.mc, r.NONVOLATILES + [r.r3]):
+ # resbox is already in r3
+ self.mov_loc_loc(arglocs[1], r.r4)
+ self.mc.bl_abs(asm_helper_adr)
+ if op.result:
+ resloc = regalloc.after_call(op.result)
+ if resloc.is_vfp_reg():
+ assert 0, "not implemented yet"
+
+ # jump to merge point
+ jmp_pos = self.mc.currpos()
+ self.mc.nop()
+
+ # Path B: load return value and reset token
+ # Fast Path using result boxes
+ # patch the jump to the fast path
+ offset = self.mc.currpos() - fast_jmp_pos
+ pmc = OverwritingBuilder(self.mc, fast_jmp_pos, WORD)
+ pmc.b(offset)
+
+ # Reset the vable token --- XXX really too much special logic here:-(
+ if jd.index_of_virtualizable >= 0:
+ from pypy.jit.backend.llsupport.descr import BaseFieldDescr
+ fielddescr = jd.vable_token_descr
+ assert isinstance(fielddescr, BaseFieldDescr)
+ ofs = fielddescr.offset
+ resloc = regalloc.force_allocate_reg(resbox)
+ self.alloc_scratch_reg()
+ self.mov_loc_loc(arglocs[1], r.r0)
+ self.mc.li(resloc.value, 0)
+ if IS_PPC_32:
+ self.mc.stwx(resloc.value, 0, r.r0.value)
+ else:
+ self.mc.stdx(resloc.value, 0, r.r0.value)
+ self.free_scratch_reg()
+ regalloc.possibly_free_var(resbox)
+
+ if op.result is not None:
+ # load the return value from fail_boxes_xxx[0]
+ kind = op.result.type
+ if kind == INT:
+ adr = self.fail_boxes_int.get_addr_for_num(0)
+ elif kind == REF:
+ adr = self.fail_boxes_ptr.get_addr_for_num(0)
+ elif kind == FLOAT:
+ assert 0, "not implemented yet"
+ else:
+ raise AssertionError(kind)
+ resloc = regalloc.force_allocate_reg(op.result)
+ regalloc.possibly_free_var(resbox)
+ self.mc.alloc_scratch_reg(adr)
+ if op.result.type == FLOAT:
+ assert 0, "not implemented yet"
+ else:
+ if IS_PPC_32:
+ self.mc.lwzx(resloc.value, 0, r.r0.value)
+ else:
+ self.mc.ldx(resloc.value, 0, r.r0.value)
+ self.mc.free_scratch_reg()
+
+ # merge point
+ offset = self.mc.currpos() - jmp_pos
+
+ self.mc.alloc_scratch_reg()
+ if IS_PPC_32:
+ self.mc.cmpwi(0, r.r0.value, 0)
+ self.mc.lwz(r.r0.value, r.SPP.value, 0)
+ else:
+ self.mc.cmpdi(0, r.r0.value, 0)
+ self.mc.ld(r.r0.value, r.SPP.value, 0)
+ self.mc.cror(2, 1, 2)
+ self.mc.free_scratch_reg()
+
+ self._emit_guard(guard_op, regalloc._prepare_guard(guard_op), c.EQ)
+
def emit_guard_call_may_force(self, op, guard_op, arglocs, regalloc):
self.mc.mr(r.r0.value, r.SP.value)
if IS_PPC_32:
diff --git a/pypy/jit/backend/ppc/ppcgen/ppc_assembler.py b/pypy/jit/backend/ppc/ppcgen/ppc_assembler.py
--- a/pypy/jit/backend/ppc/ppcgen/ppc_assembler.py
+++ b/pypy/jit/backend/ppc/ppcgen/ppc_assembler.py
@@ -8,12 +8,14 @@
from pypy.jit.backend.ppc.ppcgen.opassembler import OpAssembler
from pypy.jit.backend.ppc.ppcgen.symbol_lookup import lookup
from pypy.jit.backend.ppc.ppcgen.codebuilder import PPCBuilder
+from pypy.jit.backend.ppc.ppcgen.jump import remap_frame_layout
from pypy.jit.backend.ppc.ppcgen.arch import (IS_PPC_32, IS_PPC_64, WORD,
NONVOLATILES,
GPR_SAVE_AREA, BACKCHAIN_SIZE)
from pypy.jit.backend.ppc.ppcgen.helper.assembler import (gen_emit_cmp_op,
encode32, decode32,
- decode64)
+ decode64,
+ count_reg_args)
import pypy.jit.backend.ppc.ppcgen.register as r
import pypy.jit.backend.ppc.ppcgen.condition as c
from pypy.jit.metainterp.history import (Const, ConstPtr, LoopToken,
@@ -164,9 +166,15 @@
clt.asmmemmgr = []
return clt.asmmemmgr_blocks
+ def _make_prologue(self, target_pos, frame_depth):
+ self._make_frame(frame_depth)
+ curpos = self.mc.currpos()
+ offset = target_pos - curpos
+ self.mc.b(offset)
+
# The code generated here allocates a new stackframe
# and is the first machine code to be executed.
- def _make_prologue(self, target_pos, frame_depth):
+ def _make_frame(self, frame_depth):
if IS_PPC_32:
# save it in previous frame (Backchain)
self.mc.stwu(r.SP.value, r.SP.value, -frame_depth)
@@ -192,10 +200,6 @@
else:
self.mc.ld(r.r30.value, r.SP.value, WORD)
self.mc.std(r.r30.value, r.SPP.value, WORD * len(NONVOLATILES))
- # branch to loop code
- curpos = self.mc.currpos()
- offset = target_pos - curpos
- self.mc.b(offset)
def setup_failure_recovery(self):
@@ -448,6 +452,86 @@
if loc.is_stack():
self.regalloc_mov(r.r0, loc)
+ def gen_direct_bootstrap_code(self, loophead, looptoken, inputargs, frame_depth):
+ self._make_frame(frame_depth)
+ nonfloatlocs = looptoken._ppc_arglocs[0]
+
+ reg_args = count_reg_args(inputargs)
+
+ stack_locs = len(inputargs) - reg_args
+
+ selected_reg = 0
+ count = 0
+ nonfloat_args = []
+ nonfloat_regs = []
+ # load reg args
+ for i in range(reg_args):
+ arg = inputargs[i]
+ if arg.type == FLOAT and count % 2 != 0:
+ assert 0, "not implemented yet"
+ reg = r.PARAM_REGS[selected_reg]
+
+ if arg.type == FLOAT:
+ assert 0, "not implemented yet"
+ else:
+ nonfloat_args.append(reg)
+ nonfloat_regs.append(nonfloatlocs[i])
+
+ if arg.type == FLOAT:
+ assert 0, "not implemented yet"
+ else:
+ selected_reg += 1
+ count += 1
+
+ # remap values stored in core registers
+ self.mc.alloc_scratch_reg()
+ remap_frame_layout(self, nonfloat_args, nonfloat_regs, r.r0)
+ self.mc.free_scratch_reg()
+
+ # load values passed on the stack to the corresponding locations
+ stack_position = self.GPR_SAVE_AREA_AND_FORCE_INDEX
+
+ count = 0
+ for i in range(reg_args, len(inputargs)):
+ arg = inputargs[i]
+ if arg.type == FLOAT:
+ assert 0, "not implemented yet"
+ else:
+ loc = nonfloatlocs[i]
+ if loc.is_reg():
+ if IS_PPC_32:
+ self.mc.lwz(loc.value, r.SPP.value, stack_position)
+ else:
+ self.mc.ld(loc.value, r.SPP.value, stack_position)
+ count += 1
+ elif loc.is_vfp_reg():
+ assert 0, "not implemented yet"
+ elif loc.is_stack():
+ if loc.type == FLOAT:
+ assert 0, "not implemented yet"
+ elif loc.type == INT or loc.type == REF:
+ count += 1
+ self.mc.alloc_scratch_reg()
+ if IS_PPC_32:
+ self.mc.lwz(r.r0.value, r.SPP.value, stack_position)
+ else:
+ self.mc.ld(r.r0.value, r.SPP.value, stack_position)
+ self.mov_loc_loc(r.r0, loc)
+ self.mc.free_scratch_reg()
+ else:
+ assert 0, 'invalid location'
+ else:
+ assert 0, 'invalid location'
+ if loc.type == FLOAT:
+ assert 0, "not implemented yet"
+ else:
+ size = 1
+ stack_position += size * WORD
+
+ #sp_patch_location = self._prepare_sp_patch_position()
+ self.mc.b_offset(loophead)
+ #self._patch_sp_offset(sp_patch_location, looptoken._ppc_frame_depth)
+
def setup(self, looptoken, operations):
assert self.memcpy_addr != 0
self.current_clt = looptoken.compiled_loop_token
@@ -512,9 +596,13 @@
looptoken._ppc_frame_manager_depth = regalloc.frame_manager.frame_depth
self._make_prologue(regalloc_head, frame_depth)
+ direct_bootstrap_code = self.mc.currpos()
+ self.gen_direct_bootstrap_code(loophead, looptoken, inputargs, frame_depth)
+
self.write_pending_failure_recoveries()
loop_start = self.materialize_loop(looptoken, False)
looptoken._ppc_bootstrap_code = loop_start
+ looptoken._ppc_direct_bootstrap_code = loop_start + direct_bootstrap_code
real_start = loop_start + start_pos
if IS_PPC_32:
looptoken.ppc_code = real_start
@@ -621,7 +709,7 @@
if op.has_no_side_effect() and op.result not in regalloc.longevity:
regalloc.possibly_free_vars_for_op(op)
elif self.can_merge_with_next_guard(op, pos, operations)\
- and opnum == rop.CALL_RELEASE_GIL: # XXX fix
+ and opnum in (rop.CALL_RELEASE_GIL, rop.CALL_ASSEMBLER): # XXX fix
regalloc.next_instruction()
arglocs = regalloc.operations_with_guard[opnum](regalloc, op,
operations[pos+1])
@@ -800,6 +888,7 @@
return
assert 0, "not supported location"
assert 0, "not supported location"
+ mov_loc_loc = regalloc_mov
def regalloc_push(self, loc):
"""Pushes the value stored in loc to the stack
@@ -894,11 +983,11 @@
return 0
def _write_fail_index(self, fail_index):
- self.mc.load_imm(r.r0.value, fail_index)
+ self.mc.load_imm(r.r0, fail_index)
if IS_PPC_32:
- self.mc.stw(r.r0.value, r.SSP.value, 0)
+ self.mc.stw(r.r0.value, r.SPP.value, 0)
else:
- self.mc.std(r.r0.value, r.SSP.value, 0)
+ self.mc.std(r.r0.value, r.SPP.value, 0)
def load(self, loc, value):
assert loc.is_reg() and value.is_imm()
diff --git a/pypy/jit/backend/ppc/ppcgen/regalloc.py b/pypy/jit/backend/ppc/ppcgen/regalloc.py
--- a/pypy/jit/backend/ppc/ppcgen/regalloc.py
+++ b/pypy/jit/backend/ppc/ppcgen/regalloc.py
@@ -238,6 +238,12 @@
return self.rm.make_sure_var_in_reg(var, forbidden_vars,
selected_reg, need_lower_byte)
+ def _sync_var(self, v):
+ if v.type == FLOAT:
+ assert 0, "not implemented yet"
+ else:
+ self.rm._sync_var(v)
+
# ******************************************************
# * P R E P A R E O P E R A T I O N S *
# ******************************************************
@@ -715,6 +721,21 @@
prepare_debug_merge_point = void
prepare_jit_debug = void
+ def prepare_guard_call_assembler(self, op, guard_op):
+ descr = op.getdescr()
+ assert isinstance(descr, LoopToken)
+ jd = descr.outermost_jitdriver_sd
+ assert jd is not None
+ size = jd.portal_calldescr.get_result_size(self.cpu.translate_support_code)
+ vable_index = jd.index_of_virtualizable
+ if vable_index >= 0:
+ self._sync_var(op.getarg(vable_index))
+ vable = self.frame_manager.loc(op.getarg(vable_index))
+ else:
+ vable = imm(0)
+ self.possibly_free_vars(guard_op.getfailargs())
+ return [imm(size), vable]
+
def _prepare_args_for_new_op(self, new_args):
gc_ll_descr = self.cpu.gc_ll_descr
args = gc_ll_descr.args_for_new(new_args)
More information about the pypy-commit
mailing list