[pypy-commit] pypy ppc-jit-backend: merge ppc-backend-2
edelsohn
noreply at buildbot.pypy.org
Tue Jun 12 15:43:37 CEST 2012
Author: edelsohn
Branch: ppc-jit-backend
Changeset: r55611:37fbdcc94fe8
Date: 2012-06-12 09:36 -0400
http://bitbucket.org/pypy/pypy/changeset/37fbdcc94fe8/
Log: merge ppc-backend-2
diff --git a/pypy/jit/backend/arm/runner.py b/pypy/jit/backend/arm/runner.py
--- a/pypy/jit/backend/arm/runner.py
+++ b/pypy/jit/backend/arm/runner.py
@@ -1,5 +1,4 @@
from pypy.jit.backend.arm.assembler import AssemblerARM
-from pypy.jit.backend.arm.arch import WORD
from pypy.jit.backend.arm.registers import all_regs, all_vfp_regs
from pypy.jit.backend.llsupport.llmodel import AbstractLLCPU
from pypy.rpython.llinterp import LLInterpreter
@@ -10,6 +9,8 @@
class ArmCPU(AbstractLLCPU):
supports_floats = True
+ supports_longlong = False # XXX requires an implementation of
+ # read_timestamp that works in user mode
def __init__(self, rtyper, stats, opts=None, translate_support_code=False,
gcdescr=None):
@@ -41,7 +42,7 @@
clt = original_loop_token.compiled_loop_token
clt.compiling_a_bridge()
return self.assembler.assemble_bridge(faildescr, inputargs, operations,
- original_loop_token, log=log)
+ original_loop_token, log=log)
def get_latest_value_float(self, index):
return self.assembler.fail_boxes_float.getitem(index)
@@ -100,12 +101,15 @@
all_null_registers = lltype.malloc(rffi.LONGP.TO,
len(all_vfp_regs) * 2 + len(all_regs),
flavor='raw', zero=True, immortal=True)
+
def force(self, addr_of_force_index):
TP = rffi.CArrayPtr(lltype.Signed)
fail_index = rffi.cast(TP, addr_of_force_index)[0]
assert fail_index >= 0, "already forced!"
faildescr = self.get_fail_descr_from_number(fail_index)
rffi.cast(TP, addr_of_force_index)[0] = ~fail_index
+ bytecode = self.assembler._find_failure_recovery_bytecode(faildescr)
+ addr_all_null_regsiters = rffi.cast(rffi.LONG, self.all_null_registers)
# start of "no gc operation!" block
fail_index_2 = self.assembler.failure_recovery_func(
bytecode,
diff --git a/pypy/jit/backend/ppc/arch.py b/pypy/jit/backend/ppc/arch.py
--- a/pypy/jit/backend/ppc/arch.py
+++ b/pypy/jit/backend/ppc/arch.py
@@ -2,7 +2,8 @@
from pypy.jit.backend.ppc.register import (NONVOLATILES,
NONVOLATILES_FLOAT,
- MANAGED_REGS)
+ MANAGED_REGS,
+ MANAGED_FP_REGS)
import sys
if sys.maxint == (2**31 - 1):
@@ -25,11 +26,12 @@
GPR_SAVE_AREA = len(NONVOLATILES) * WORD
FLOAT_INT_CONVERSION = WORD
MAX_REG_PARAMS = 8
+MAX_FREG_PARAMS = 13
# we need at most 5 instructions to load a constant
# and one instruction to patch the stack pointer
SIZE_LOAD_IMM_PATCH_SP = 6
-FORCE_INDEX_OFS = len(MANAGED_REGS) * WORD
+FORCE_INDEX_OFS = (len(MANAGED_REGS) + len(MANAGED_FP_REGS)) * WORD
# offset to LR in BACKCHAIN
if IS_PPC_32:
diff --git a/pypy/jit/backend/ppc/codebuilder.py b/pypy/jit/backend/ppc/codebuilder.py
--- a/pypy/jit/backend/ppc/codebuilder.py
+++ b/pypy/jit/backend/ppc/codebuilder.py
@@ -225,6 +225,9 @@
fsqrt = XDB(63, XO1=22, Rc=0)
+ mffgpr = XS(31, XO1=607, Rc=0)
+ mftgpr = XS(31, XO1=735, Rc=0)
+
icbi = X0(31, XO1=982)
lbzux = XD(31, XO1=119)
@@ -1171,8 +1174,10 @@
self._copy_to_raw_memory(addr)
self.flush_cache(addr)
- def cmp_op(self, block, a, b, imm=False, signed=True):
- if IS_PPC_32:
+ def cmp_op(self, block, a, b, imm=False, signed=True, fp=False):
+ if fp == True:
+ self.fcmpu(block, a, b)
+ elif IS_PPC_32:
if signed:
if imm:
# 32 bit immediate signed
diff --git a/pypy/jit/backend/ppc/helper/assembler.py b/pypy/jit/backend/ppc/helper/assembler.py
--- a/pypy/jit/backend/ppc/helper/assembler.py
+++ b/pypy/jit/backend/ppc/helper/assembler.py
@@ -6,12 +6,15 @@
import pypy.jit.backend.ppc.register as r
from pypy.rpython.lltypesystem import rffi, lltype
-def gen_emit_cmp_op(condition, signed=True):
+def gen_emit_cmp_op(condition, signed=True, fp=False):
def f(self, op, arglocs, regalloc):
l0, l1, res = arglocs
# do the comparison
- self.mc.cmp_op(0, l0.value, l1.value,
- imm=l1.is_imm(), signed=signed)
+ if fp == True:
+ self.mc.fcmpu(0, l0.value, l1.value)
+ else:
+ self.mc.cmp_op(0, l0.value, l1.value,
+ imm=l1.is_imm(), signed=signed)
# After the comparison, place the result
# in the first bit of the CR
if condition == c.LT or condition == c.U_LT:
@@ -25,7 +28,7 @@
elif condition == c.GT or condition == c.U_GT:
self.mc.cror(0, 1, 1)
elif condition == c.NE:
- self.mc.cror(0, 0, 1)
+ self.mc.crnor(0, 2, 2)
else:
assert 0, "condition not known"
@@ -58,7 +61,8 @@
count = 0
for x in range(min(len(args), MAX_REG_PARAMS)):
if args[x].type == FLOAT:
- assert 0, "not implemented yet"
+ count += 1
+ words += 1
else:
count += 1
words += 1
@@ -73,9 +77,11 @@
in ENCODING AREA around calls
"""
- def __init__(self, codebuilder, save_RES=True):
+ def __init__(self, codebuilder, save_RES=True, save_FLOAT=True):
+ self.mc = codebuilder
self.save_RES = save_RES
- self.mc = codebuilder
+ self.save_FLOAT = save_FLOAT
+ self.FLOAT_OFFSET = len(r.VOLATILES)
def __enter__(self):
""" before a call, volatile registers are saved in ENCODING AREA
@@ -84,6 +90,12 @@
if not self.save_RES and reg is r.RES:
continue
self.mc.store(reg.value, r.SPP.value, i * WORD)
+ if self.save_FLOAT:
+ for i, reg in enumerate(r.VOLATILES_FLOAT):
+ if not self.save_RES and reg is r.f1:
+ continue
+ self.mc.stfd(reg.value, r.SPP.value,
+ (i + self.FLOAT_OFFSET) * WORD)
def __exit__(self, *args):
""" after call, volatile registers have to be restored
@@ -92,3 +104,9 @@
if not self.save_RES and reg is r.RES:
continue
self.mc.load(reg.value, r.SPP.value, i * WORD)
+ if self.save_FLOAT:
+ for i, reg in enumerate(r.VOLATILES_FLOAT):
+ if not self.save_RES and reg is r.f1:
+ continue
+ self.mc.lfd(reg.value, r.SPP.value,
+ (i + self.FLOAT_OFFSET) * WORD)
diff --git a/pypy/jit/backend/ppc/helper/regalloc.py b/pypy/jit/backend/ppc/helper/regalloc.py
--- a/pypy/jit/backend/ppc/helper/regalloc.py
+++ b/pypy/jit/backend/ppc/helper/regalloc.py
@@ -1,5 +1,4 @@
-from pypy.jit.metainterp.history import ConstInt
-from pypy.jit.metainterp.history import Box
+from pypy.jit.metainterp.history import ConstInt, Box, FLOAT
IMM_SIZE = 2 ** 15 - 1
@@ -84,3 +83,41 @@
self.possibly_free_var(op.result)
return [reg1, reg2, res]
return f
+
+def prepare_float_op(name=None, base=True, float_result=True, guard=False):
+ if guard:
+ def f(self, op, guard_op):
+ locs = []
+ loc1 = self._ensure_value_is_boxed(op.getarg(0))
+ locs.append(loc1)
+ if base:
+ loc2 = self._ensure_value_is_boxed(op.getarg(1))
+ locs.append(loc2)
+ self.possibly_free_vars_for_op(op)
+ self.free_temp_vars()
+ if guard_op is None:
+ res = self.force_allocate_reg(op.result)
+ assert float_result == (op.result.type == FLOAT)
+ locs.append(res)
+ return locs
+ else:
+ args = self._prepare_guard(guard_op, locs)
+ return args
+ else:
+ def f(self, op):
+ locs = []
+ loc1 = self._ensure_value_is_boxed(op.getarg(0))
+ locs.append(loc1)
+ if base:
+ loc2 = self._ensure_value_is_boxed(op.getarg(1))
+ locs.append(loc2)
+ self.possibly_free_vars_for_op(op)
+ self.free_temp_vars()
+ res = self.force_allocate_reg(op.result)
+ assert float_result == (op.result.type == FLOAT)
+ locs.append(res)
+ return locs
+ if name:
+ f.__name__ = name
+ return f
+
diff --git a/pypy/jit/backend/ppc/locations.py b/pypy/jit/backend/ppc/locations.py
--- a/pypy/jit/backend/ppc/locations.py
+++ b/pypy/jit/backend/ppc/locations.py
@@ -4,8 +4,10 @@
# XXX import from arch.py, currently we have a circular import
if sys.maxint == (2**31 - 1):
WORD = 4
+ FWORD = 8
else:
WORD = 8
+ FWORD = 8
DWORD = 2 * WORD
class AssemblerLocation(object):
@@ -21,7 +23,7 @@
def is_reg(self):
return False
- def is_vfp_reg(self):
+ def is_fp_reg(self):
return False
def is_imm_float(self):
@@ -49,7 +51,7 @@
class FPRegisterLocation(RegisterLocation):
_immutable_ = True
type = FLOAT
- width = DWORD
+ width = FWORD
def __repr__(self):
return 'fp%d' % self.value
@@ -83,10 +85,36 @@
def as_key(self):
return self.value + 40
+class ConstFloatLoc(AssemblerLocation):
+ """This class represents an imm float value which is stored in memory at
+ the address stored in the field value"""
+ _immutable_ = True
+ width = FWORD
+ type = FLOAT
+
+ def __init__(self, value):
+ self.value = value
+
+ def getint(self):
+ return self.value
+
+ def __repr__(self):
+ return "imm_float(stored at %d)" % (self.value)
+
+ def is_imm_float(self):
+ return True
+
+ def as_key(self):
+ return self.value
+
class StackLocation(AssemblerLocation):
_immutable_ = True
def __init__(self, position, num_words=1, type=INT):
+ if type == FLOAT:
+ self.width = FWORD
+ else:
+ self.width = WORD
self.position = position
self.type = type
self.value = get_spp_offset(position)
diff --git a/pypy/jit/backend/ppc/opassembler.py b/pypy/jit/backend/ppc/opassembler.py
--- a/pypy/jit/backend/ppc/opassembler.py
+++ b/pypy/jit/backend/ppc/opassembler.py
@@ -5,13 +5,13 @@
from pypy.jit.backend.ppc.locations import imm
from pypy.jit.backend.ppc.locations import imm as make_imm_loc
from pypy.jit.backend.ppc.arch import (IS_PPC_32, WORD, BACKCHAIN_SIZE,
- MAX_REG_PARAMS, FORCE_INDEX_OFS)
+ MAX_REG_PARAMS, MAX_FREG_PARAMS,
+ FORCE_INDEX_OFS)
from pypy.jit.metainterp.history import (JitCellToken, TargetToken, Box,
AbstractFailDescr, FLOAT, INT, REF)
from pypy.rlib.objectmodel import we_are_translated
-from pypy.jit.backend.ppc.helper.assembler import (count_reg_args,
- Saved_Volatiles)
+from pypy.jit.backend.ppc.helper.assembler import (Saved_Volatiles)
from pypy.jit.backend.ppc.jump import remap_frame_layout
from pypy.jit.backend.ppc.codebuilder import (OverwritingBuilder, scratch_reg,
PPCBuilder)
@@ -53,8 +53,7 @@
def emit_int_sub(self, op, arglocs, regalloc):
l0, l1, res = arglocs
if l0.is_imm():
- self.mc.load_imm(r.r0, l0.value)
- self.mc.sub(res.value, r.r0.value, l1.value)
+ self.mc.subfic(res.value, l1.value, l0.value)
elif l1.is_imm():
self.mc.subi(res.value, l0.value, l1.value)
else:
@@ -174,6 +173,55 @@
l0, res = arglocs
self.mc.not_(res.value, l0.value)
+class FloatOpAssembler(object):
+ _mixin_ = True
+
+ def emit_float_add(self, op, arglocs, regalloc):
+ l0, l1, res = arglocs
+ self.mc.fadd(res.value, l0.value, l1.value)
+
+ def emit_float_sub(self, op, arglocs, regalloc):
+ l0, l1, res = arglocs
+ self.mc.fsub(res.value, l0.value, l1.value)
+
+ def emit_float_mul(self, op, arglocs, regalloc):
+ l0, l1, res = arglocs
+ self.mc.fmul(res.value, l0.value, l1.value)
+
+ def emit_float_truediv(self, op, arglocs, regalloc):
+ l0, l1, res = arglocs
+ self.mc.fdiv(res.value, l0.value, l1.value)
+
+ def emit_float_neg(self, op, arglocs, regalloc):
+ l0, res = arglocs
+ self.mc.fneg(res.value, l0.value)
+
+ def emit_float_abs(self, op, arglocs, regalloc):
+ l0, res = arglocs
+ self.mc.fabs(res.value, l0.value)
+
+ def emit_math_sqrt(self, op, arglocs, regalloc):
+ l0, res = arglocs
+ self.mc.fsqrt(res.value, l0.value)
+
+ emit_float_le = gen_emit_cmp_op(c.LE, fp=True)
+ emit_float_lt = gen_emit_cmp_op(c.LT, fp=True)
+ emit_float_gt = gen_emit_cmp_op(c.GT, fp=True)
+ emit_float_ge = gen_emit_cmp_op(c.GE, fp=True)
+ emit_float_eq = gen_emit_cmp_op(c.EQ, fp=True)
+ emit_float_ne = gen_emit_cmp_op(c.NE, fp=True)
+
+ def emit_cast_float_to_int(self, op, arglocs, regalloc):
+ l0, temp_loc, res = arglocs
+ self.mc.fctidz(temp_loc.value, l0.value)
+ self.mc.stfd(temp_loc.value, r.SPP.value, FORCE_INDEX_OFS + WORD)
+ self.mc.ld(res.value, r.SPP.value, FORCE_INDEX_OFS + WORD)
+
+ def emit_cast_int_to_float(self, op, arglocs, regalloc):
+ l0, temp_loc, res = arglocs
+ self.mc.std(l0.value, r.SPP.value, FORCE_INDEX_OFS + WORD)
+ self.mc.lfd(temp_loc.value, r.SPP.value, FORCE_INDEX_OFS + WORD)
+ self.mc.fcfid(res.value, temp_loc.value)
class GuardOpAssembler(object):
@@ -235,8 +283,9 @@
self.mc.cmp_op(0, l0.value, l1.getint(), imm=True)
else:
self.mc.cmp_op(0, l0.value, l1.value)
- else:
- assert 0, "not implemented yet"
+ elif l0.is_fp_reg():
+ assert l1.is_fp_reg()
+ self.mc.cmp_op(0, l0.value, l1.value, fp=True)
self._emit_guard(op, failargs, c.NE)
emit_guard_nonnull = emit_guard_true
@@ -299,12 +348,20 @@
with scratch_reg(self.mc):
self.mc.load_imm(r.SCRATCH, adr)
self.mc.storex(loc.value, 0, r.SCRATCH.value)
- elif loc.is_vfp_reg():
+ elif loc.is_fp_reg():
assert box.type == FLOAT
- assert 0, "not implemented yet"
+ adr = self.fail_boxes_float.get_addr_for_num(i)
+ with scratch_reg(self.mc):
+ self.mc.load_imm(r.SCRATCH, adr)
+ self.mc.stfdx(loc.value, 0, r.SCRATCH.value)
elif loc.is_stack() or loc.is_imm() or loc.is_imm_float():
if box.type == FLOAT:
- assert 0, "not implemented yet"
+ adr = self.fail_boxes_float.get_addr_for_num(i)
+ self.mc.stfd(r.f0.value, r.SPP.value, FORCE_INDEX_OFS + WORD)
+ self.mov_loc_loc(loc, r.f0)
+ self.mc.load_imm(r.SCRATCH, adr)
+ self.mc.stfdx(r.f0.value, 0, r.SCRATCH.value)
+ self.mc.lfd(r.f0.value, r.SPP.value, FORCE_INDEX_OFS + WORD)
elif box.type == REF or box.type == INT:
if box.type == REF:
adr = self.fail_boxes_ptr.get_addr_for_num(i)
@@ -401,25 +458,43 @@
def _emit_call(self, force_index, adr, arglocs, result=None):
n_args = len(arglocs)
- reg_args = count_reg_args(arglocs)
- n = 0 # used to count the number of words pushed on the stack, so we
- # can later modify the SP back to its original value
+ # collect variables that need to go in registers
+ # and the registers they will be stored in
+ num = 0
+ fpnum = 0
+ count = 0
+ non_float_locs = []
+ non_float_regs = []
+ float_locs = []
+ float_regs = []
stack_args = []
- if n_args > reg_args:
- # first we need to prepare the list so it stays aligned
- count = 0
- for i in range(reg_args, n_args):
- arg = arglocs[i]
- if arg.type == FLOAT:
- assert 0, "not implemented yet"
+ float_stack_arg = False
+ for i in range(n_args):
+ arg = arglocs[i]
+
+ if arg.type == FLOAT:
+ if fpnum < MAX_FREG_PARAMS:
+ fpreg = r.PARAM_FPREGS[fpnum]
+ float_locs.append(arg)
+ float_regs.append(fpreg)
+ fpnum += 1
else:
- count += 1
- n += WORD
- stack_args.append(arg)
- if count % 2 != 0:
- n += WORD
- stack_args.append(None)
+ stack_args.append(arg)
+ else:
+ if num < MAX_REG_PARAMS:
+ reg = r.PARAM_REGS[num]
+ non_float_locs.append(arg)
+ non_float_regs.append(reg)
+ num += 1
+ else:
+ stack_args.append(arg)
+ float_stack_arg = True
+
+ if adr in non_float_regs:
+ non_float_locs.append(adr)
+ non_float_regs.append(r.r11)
+ adr = r.r11
# compute maximum of parameters passed
self.max_stack_params = max(self.max_stack_params, len(stack_args))
@@ -428,46 +503,26 @@
if IS_PPC_32:
param_offset = BACKCHAIN_SIZE * WORD
else:
- param_offset = ((BACKCHAIN_SIZE + MAX_REG_PARAMS)
- * WORD) # space for first 8 parameters
+ # space for first 8 parameters
+ param_offset = ((BACKCHAIN_SIZE + MAX_REG_PARAMS) * WORD)
with scratch_reg(self.mc):
+ if float_stack_arg:
+ self.mc.stfd(r.f0.value, r.SPP.value, FORCE_INDEX_OFS + WORD)
for i, arg in enumerate(stack_args):
offset = param_offset + i * WORD
if arg is not None:
- self.regalloc_mov(arg, r.SCRATCH)
- self.mc.store(r.SCRATCH.value, r.SP.value, offset)
-
- # collect variables that need to go in registers
- # and the registers they will be stored in
- num = 0
- count = 0
- non_float_locs = []
- non_float_regs = []
- for i in range(reg_args):
- arg = arglocs[i]
- if arg.type == FLOAT and count % 2 != 0:
- assert 0, "not implemented yet"
- reg = r.PARAM_REGS[num]
-
- if arg.type == FLOAT:
- assert 0, "not implemented yet"
- else:
- non_float_locs.append(arg)
- non_float_regs.append(reg)
-
- if arg.type == FLOAT:
- assert 0, "not implemented yet"
- else:
- num += 1
- count += 1
-
- if adr in non_float_regs:
- non_float_locs.append(adr)
- non_float_regs.append(r.r11)
- adr = r.r11
+ if arg.type == FLOAT:
+ self.regalloc_mov(arg, r.f0)
+ self.mc.stfd(r.f0.value, r.SP.value, offset)
+ else:
+ self.regalloc_mov(arg, r.SCRATCH)
+ self.mc.store(r.SCRATCH.value, r.SP.value, offset)
+ if float_stack_arg:
+ self.mc.lfd(r.f0.value, r.SPP.value, FORCE_INDEX_OFS + WORD)
# remap values stored in core registers
+ remap_frame_layout(self, float_locs, float_regs, r.f0)
remap_frame_layout(self, non_float_locs, non_float_regs, r.SCRATCH)
# the actual call
@@ -489,10 +544,16 @@
def emit_setfield_gc(self, op, arglocs, regalloc):
value_loc, base_loc, ofs, size = arglocs
if size.value == 8:
- if ofs.is_imm():
- self.mc.std(value_loc.value, base_loc.value, ofs.value)
+ if value_loc.is_fp_reg():
+ if ofs.is_imm():
+ self.mc.stfd(value_loc.value, base_loc.value, ofs.value)
+ else:
+ self.mc.stfdx(value_loc.value, base_loc.value, ofs.value)
else:
- self.mc.stdx(value_loc.value, base_loc.value, ofs.value)
+ if ofs.is_imm():
+ self.mc.std(value_loc.value, base_loc.value, ofs.value)
+ else:
+ self.mc.stdx(value_loc.value, base_loc.value, ofs.value)
elif size.value == 4:
if ofs.is_imm():
self.mc.stw(value_loc.value, base_loc.value, ofs.value)
@@ -516,10 +577,16 @@
def emit_getfield_gc(self, op, arglocs, regalloc):
base_loc, ofs, res, size = arglocs
if size.value == 8:
- if ofs.is_imm():
- self.mc.ld(res.value, base_loc.value, ofs.value)
+ if res.is_fp_reg():
+ if ofs.is_imm():
+ self.mc.lfd(res.value, base_loc.value, ofs.value)
+ else:
+ self.mc.lfdx(res.value, base_loc.value, ofs.value)
else:
- self.mc.ldx(res.value, base_loc.value, ofs.value)
+ if ofs.is_imm():
+ self.mc.ld(res.value, base_loc.value, ofs.value)
+ else:
+ self.mc.ldx(res.value, base_loc.value, ofs.value)
elif size.value == 4:
if ofs.is_imm():
self.mc.lwz(res.value, base_loc.value, ofs.value)
@@ -560,7 +627,10 @@
self.mc.add(r.SCRATCH.value, r.SCRATCH.value, ofs_loc.value)
if fieldsize.value == 8:
- self.mc.ldx(res_loc.value, base_loc.value, r.SCRATCH.value)
+ if res_loc.is_fp_reg():
+ self.mc.lfdx(res_loc.value, base_loc.value, r.SCRATCH.value)
+ else:
+ self.mc.ldx(res_loc.value, base_loc.value, r.SCRATCH.value)
elif fieldsize.value == 4:
self.mc.lwzx(res_loc.value, base_loc.value, r.SCRATCH.value)
elif fieldsize.value == 2:
@@ -587,7 +657,10 @@
else:
self.mc.add(r.SCRATCH.value, r.SCRATCH.value, ofs_loc.value)
if fieldsize.value == 8:
- self.mc.stdx(value_loc.value, base_loc.value, r.SCRATCH.value)
+ if value_loc.is_fp_reg():
+ self.mc.stfdx(value_loc.value, base_loc.value, r.SCRATCH.value)
+ else:
+ self.mc.stdx(value_loc.value, base_loc.value, r.SCRATCH.value)
elif fieldsize.value == 4:
self.mc.stwx(value_loc.value, base_loc.value, r.SCRATCH.value)
elif fieldsize.value == 2:
@@ -626,7 +699,10 @@
scale_loc = r.SCRATCH
if scale.value == 3:
- self.mc.stdx(value_loc.value, base_loc.value, scale_loc.value)
+ if value_loc.is_fp_reg():
+ self.mc.stfdx(value_loc.value, base_loc.value, scale_loc.value)
+ else:
+ self.mc.stdx(value_loc.value, base_loc.value, scale_loc.value)
elif scale.value == 2:
self.mc.stwx(value_loc.value, base_loc.value, scale_loc.value)
elif scale.value == 1:
@@ -657,7 +733,10 @@
scale_loc = r.SCRATCH
if scale.value == 3:
- self.mc.ldx(res.value, base_loc.value, scale_loc.value)
+ if res.is_fp_reg():
+ self.mc.lfdx(res.value, base_loc.value, scale_loc.value)
+ else:
+ self.mc.ldx(res.value, base_loc.value, scale_loc.value)
elif scale.value == 2:
self.mc.lwzx(res.value, base_loc.value, scale_loc.value)
elif scale.value == 1:
@@ -1000,8 +1079,7 @@
# use r20 as temporary register, save it in FORCE INDEX slot
temp_reg = r.r20
- ENCODING_AREA = len(r.MANAGED_REGS) * WORD
- self.mc.store(temp_reg.value, r.SPP.value, ENCODING_AREA)
+ self.mc.store(temp_reg.value, r.SPP.value, FORCE_INDEX_OFS)
self.mc.srli_op(temp_reg.value, loc_index.value, s)
self.mc.not_(temp_reg.value, temp_reg.value)
@@ -1021,7 +1099,7 @@
# done
# restore temporary register r20
- self.mc.load(temp_reg.value, r.SPP.value, ENCODING_AREA)
+ self.mc.load(temp_reg.value, r.SPP.value, FORCE_INDEX_OFS)
# patch the JMP above
offset = self.mc.currpos()
@@ -1048,9 +1126,8 @@
def emit_force_token(self, op, arglocs, regalloc):
res_loc = arglocs[0]
- ENCODING_AREA = len(r.MANAGED_REGS) * WORD
self.mc.mr(res_loc.value, r.SPP.value)
- self.mc.addi(res_loc.value, res_loc.value, ENCODING_AREA)
+ self.mc.addi(res_loc.value, res_loc.value, FORCE_INDEX_OFS)
# self._emit_guard(guard_op, regalloc._prepare_guard(guard_op), c.LT)
# from: ../x86/assembler.py:1668
@@ -1121,13 +1198,13 @@
elif kind == REF:
adr = self.fail_boxes_ptr.get_addr_for_num(0)
elif kind == FLOAT:
- assert 0, "not implemented"
+ adr = self.fail_boxes_float.get_addr_for_num(0)
else:
raise AssertionError(kind)
with scratch_reg(self.mc):
self.mc.load_imm(r.SCRATCH, adr)
if op.result.type == FLOAT:
- assert 0, "not implemented yet"
+ self.mc.lfdx(resloc.value, 0, r.SCRATCH.value)
else:
self.mc.loadx(resloc.value, 0, r.SCRATCH.value)
@@ -1143,14 +1220,14 @@
# Path B: use assembler helper
asm_helper_adr = self.cpu.cast_adr_to_int(jd.assembler_helper_adr)
if self.cpu.supports_floats:
- assert 0, "not implemented yet"
+ floats = r.VOLATILES_FLOAT
+ else:
+ floats = []
with Saved_Volatiles(self.mc, save_RES=False):
# result of previous call is in r3
self.mov_loc_loc(arglocs[0], r.r4)
self.mc.call(asm_helper_adr)
- if op.result and resloc.is_vfp_reg():
- assert 0, "not implemented yet"
# merge point
currpos = self.mc.currpos()
@@ -1158,9 +1235,8 @@
pmc.b(currpos - fast_path_to_end_jump_pos)
pmc.overwrite()
- ENCODING_AREA = len(r.MANAGED_REGS) * WORD
with scratch_reg(self.mc):
- self.mc.load(r.SCRATCH.value, r.SPP.value, ENCODING_AREA)
+ self.mc.load(r.SCRATCH.value, r.SPP.value, FORCE_INDEX_OFS)
self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True)
self._emit_guard(guard_op, regalloc._prepare_guard(guard_op),
@@ -1254,7 +1330,7 @@
MiscOpAssembler, FieldOpAssembler,
ArrayOpAssembler, StrOpAssembler,
UnicodeOpAssembler, ForceOpAssembler,
- AllocOpAssembler):
+ AllocOpAssembler, FloatOpAssembler):
def nop(self):
self.mc.ori(0, 0, 0)
diff --git a/pypy/jit/backend/ppc/ppc_assembler.py b/pypy/jit/backend/ppc/ppc_assembler.py
--- a/pypy/jit/backend/ppc/ppc_assembler.py
+++ b/pypy/jit/backend/ppc/ppc_assembler.py
@@ -6,7 +6,7 @@
from pypy.jit.backend.ppc.arch import (IS_PPC_32, IS_PPC_64, WORD,
NONVOLATILES, MAX_REG_PARAMS,
GPR_SAVE_AREA, BACKCHAIN_SIZE,
- FPR_SAVE_AREA,
+ FPR_SAVE_AREA, NONVOLATILES_FLOAT,
FLOAT_INT_CONVERSION, FORCE_INDEX,
SIZE_LOAD_IMM_PATCH_SP,
FORCE_INDEX_OFS)
@@ -20,6 +20,7 @@
from pypy.jit.backend.model import CompiledLoopToken
from pypy.rpython.lltypesystem import lltype, rffi, llmemory
from pypy.jit.metainterp.resoperation import rop, ResOperation
+from pypy.jit.codewriter import longlong
from pypy.jit.metainterp.history import (INT, REF, FLOAT)
from pypy.jit.backend.x86.support import values_array
from pypy.rlib.debug import (debug_print, debug_start, debug_stop,
@@ -69,10 +70,11 @@
class AssemblerPPC(OpAssembler):
- FORCE_INDEX_AREA = len(r.MANAGED_REGS) * WORD
- ENCODING_AREA = len(r.MANAGED_REGS) * WORD
+ ENCODING_AREA = FORCE_INDEX_OFS
OFFSET_SPP_TO_GPR_SAVE_AREA = (FORCE_INDEX + FLOAT_INT_CONVERSION
+ ENCODING_AREA)
+ OFFSET_SPP_TO_FPR_SAVE_AREA = (OFFSET_SPP_TO_GPR_SAVE_AREA
+ + GPR_SAVE_AREA)
OFFSET_SPP_TO_OLD_BACKCHAIN = (OFFSET_SPP_TO_GPR_SAVE_AREA
+ GPR_SAVE_AREA + FPR_SAVE_AREA)
@@ -83,6 +85,8 @@
def __init__(self, cpu, failargs_limit=1000):
self.cpu = cpu
self.fail_boxes_int = values_array(lltype.Signed, failargs_limit)
+ self.fail_boxes_float = values_array(longlong.FLOATSTORAGE,
+ failargs_limit)
self.fail_boxes_ptr = values_array(llmemory.GCREF, failargs_limit)
self.mc = None
self.datablockwrapper = None
@@ -102,21 +106,27 @@
self._debug = v
def _save_nonvolatiles(self):
- """ save nonvolatile GPRs in GPR SAVE AREA
+ """ save nonvolatile GPRs and FPRs in SAVE AREA
"""
for i, reg in enumerate(NONVOLATILES):
# save r31 later on
if reg.value == r.SPP.value:
continue
self.mc.store(reg.value, r.SPP.value,
- self.OFFSET_SPP_TO_GPR_SAVE_AREA + WORD * i)
+ self.OFFSET_SPP_TO_GPR_SAVE_AREA + WORD * i)
+ for i, reg in enumerate(NONVOLATILES_FLOAT):
+ self.mc.stfd(reg.value, r.SPP.value,
+ self.OFFSET_SPP_TO_FPR_SAVE_AREA + WORD * i)
def _restore_nonvolatiles(self, mc, spp_reg):
- """ restore nonvolatile GPRs from GPR SAVE AREA
+ """ restore nonvolatile GPRs and FPRs from SAVE AREA
"""
for i, reg in enumerate(NONVOLATILES):
mc.load(reg.value, spp_reg.value,
- self.OFFSET_SPP_TO_GPR_SAVE_AREA + WORD * i)
+ self.OFFSET_SPP_TO_GPR_SAVE_AREA + WORD * i)
+ for i, reg in enumerate(NONVOLATILES_FLOAT):
+ mc.lfd(reg.value, spp_reg.value,
+ self.OFFSET_SPP_TO_FPR_SAVE_AREA + WORD * i)
# The code generated here allocates a new stackframe
# and is the first machine code to be executed.
@@ -169,23 +179,27 @@
def setup_failure_recovery(self):
@rgc.no_collect
- def failure_recovery_func(mem_loc, spilling_pointer):
+ def failure_recovery_func(mem_loc, spilling_pointer,
+ managed_registers_pointer):
"""
mem_loc is a pointer to the beginning of the encoding.
- spilling_pointer is the address of the FORCE_INDEX.
+ spilling_pointer is the address of the spilling area.
"""
- regs = rffi.cast(rffi.LONGP, spilling_pointer)
+ regs = rffi.cast(rffi.LONGP, managed_registers_pointer)
+ fpregs = rffi.ptradd(regs, len(r.MANAGED_REGS))
+ fpregs = rffi.cast(rffi.LONGP, fpregs)
return self.decode_registers_and_descr(mem_loc,
- spilling_pointer, regs)
+ spilling_pointer,
+ regs, fpregs)
self.failure_recovery_func = failure_recovery_func
- recovery_func_sign = lltype.Ptr(lltype.FuncType([lltype.Signed,
- lltype.Signed], lltype.Signed))
+ recovery_func_sign = lltype.Ptr(lltype.FuncType([lltype.Signed] * 3,
+ lltype.Signed))
@rgc.no_collect
- def decode_registers_and_descr(self, mem_loc, spp, registers):
+ def decode_registers_and_descr(self, mem_loc, spp, registers, fp_registers):
"""Decode locations encoded in memory at mem_loc and write the values
to the failboxes. Values for spilled vars and registers are stored on
stack at frame_loc """
@@ -194,6 +208,7 @@
bytecode = rffi.cast(rffi.UCHARP, mem_loc)
num = 0
value = 0
+ fvalue = 0
code_inputarg = False
while True:
code = rffi.cast(lltype.Signed, bytecode[0])
@@ -216,7 +231,8 @@
code = ~code
code_inputarg = False
if kind == self.DESCR_FLOAT:
- assert 0, "not implemented yet"
+ start = spp + get_spp_offset(int(code))
+ fvalue = rffi.cast(rffi.LONGP, start)[0]
else:
start = spp + get_spp_offset(int(code))
value = rffi.cast(rffi.LONGP, start)[0]
@@ -234,13 +250,15 @@
break
code >>= 2
if kind == self.DESCR_FLOAT:
- assert 0, "not implemented yet"
+ reg_index = r.get_managed_fpreg_index(code)
+ fvalue = fp_registers[reg_index]
else:
reg_index = r.get_managed_reg_index(code)
value = registers[reg_index]
# store the loaded value into fail_boxes_<type>
if kind == self.DESCR_FLOAT:
- assert 0, "not implemented yet"
+ tgt = self.fail_boxes_float.get_addr_for_num(num)
+ rffi.cast(rffi.LONGP, tgt)[0] = fvalue
else:
if kind == self.DESCR_INT:
tgt = self.fail_boxes_int.get_addr_for_num(num)
@@ -295,7 +313,9 @@
kind = code & 3
code >>= 2
if kind == self.DESCR_FLOAT:
- assert 0, "not implemented yet"
+ assert (r.ALL_FLOAT_REGS[code] is
+ r.MANAGED_FP_REGS[r.get_managed_fpreg_index(code)])
+ loc = r.ALL_FLOAT_REGS[code]
else:
#loc = r.all_regs[code]
assert (r.ALL_REGS[code] is
@@ -309,7 +329,7 @@
if IS_PPC_64:
for _ in range(6):
mc.write32(0)
- frame_size = (# add space for floats later
+ frame_size = (len(r.MANAGED_FP_REGS) * WORD
+ (BACKCHAIN_SIZE + MAX_REG_PARAMS) * WORD)
with scratch_reg(mc):
@@ -323,7 +343,9 @@
mc.std(r.SCRATCH.value, r.SP.value, frame_size + 2 * WORD)
# managed volatiles are saved below
if self.cpu.supports_floats:
- assert 0, "make sure to save floats here"
+ for i in range(len(r.MANAGED_FP_REGS)):
+ mc.std(r.MANAGED_FP_REGS[i].value, r.SP.value,
+ (BACKCHAIN_SIZE + MAX_REG_PARAMS + i) * WORD)
# Values to compute size stored in r3 and r4
mc.subf(r.RES.value, r.RES.value, r.r4.value)
addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
@@ -332,6 +354,11 @@
mc.call(rffi.cast(lltype.Signed, addr))
for reg, ofs in PPCRegisterManager.REGLOC_TO_COPY_AREA_OFS.items():
mc.load(reg.value, r.SPP.value, ofs)
+ # restore floats
+ if self.cpu.supports_floats:
+ for i in range(len(r.MANAGED_FP_REGS)):
+ mc.lfd(r.MANAGED_FP_REGS[i].value, r.SP.value,
+ (BACKCHAIN_SIZE + MAX_REG_PARAMS + i) * WORD)
mc.cmp_op(0, r.RES.value, 0, imm=True)
jmp_pos = mc.currpos()
@@ -525,8 +552,10 @@
addr = rffi.cast(lltype.Signed, decode_func_addr)
# load parameters into parameter registers
- mc.load(r.RES.value, r.SPP.value, self.FORCE_INDEX_AREA) # address of state encoding
- mc.mr(r.r4.value, r.SPP.value) # load spilling pointer
+ # address of state encoding
+ mc.load(r.RES.value, r.SPP.value, FORCE_INDEX_OFS)
+ mc.mr(r.r4.value, r.SPP.value) # load spilling pointer
+ mc.mr(r.r5.value, r.SPP.value) # load managed registers pointer
#
# call decoding function
mc.call(addr)
@@ -565,6 +594,10 @@
for i in range(len(r.MANAGED_REGS)):
reg = r.MANAGED_REGS[i]
mc.store(reg.value, r.SPP.value, i * WORD)
+ FLOAT_OFFSET = len(r.MANAGED_REGS)
+ for i in range(len(r.MANAGED_FP_REGS)):
+ fpreg = r.MANAGED_FP_REGS[i]
+ mc.stfd(fpreg.value, r.SPP.value, (i + FLOAT_OFFSET) * WORD)
def gen_bootstrap_code(self, loophead, spilling_area):
self._insert_stack_check()
@@ -901,7 +934,7 @@
elif arg.type == INT:
kind = self.DESCR_INT
elif arg.type == FLOAT:
- assert 0, "not implemented"
+ kind = self.DESCR_FLOAT
else:
raise AssertionError("bogus kind")
loc = locs[i]
@@ -912,7 +945,7 @@
pos = ~pos
n = self.CODE_FROMSTACK // 4 + pos
else:
- assert loc.is_reg() or loc.is_vfp_reg()
+ assert loc.is_reg() or loc.is_fp_reg()
n = loc.value
n = kind + 4 * n
while n > 0x7F:
@@ -1093,7 +1126,7 @@
encoding_adr = self.gen_descr_encoding(descr, args, arglocs[1:])
with scratch_reg(self.mc):
self.mc.load_imm(r.SCRATCH, encoding_adr)
- self.mc.store(r.SCRATCH.value, r.SPP.value, self.ENCODING_AREA)
+ self.mc.store(r.SCRATCH.value, r.SPP.value, FORCE_INDEX_OFS)
self.mc.b_abs(path)
return encoding_adr
@@ -1168,6 +1201,12 @@
self.mc.load(r.SCRATCH.value, r.SPP.value, offset)
self.mc.store(r.SCRATCH.value, r.SPP.value, target_offset)
return
+ # move from memory to fp register
+ elif loc.is_fp_reg():
+ assert prev_loc.type == FLOAT, 'source not float location'
+ reg = loc.as_key()
+ self.mc.lfd(reg, r.SPP.value, offset)
+ return
assert 0, "not supported location"
elif prev_loc.is_reg():
reg = prev_loc.as_key()
@@ -1182,6 +1221,36 @@
self.mc.store(reg, r.SPP.value, offset)
return
assert 0, "not supported location"
+ elif prev_loc.is_imm_float():
+ value = prev_loc.getint()
+ # move immediate value to fp register
+ if loc.is_fp_reg():
+ with scratch_reg(self.mc):
+ self.mc.load_imm(r.SCRATCH, value)
+ self.mc.lfdx(loc.value, 0, r.SCRATCH.value)
+ return
+ # move immediate value to memory
+ elif loc.is_stack():
+ with scratch_reg(self.mc):
+ offset = loc.value
+ self.mc.load_imm(r.SCRATCH, value)
+ self.mc.store(r.SCRATCH.value, r.SPP.value, offset)
+ return
+ assert 0, "not supported location"
+ elif prev_loc.is_fp_reg():
+ reg = prev_loc.as_key()
+ # move to another fp register
+ if loc.is_fp_reg():
+ other_reg = loc.as_key()
+ self.mc.fmr(other_reg, reg)
+ return
+ # move from fp register to memory
+ elif loc.is_stack():
+ assert loc.type == FLOAT, "target not float location"
+ offset = loc.value
+ self.mc.stfd(reg, r.SPP.value, offset)
+ return
+ assert 0, "not supported location"
assert 0, "not supported location"
mov_loc_loc = regalloc_mov
@@ -1191,8 +1260,6 @@
loc"""
if loc.is_stack():
- if loc.type == FLOAT:
- assert 0, "not implemented yet"
# XXX this code has to be verified
assert not self.stack_in_use
target = StackLocation(self.ENCODING_AREA // WORD) # write to ENCODING AREA
@@ -1213,8 +1280,6 @@
"""Pops the value on top of the stack to loc. Can trash the current
value of SCRATCH when popping to a stack loc"""
if loc.is_stack():
- if loc.type == FLOAT:
- assert 0, "not implemented yet"
# XXX this code has to be verified
assert self.stack_in_use
from_loc = StackLocation(self.ENCODING_AREA // WORD) # read from ENCODING AREA
@@ -1328,14 +1393,17 @@
def _write_fail_index(self, fail_index):
with scratch_reg(self.mc):
self.mc.load_imm(r.SCRATCH, fail_index)
- self.mc.store(r.SCRATCH.value, r.SPP.value, self.FORCE_INDEX_AREA)
+ self.mc.store(r.SCRATCH.value, r.SPP.value, FORCE_INDEX_OFS)
def load(self, loc, value):
- assert loc.is_reg() and value.is_imm()
+ assert (loc.is_reg() and value.is_imm()
+ or loc.is_fp_reg() and value.is_imm_float())
if value.is_imm():
self.mc.load_imm(loc, value.getint())
elif value.is_imm_float():
- assert 0, "not implemented yet"
+ with scratch_reg(self.mc):
+ self.mc.load_imm(r.SCRATCH, value.getint())
+ self.mc.lfdx(loc.value, 0, r.SCRATCH.value)
def notimplemented_op(self, op, arglocs, regalloc):
print "[PPC/asm] %s not implemented" % op.getopname()
diff --git a/pypy/jit/backend/ppc/regalloc.py b/pypy/jit/backend/ppc/regalloc.py
--- a/pypy/jit/backend/ppc/regalloc.py
+++ b/pypy/jit/backend/ppc/regalloc.py
@@ -1,15 +1,18 @@
from pypy.jit.backend.llsupport.regalloc import (RegisterManager, FrameManager,
TempBox, compute_vars_longevity)
from pypy.jit.backend.ppc.arch import (WORD, MY_COPY_OF_REGS, IS_PPC_32)
-from pypy.jit.backend.ppc.jump import remap_frame_layout
+from pypy.jit.codewriter import longlong
+from pypy.jit.backend.ppc.jump import (remap_frame_layout,
+ remap_frame_layout_mixed)
from pypy.jit.backend.ppc.locations import imm
from pypy.jit.backend.ppc.helper.regalloc import (_check_imm_arg,
prepare_cmp_op,
prepare_unary_int_op,
prepare_binary_int_op,
prepare_binary_int_op_with_imm,
- prepare_unary_cmp)
-from pypy.jit.metainterp.history import (Const, ConstInt, ConstPtr,
+ prepare_unary_cmp,
+ prepare_float_op)
+from pypy.jit.metainterp.history import (Const, ConstInt, ConstFloat, ConstPtr,
Box, BoxPtr,
INT, REF, FLOAT)
from pypy.jit.metainterp.history import JitCellToken, TargetToken
@@ -24,6 +27,7 @@
from pypy.jit.backend.llsupport.descr import unpack_fielddescr
from pypy.jit.backend.llsupport.descr import unpack_interiorfielddescr
from pypy.rlib.objectmodel import we_are_translated
+from pypy.jit.codewriter.effectinfo import EffectInfo
# xxx hack: set a default value for TargetToken._arm_loop_code. If 0, we know
# that it is a LABEL that was not compiled yet.
@@ -41,6 +45,52 @@
def __repr__(self):
return "<TempPtr at %s>" % (id(self),)
+class TempFloat(TempBox):
+ type = FLOAT
+
+ def __repr__(self):
+ return "<TempFloat at %s>" % (id(self),)
+
+
+class FPRegisterManager(RegisterManager):
+ all_regs = r.ALL_FLOAT_REGS
+ box_types = [FLOAT]
+ save_around_call_regs = r.VOLATILES_FLOAT
+
+ def convert_to_imm(self, c):
+ adr = self.assembler.datablockwrapper.malloc_aligned(8, 8)
+ x = c.getfloatstorage()
+ rffi.cast(rffi.CArrayPtr(longlong.FLOATSTORAGE), adr)[0] = x
+ return locations.ConstFloatLoc(adr)
+
+ def __init__(self, longevity, frame_manager=None, assembler=None):
+ RegisterManager.__init__(self, longevity, frame_manager, assembler)
+
+ def call_result_location(self, v):
+ return r.f1
+
+ def ensure_value_is_boxed(self, thing, forbidden_vars=[]):
+ loc = None
+ if isinstance(thing, Const):
+ assert isinstance(thing, ConstFloat)
+ loc = self.get_scratch_reg(FLOAT, self.temp_boxes + forbidden_vars)
+ immvalue = self.convert_to_imm(thing)
+ self.assembler.load(loc, immvalue)
+ else:
+ loc = self.make_sure_var_in_reg(thing,
+ forbidden_vars=self.temp_boxes + forbidden_vars)
+ return loc
+
+ def get_scratch_reg(self, type=FLOAT, forbidden_vars=[],
+ selected_reg=None):
+ assert type == FLOAT # for now
+ box = TempFloat()
+ self.temp_boxes.append(box)
+ reg = self.force_allocate_reg(box, forbidden_vars=forbidden_vars,
+ selected_reg=selected_reg)
+ return reg
+
+
class PPCRegisterManager(RegisterManager):
all_regs = r.MANAGED_REGS
box_types = None # or a list of acceptable types
@@ -131,21 +181,15 @@
@staticmethod
def frame_pos(loc, type):
num_words = PPCFrameManager.frame_size(type)
- if type == FLOAT:
- assert 0, "not implemented yet"
return locations.StackLocation(loc, num_words=num_words, type=type)
@staticmethod
def frame_size(type):
- if type == FLOAT:
- assert 0, "TODO"
return 1
@staticmethod
def get_loc_index(loc):
assert loc.is_stack()
- if loc.type == FLOAT:
- assert 0, "not implemented yet"
return loc.position
class Regalloc(object):
@@ -164,6 +208,7 @@
self.last_real_usage = last_real_usage
fm = self.frame_manager
asm = self.assembler
+ self.fprm = FPRegisterManager(longevity, fm, asm)
self.rm = PPCRegisterManager(longevity, fm, asm)
def prepare_loop(self, inputargs, operations):
@@ -177,30 +222,39 @@
def _set_initial_bindings(self, inputargs):
arg_index = 0
- count = 0
+ fparg_index = 0
n_register_args = len(r.PARAM_REGS)
+ n_fpregister_args = len(r.PARAM_FPREGS)
cur_frame_pos = -self.assembler.OFFSET_STACK_ARGS // WORD + 1
for box in inputargs:
assert isinstance(box, Box)
- # handle inputargs in argument registers
- if box.type == FLOAT and arg_index % 2 != 0:
- assert 0, "not implemented yet"
- if arg_index < n_register_args:
- if box.type == FLOAT:
- assert 0, "not implemented yet"
+ if box.type == FLOAT:
+ if fparg_index < n_fpregister_args:
+ loc = r.PARAM_FPREGS[fparg_index]
+ self.try_allocate_reg(box, selected_reg=loc)
+ fparg_index += 1
+ # XXX stdarg placing float args in FPRs and GPRs
+ if arg_index < n_register_args:
+ arg_index += 1
+ else:
+ cur_frame_pos -= 1
else:
+ if IS_PPC_32:
+ cur_frame_pos -= 2
+ else:
+ cur_frame_pos -= 1
+ loc = self.frame_manager.frame_pos(cur_frame_pos, box.type)
+ self.frame_manager.set_binding(box, loc)
+ else:
+ if arg_index < n_register_args:
loc = r.PARAM_REGS[arg_index]
self.try_allocate_reg(box, selected_reg=loc)
arg_index += 1
- else:
+ else:
# treat stack args as stack locations with a negative offset
- if box.type == FLOAT:
- assert 0, "not implemented yet"
- else:
cur_frame_pos -= 1
- count += 1
- loc = self.frame_manager.frame_pos(cur_frame_pos, box.type)
- self.frame_manager.set_binding(box, loc)
+ loc = self.frame_manager.frame_pos(cur_frame_pos, box.type)
+ self.frame_manager.set_binding(box, loc)
def _update_bindings(self, locs, inputargs):
used = {}
@@ -210,8 +264,8 @@
i += 1
if loc.is_reg():
self.rm.reg_bindings[arg] = loc
- elif loc.is_vfp_reg():
- assert 0, "not supported"
+ elif loc.is_fp_reg():
+ self.fprm.reg_bindings[arg] = loc
else:
assert loc.is_stack()
self.frame_manager.set_binding(arg, loc)
@@ -222,16 +276,24 @@
for reg in self.rm.all_regs:
if reg not in used:
self.rm.free_regs.append(reg)
+ self.fprm.free_regs = []
+ for reg in self.fprm.all_regs:
+ if reg not in used:
+ self.fprm.free_regs.append(reg)
# note: we need to make a copy of inputargs because possibly_free_vars
# is also used on op args, which is a non-resizable list
self.possibly_free_vars(list(inputargs))
def possibly_free_var(self, var):
- self.rm.possibly_free_var(var)
+ if var.type == FLOAT:
+ self.fprm.possibly_free_var(var)
+ else:
+ self.rm.possibly_free_var(var)
def possibly_free_vars(self, vars):
for var in vars:
- self.possibly_free_var(var)
+ if var is not None: # xxx kludgy
+ self.possibly_free_var(var)
def possibly_free_vars_for_op(self, op):
for i in range(op.numargs()):
@@ -240,12 +302,19 @@
self.possibly_free_var(var)
def try_allocate_reg(self, v, selected_reg=None, need_lower_byte=False):
- return self.rm.try_allocate_reg(v, selected_reg, need_lower_byte)
+ if v.type == FLOAT:
+ return self.fprm.try_allocate_reg(v, selected_reg, need_lower_byte)
+ else:
+ return self.rm.try_allocate_reg(v, selected_reg, need_lower_byte)
def force_allocate_reg(self, var, forbidden_vars=[], selected_reg=None,
need_lower_byte=False):
- return self.rm.force_allocate_reg(var, forbidden_vars, selected_reg,
- need_lower_byte)
+ if var.type == FLOAT:
+ return self.fprm.force_allocate_reg(var, forbidden_vars,
+ selected_reg, need_lower_byte)
+ else:
+ return self.rm.force_allocate_reg(var, forbidden_vars,
+ selected_reg, need_lower_byte)
def allocate_scratch_reg(self, type=INT, forbidden_vars=[], selected_reg=None):
assert type == INT # XXX extend this once floats are supported
@@ -255,53 +324,59 @@
def _check_invariants(self):
self.rm._check_invariants()
+ self.fprm._check_invariants()
def loc(self, var):
if var.type == FLOAT:
- assert 0, "not implemented yet"
- return self.rm.loc(var)
+ return self.fprm.loc(var)
+ else:
+ return self.rm.loc(var)
def position(self):
return self.rm.position
def next_instruction(self):
self.rm.next_instruction()
+ self.fprm.next_instruction()
def force_spill_var(self, var):
if var.type == FLOAT:
- assert 0, "not implemented yet"
+ self.fprm.force_spill_var(var)
else:
self.rm.force_spill_var(var)
def before_call(self, force_store=[], save_all_regs=False):
self.rm.before_call(force_store, save_all_regs)
+ self.fprm.before_call(force_store, save_all_regs)
def after_call(self, v):
if v.type == FLOAT:
- assert 0, "not implemented yet"
+ return self.fprm.after_call(v)
else:
return self.rm.after_call(v)
def call_result_location(self, v):
if v.type == FLOAT:
- assert 0, "not implemented yet"
+ return self.fprm.call_result_location(v)
else:
return self.rm.call_result_location(v)
def _ensure_value_is_boxed(self, thing, forbidden_vars=[]):
if thing.type == FLOAT:
- assert 0, "not implemented yet"
+ return self.fprm.ensure_value_is_boxed(thing, forbidden_vars)
else:
return self.rm.ensure_value_is_boxed(thing, forbidden_vars)
def get_scratch_reg(self, type, forbidden_vars=[], selected_reg=None):
if type == FLOAT:
- assert 0, "not implemented yet"
+ return self.fprm.get_scratch_reg(type, forbidden_vars,
+ selected_reg)
else:
return self.rm.get_scratch_reg(type, forbidden_vars, selected_reg)
def free_temp_vars(self):
self.rm.free_temp_vars()
+ self.fprm.free_temp_vars()
def make_sure_var_in_reg(self, var, forbidden_vars=[],
selected_reg=None, need_lower_byte=False):
@@ -315,11 +390,12 @@
if isinstance(value, ConstInt):
return self.rm.convert_to_imm(value)
else:
- assert 0, "not implemented yet"
+ assert isinstance(value, ConstFloat)
+ return self.fprm.convert_to_imm(value)
def _sync_var(self, v):
if v.type == FLOAT:
- assert 0, "not implemented yet"
+ self.fprm._sync_var(v)
else:
self.rm._sync_var(v)
@@ -373,6 +449,65 @@
prepare_int_is_true = prepare_unary_cmp()
prepare_int_is_zero = prepare_unary_cmp()
+ prepare_float_add = prepare_float_op(name='prepare_float_add')
+ prepare_float_sub = prepare_float_op(name='prepare_float_sub')
+ prepare_float_mul = prepare_float_op(name='prepare_float_mul')
+ prepare_float_truediv = prepare_float_op(name='prepare_float_truediv')
+
+ prepare_float_lt = prepare_float_op(float_result=False,
+ name='prepare_op_float_lt')
+ prepare_float_le = prepare_float_op(float_result=False,
+ name='prepare_op_float_le')
+ prepare_float_eq = prepare_float_op(float_result=False,
+ name='prepare_op_float_eq')
+ prepare_float_ne = prepare_float_op(float_result=False,
+ name='prepare_op_float_ne')
+ prepare_float_gt = prepare_float_op(float_result=False,
+ name='prepare_op_float_gt')
+ prepare_float_ge = prepare_float_op(float_result=False,
+ name='prepare_op_float_ge')
+ prepare_float_neg = prepare_float_op(base=False,
+ name='prepare_op_float_neg')
+ prepare_float_abs = prepare_float_op(base=False,
+ name='prepare_op_float_abs')
+
+ prepare_guard_float_lt = prepare_float_op(guard=True,
+ float_result=False, name='prepare_guard_float_lt')
+ prepare_guard_float_le = prepare_float_op(guard=True,
+ float_result=False, name='prepare_guard_float_le')
+ prepare_guard_float_eq = prepare_float_op(guard=True,
+ float_result=False, name='prepare_guard_float_eq')
+ prepare_guard_float_ne = prepare_float_op(guard=True,
+ float_result=False, name='prepare_guard_float_ne')
+ prepare_guard_float_gt = prepare_float_op(guard=True,
+ float_result=False, name='prepare_guard_float_gt')
+ prepare_guard_float_ge = prepare_float_op(guard=True,
+ float_result=False, name='prepare_guard_float_ge')
+
+ def prepare_math_sqrt(self, op):
+ loc = self._ensure_value_is_boxed(op.getarg(1))
+ self.possibly_free_vars_for_op(op)
+ self.free_temp_vars()
+ res = self.fprm.force_allocate_reg(op.result)
+ self.possibly_free_var(op.result)
+ return [loc, res]
+
+ def prepare_cast_float_to_int(self, op):
+ loc1 = self._ensure_value_is_boxed(op.getarg(0))
+ temp_loc = self.get_scratch_reg(FLOAT)
+ self.possibly_free_vars_for_op(op)
+ self.free_temp_vars()
+ res = self.rm.force_allocate_reg(op.result)
+ return [loc1, temp_loc, res]
+
+ def prepare_cast_int_to_float(self, op):
+ loc1 = self._ensure_value_is_boxed(op.getarg(0))
+ temp_loc = self.get_scratch_reg(FLOAT)
+ self.possibly_free_vars_for_op(op)
+ self.free_temp_vars()
+ res = self.fprm.force_allocate_reg(op.result)
+ return [loc1, temp_loc, res]
+
def prepare_finish(self, op):
args = [None] * (op.numargs() + 1)
for i in range(op.numargs()):
@@ -534,10 +669,13 @@
# get temporary locs
tmploc = r.SCRATCH
+ fptmploc = r.f0
# Part about non-floats
src_locations1 = []
dst_locations1 = []
+ src_locations2 = []
+ dst_locations2 = []
# Build the four lists
for i in range(op.numargs()):
@@ -548,10 +686,12 @@
src_locations1.append(src_loc)
dst_locations1.append(dst_loc)
else:
- assert 0, "not implemented yet"
+ src_locations2.append(src_loc)
+ dst_locations2.append(dst_loc)
- remap_frame_layout(self.assembler, src_locations1,
- dst_locations1, tmploc)
+ remap_frame_layout_mixed(self.assembler,
+ src_locations1, dst_locations1, tmploc,
+ src_locations2, dst_locations2, fptmploc)
return []
def prepare_setfield_gc(self, op):
@@ -766,8 +906,11 @@
def prepare_call(self, op):
effectinfo = op.getdescr().get_extra_info()
if effectinfo is not None:
- # XXX TODO
- pass
+ oopspecindex = effectinfo.oopspecindex
+ if oopspecindex == EffectInfo.OS_MATH_SQRT:
+ args = self.prepare_math_sqrt(op)
+ self.assembler.emit_math_sqrt(op, args, self)
+ return
return self._prepare_call(op)
def _prepare_call(self, op, force_store=[], save_all_regs=False):
@@ -776,6 +919,7 @@
for i in range(op.numargs()):
args.append(self.loc(op.getarg(i)))
# spill variables that need to be saved around calls
+ self.fprm.before_call(save_all_regs=save_all_regs)
if not save_all_regs:
gcrootmap = self.assembler.cpu.gc_ll_descr.gcrootmap
if gcrootmap and gcrootmap.is_shadow_stack:
diff --git a/pypy/jit/backend/ppc/register.py b/pypy/jit/backend/ppc/register.py
--- a/pypy/jit/backend/ppc/register.py
+++ b/pypy/jit/backend/ppc/register.py
@@ -19,7 +19,7 @@
NONVOLATILES_FLOAT = [f14, f15, f16, f17, f18, f19, f20, f21, f22, f23,
f24, f25, f26, f27, f28, f29, f30, f31]
-
+VOLATILES_FLOAT = [f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13]
SCRATCH = r0
SP = r1
@@ -32,9 +32,15 @@
r19, r20, r21, r22, r23, r24, r25, r26,
r27, r28, r29, r30]
+MANAGED_FP_REGS = VOLATILES_FLOAT[1:] + NONVOLATILES_FLOAT
+
PARAM_REGS = [r3, r4, r5, r6, r7, r8, r9, r10]
+PARAM_FPREGS = [f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13]
def get_managed_reg_index(reg):
if reg > r13.value:
return reg - 4
return reg - 3
+
+def get_managed_fpreg_index(reg):
+ return reg - 1
diff --git a/pypy/jit/backend/ppc/runner.py b/pypy/jit/backend/ppc/runner.py
--- a/pypy/jit/backend/ppc/runner.py
+++ b/pypy/jit/backend/ppc/runner.py
@@ -27,7 +27,7 @@
translate_support_code, gcdescr)
# floats are not supported yet
- self.supports_floats = False
+ self.supports_floats = True
def setup(self):
self.asm = AssemblerPPC(self)
@@ -100,10 +100,10 @@
bytecode = self.asm._find_failure_recovery_bytecode(faildescr)
addr_all_null_registers = rffi.cast(rffi.LONG, self.all_null_registers)
# start of "no gc operation!" block
- fail_index_2 = self.asm.decode_registers_and_descr(
+ fail_index_2 = self.asm.failure_recovery_func(
bytecode,
spilling_pointer,
- self.all_null_registers)
+ addr_all_null_registers)
self.asm.leave_jitted_hook()
# end of "no gc operation!" block
assert fail_index == fail_index_2
@@ -114,9 +114,11 @@
return self.asm.fail_boxes_count
# fetch the result of the computation and return it
+ def get_latest_value_float(self, index):
+ return self.asm.fail_boxes_float.getitem(index)
+
def get_latest_value_int(self, index):
- value = self.asm.fail_boxes_int.getitem(index)
- return value
+ return self.asm.fail_boxes_int.getitem(index)
def get_latest_value_ref(self, index):
return self.asm.fail_boxes_ptr.getitem(index)
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -1206,6 +1206,80 @@
def test_virtual_ref_finish(self):
pass # VIRTUAL_REF_FINISH must not reach the backend nowadays
+ def test_arguments_to_execute_token(self):
+ # this test checks that execute_token() can be called with any
+ # variant of ints and floats as arguments
+ if self.cpu.supports_floats:
+ numkinds = 2
+ else:
+ numkinds = 1
+ seed = random.randrange(0, 10000)
+ print 'Seed is', seed # or choose it by changing the previous line
+ r = random.Random()
+ r.seed(seed)
+ for nb_args in range(50):
+ print 'Passing %d arguments to execute_token...' % nb_args
+ #
+ inputargs = []
+ values = []
+ for k in range(nb_args):
+ kind = r.randrange(0, numkinds)
+ if kind == 0:
+ inputargs.append(BoxInt())
+ values.append(r.randrange(-100000, 100000))
+ else:
+ inputargs.append(BoxFloat())
+ values.append(longlong.getfloatstorage(r.random()))
+ #
+ looptoken = JitCellToken()
+ faildescr = BasicFailDescr(42)
+ operations = []
+ retboxes = []
+ retvalues = []
+ #
+ ks = range(nb_args)
+ random.shuffle(ks)
+ for k in ks:
+ if isinstance(inputargs[k], BoxInt):
+ newbox = BoxInt()
+ x = r.randrange(-100000, 100000)
+ operations.append(
+ ResOperation(rop.INT_ADD, [inputargs[k],
+ ConstInt(x)], newbox)
+ )
+ y = values[k] + x
+ else:
+ newbox = BoxFloat()
+ x = r.random()
+ operations.append(
+ ResOperation(rop.FLOAT_ADD, [inputargs[k],
+ constfloat(x)], newbox)
+ )
+ y = longlong.getrealfloat(values[k]) + x
+ y = longlong.getfloatstorage(y)
+ kk = r.randrange(0, len(retboxes)+1)
+ retboxes.insert(kk, newbox)
+ retvalues.insert(kk, y)
+ #
+ operations.append(
+ ResOperation(rop.FINISH, retboxes, None, descr=faildescr)
+ )
+ print inputargs
+ print values
+ for op in operations:
+ print op
+ self.cpu.compile_loop(inputargs, operations, looptoken)
+ #
+ fail = self.cpu.execute_token(looptoken, *values)
+ assert fail.identifier == 42
+ #
+ for k in range(len(retvalues)):
+ if isinstance(retboxes[k], BoxInt):
+ got = self.cpu.get_latest_value_int(k)
+ else:
+ got = self.cpu.get_latest_value_float(k)
+ assert got == retvalues[k]
+
def test_jump(self):
# this test generates small loops where the JUMP passes many
# arguments of various types, shuffling them around.
More information about the pypy-commit
mailing list