[pypy-svn] pypy arm-backed-float: Start implementing support code for floats and FLOAT_ADD operation
bivab
commits-noreply at bitbucket.org
Mon Jan 17 17:36:14 CET 2011
Author: David Schneider <david.schneider at picle.org>
Branch: arm-backed-float
Changeset: r40782:69026fb1260a
Date: 2011-01-17 17:24 +0100
http://bitbucket.org/pypy/pypy/changeset/69026fb1260a/
Log: Start implementing support code for floats and FLOAT_ADD operation
diff --git a/pypy/jit/backend/arm/runner.py b/pypy/jit/backend/arm/runner.py
--- a/pypy/jit/backend/arm/runner.py
+++ b/pypy/jit/backend/arm/runner.py
@@ -9,7 +9,7 @@
class ArmCPU(AbstractLLCPU):
BOOTSTRAP_TP = lltype.FuncType([], lltype.Signed)
- supports_floats = False
+ supports_floats = True
def __init__(self, rtyper, stats, opts=None, translate_support_code=False,
gcdescr=None):
@@ -39,12 +39,18 @@
self.assembler.assemble_bridge(faildescr, inputargs, operations,
original_loop_token, log=log)
+ def set_future_value_float(self, index, intvalue):
+ self.assembler.fail_boxes_float.setitem(index, intvalue)
+
def set_future_value_int(self, index, intvalue):
self.assembler.fail_boxes_int.setitem(index, intvalue)
def set_future_value_ref(self, index, ptrvalue):
self.assembler.fail_boxes_ptr.setitem(index, ptrvalue)
+ def get_latest_value_float(self, index):
+ return self.assembler.fail_boxes_float.getitem(index)
+
def get_latest_value_int(self, index):
return self.assembler.fail_boxes_int.getitem(index)
diff --git a/pypy/jit/backend/arm/registers.py b/pypy/jit/backend/arm/registers.py
--- a/pypy/jit/backend/arm/registers.py
+++ b/pypy/jit/backend/arm/registers.py
@@ -14,6 +14,7 @@
pc = r15
all_regs = [r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10]
+all_vfp_regs = registers
caller_resp = [r0, r1, r2, r3]
callee_resp = [r4, r5, r6, r7, r8, r9, r10, fp]
diff --git a/pypy/jit/backend/arm/test/test_assembler.py b/pypy/jit/backend/arm/test/test_assembler.py
--- a/pypy/jit/backend/arm/test/test_assembler.py
+++ b/pypy/jit/backend/arm/test/test_assembler.py
@@ -11,16 +11,20 @@
from pypy.rpython.annlowlevel import llhelper
from pypy.rpython.lltypesystem import lltype, rffi, llmemory
+from pypy.jit.metainterp.history import LoopToken
+from pypy.jit.backend.model import CompiledLoopToken
skip_unless_arm()
CPU = getcpuclass()
-class TestRunningAssembler():
+class TestRunningAssembler(object):
def setup_method(self, method):
cpu = CPU(None, None)
+ lp = LoopToken()
+ lp.compiled_loop_token = CompiledLoopToken(cpu, None)
self.a = AssemblerARM(cpu)
self.a.setup_once()
- self.a.setup()
+ self.a.setup(lp)
def test_make_operation_list(self):
i = rop.INT_ADD
diff --git a/pypy/jit/backend/arm/assembler.py b/pypy/jit/backend/arm/assembler.py
--- a/pypy/jit/backend/arm/assembler.py
+++ b/pypy/jit/backend/arm/assembler.py
@@ -3,8 +3,9 @@
from pypy.jit.backend.arm import registers as r
from pypy.jit.backend.arm.arch import WORD, FUNC_ALIGN, PC_OFFSET
from pypy.jit.backend.arm.codebuilder import ARMv7Builder, OverwritingBuilder
-from pypy.jit.backend.arm.regalloc import (ARMRegisterManager, ARMFrameManager,
+from pypy.jit.backend.arm.regalloc import (Regalloc, ARMFrameManager,
_check_imm_arg, TempInt, TempPtr)
+from pypy.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
from pypy.jit.backend.llsupport.regalloc import compute_vars_longevity, TempBox
from pypy.jit.backend.model import CompiledLoopToken
from pypy.jit.metainterp.history import (Const, ConstInt, ConstPtr,
@@ -12,7 +13,9 @@
INT, REF, FLOAT)
from pypy.jit.metainterp.resoperation import rop
from pypy.rlib import rgc
+from pypy.rlib.longlong2float import float2longlong, longlong2float
from pypy.rlib.objectmodel import we_are_translated
+from pypy.rlib.rarithmetic import r_uint, r_longlong
from pypy.rpython.annlowlevel import llhelper
from pypy.rpython.lltypesystem import lltype, rffi, llmemory
from pypy.jit.backend.arm.opassembler import ResOpAssembler
@@ -38,8 +41,9 @@
\xFF = END_OF_LOCS
"""
- REF_TYPE = '\xEE'
- INT_TYPE = '\xEF'
+ FLOAT_TYPE = '\xED'
+ REF_TYPE = '\xEE'
+ INT_TYPE = '\xEF'
STACK_LOC = '\xFC'
IMM_LOC = '\xFD'
@@ -52,6 +56,7 @@
def __init__(self, cpu, failargs_limit=1000):
self.cpu = cpu
self.fail_boxes_int = values_array(lltype.Signed, failargs_limit)
+ self.fail_boxes_float = values_array(lltype.Float, failargs_limit)
self.fail_boxes_ptr = values_array(llmemory.GCREF, failargs_limit)
self.setup_failure_recovery()
self.mc = None
@@ -62,11 +67,16 @@
self.memcpy_addr = 0
self.teardown()
self._exit_code_addr = 0
+ self.datablockwrapper = None
- def setup(self):
+ def setup(self, looptoken):
assert self.memcpy_addr != 0, 'setup_once() not called?'
self.mc = ARMv7Builder()
self.guard_descrs = []
+ if self.datablockwrapper is None:
+ allblocks = self.get_asmmemmgr_blocks(looptoken)
+ self.datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr,
+ allblocks)
def setup_once(self):
# Addresses of functions called by new_xxx operations
@@ -110,9 +120,10 @@
Values for spilled vars and registers are stored on stack at frame_loc
"""
enc = rffi.cast(rffi.CCHARP, mem_loc)
- frame_depth = frame_loc - (regs_loc + len(r.all_regs)*WORD)
+ frame_depth = frame_loc - (regs_loc + len(r.all_regs)*WORD + len(r.all_vfp_regs)*2*WORD)
stack = rffi.cast(rffi.CCHARP, frame_loc - frame_depth)
- regs = rffi.cast(rffi.CCHARP, regs_loc)
+ vfp_regs = rffi.cast(rffi.CCHARP, regs_loc)
+ regs = rffi.cast(rffi.CCHARP, regs_loc + len(r.all_vfp_regs)*2*WORD)
i = -1
fail_index = -1
while(True):
@@ -138,12 +149,18 @@
i += 4
else: # REG_LOC
reg = ord(enc[i])
- value = self.decode32(regs, reg*WORD)
+ if group == self.FLOAT_TYPE:
+ t = self.decode64(vfp_regs, reg*2*WORD)
+ value = longlong2float(t)
+ else:
+ value = self.decode32(regs, reg*WORD)
if group == self.INT_TYPE:
self.fail_boxes_int.setitem(fail_index, value)
elif group == self.REF_TYPE:
self.fail_boxes_ptr.setitem(fail_index, rffi.cast(llmemory.GCREF, value))
+ elif group == self.FLOAT_TYPE:
+ self.fail_boxes_float.setitem(fail_index, rffi.cast(lltype.Float, value))
else:
assert 0, 'unknown type'
@@ -189,6 +206,12 @@
| ord(mem[index+2]) << 16
| highval << 24)
+ def decode64(self, mem, index):
+ low = self.decode32(mem, index)
+ index += 4
+ high = self.decode32(mem, index)
+ return r_longlong(r_uint(low) | (r_longlong(high << 32)))
+
def encode32(self, mem, i, n):
mem[i] = chr(n & 0xFF)
mem[i+1] = chr((n >> 8) & 0xFF)
@@ -199,10 +222,11 @@
mc = ARMv7Builder()
decode_registers_addr = llhelper(self.recovery_func_sign, self.failure_recovery_func)
- mc.PUSH([reg.value for reg in r.all_regs]) # registers r0 .. r10
- mc.MOV_rr(r.r0.value, r.lr.value) # move mem block address, to r0 to pass as
- mc.MOV_rr(r.r1.value, r.fp.value) # pass the current frame pointer as second param
- mc.MOV_rr(r.r2.value, r.sp.value) # pass the current stack pointer as third param
+ mc.PUSH([reg.value for reg in r.all_regs]) # registers r0 .. r10
+ mc.VPUSH([reg.value for reg in r.all_vfp_regs]) # registers d0 .. d15
+ mc.MOV_rr(r.r0.value, r.lr.value) # move mem block address, to r0
+ mc.MOV_rr(r.r1.value, r.fp.value) # pass the current frame pointer as second param
+ mc.MOV_rr(r.r2.value, r.sp.value) # pass the current stack pointer as third param
mc.BL(rffi.cast(lltype.Signed, decode_registers_addr))
mc.MOV_rr(r.ip.value, r.r0.value)
@@ -237,12 +261,13 @@
loc = arglocs[i+1]
if arg.type == INT:
mem[j] = self.INT_TYPE
- j += 1
elif arg.type == REF:
mem[j] = self.REF_TYPE
- j += 1
+ elif arg.type == FLOAT:
+ mem[j] = self.FLOAT_TYPE
else:
assert 0, 'unknown type'
+ j += 1
if loc.is_reg():
mem[j] = chr(loc.value)
@@ -296,10 +321,15 @@
addr = self.fail_boxes_ptr.get_addr_for_num(i)
elif loc.type == INT:
addr = self.fail_boxes_int.get_addr_for_num(i)
+ elif loc.type == FLOAT:
+ addr = self.fail_boxes_float.get_addr_for_num(i)
else:
raise ValueError
- self.mc.gen_load_int(reg.value, addr)
- self.mc.LDR_ri(reg.value, reg.value)
+ self.mc.gen_load_int(r.ip.value, addr)
+ if not loc.type == FLOAT:
+ self.mc.LDR_ri(reg.value, r.ip.value)
+ else:
+ self.mc.VLDR(reg.value, r.ip.value)
regalloc.possibly_free_var(loc)
arglocs = [regalloc.loc(arg) for arg in inputargs]
looptoken._arm_arglocs = arglocs
@@ -334,13 +364,14 @@
# cpu interface
def assemble_loop(self, inputargs, operations, looptoken, log):
- self.setup()
- longevity = compute_vars_longevity(inputargs, operations)
- regalloc = ARMRegisterManager(longevity, assembler=self, frame_manager=ARMFrameManager())
-
clt = CompiledLoopToken(self.cpu, looptoken.number)
looptoken.compiled_loop_token = clt
+ self.setup(looptoken)
+ longevity = compute_vars_longevity(inputargs, operations)
+ regalloc = Regalloc(longevity, assembler=self, frame_manager=ARMFrameManager())
+
+
self.align()
self.gen_func_prolog()
arglocs = self.gen_bootstrap_code(inputargs, regalloc, looptoken)
@@ -372,12 +403,12 @@
def assemble_bridge(self, faildescr, inputargs, operations,
original_loop_token, log):
- self.setup()
+ self.setup(original_loop_token)
assert isinstance(faildescr, AbstractFailDescr)
code = faildescr._failure_recovery_code
enc = rffi.cast(rffi.CCHARP, code)
longevity = compute_vars_longevity(inputargs, operations)
- regalloc = ARMRegisterManager(longevity, assembler=self,
+ regalloc = Regalloc(longevity, assembler=self,
frame_manager=ARMFrameManager())
frame_depth = faildescr._arm_frame_depth
@@ -400,6 +431,8 @@
self.teardown()
def materialize_loop(self, looptoken):
+ self.datablockwrapper.done() # finish using cpu.asmmemmgr
+ self.datablockwrapper = None
allblocks = self.get_asmmemmgr_blocks(looptoken)
return self.mc.materialize(self.cpu.asmmemmgr, allblocks,
self.cpu.gc_ll_descr.gcrootmap)
@@ -523,8 +556,11 @@
# regalloc support
def load(self, loc, value):
assert loc.is_reg()
- assert value.is_imm()
- self.mc.gen_load_int(loc.value, value.getint())
+ if value.is_imm():
+ self.mc.gen_load_int(loc.value, value.getint())
+ elif value.is_imm_float():
+ self.mc.gen_load_int(r.ip.value, value.getint())
+ self.mc.VLDR(loc.value, r.ip.value)
def regalloc_mov(self, prev_loc, loc):
if prev_loc.is_imm():
diff --git a/pypy/jit/backend/arm/opassembler.py b/pypy/jit/backend/arm/opassembler.py
--- a/pypy/jit/backend/arm/opassembler.py
+++ b/pypy/jit/backend/arm/opassembler.py
@@ -11,7 +11,7 @@
gen_emit_op_ri, gen_emit_cmp_op)
from pypy.jit.backend.arm.codebuilder import ARMv7Builder, OverwritingBuilder
from pypy.jit.backend.arm.jump import remap_frame_layout
-from pypy.jit.backend.arm.regalloc import ARMRegisterManager
+from pypy.jit.backend.arm.regalloc import Regalloc
from pypy.jit.backend.llsupport import symbolic
from pypy.jit.backend.llsupport.descr import BaseFieldDescr, BaseArrayDescr
from pypy.jit.backend.llsupport.regalloc import compute_vars_longevity, TempBox
@@ -769,10 +769,18 @@
emit_op_newstr = emit_op_new_array
emit_op_newunicode = emit_op_new_array
+class FloatOpAssemlber(object):
+ _mixin_ = True
+
+ def emit_op_float_add(self, op, arglocs, regalloc, fcon):
+ arg1, arg2, result = arglocs
+ self.mc.VADD(result.value, arg1.value, arg2.value)
+
class ResOpAssembler(GuardOpAssembler, IntOpAsslember,
OpAssembler, UnaryIntOpAssembler,
FieldOpAssembler, ArrayOpAssember,
StrOpAssembler, UnicodeOpAssembler,
- ForceOpAssembler, AllocOpAssembler):
+ ForceOpAssembler, AllocOpAssembler,
+ FloatOpAssemlber):
pass
diff --git a/pypy/jit/backend/arm/locations.py b/pypy/jit/backend/arm/locations.py
--- a/pypy/jit/backend/arm/locations.py
+++ b/pypy/jit/backend/arm/locations.py
@@ -12,6 +12,9 @@
def is_reg(self):
return False
+ def is_imm_float(self):
+ return False
+
def as_key(self):
raise NotImplementedError
@@ -48,6 +51,24 @@
def as_key(self):
return self.value + 20
+class ConstFloatLoc(AssemblerLocation):
+ _immutable_ = True
+
+ def __init__(self, value):
+ self.value = value
+
+ def getint(self):
+ return self.value
+
+ def __repr__(self):
+ return "imm_float(%d)" % (self.value)
+
+ def is_imm_float(self):
+ return True
+
+ def as_key(self):
+ return -1 * self.value
+
class StackLocation(AssemblerLocation):
_immutable_ = True
diff --git a/pypy/jit/backend/arm/regalloc.py b/pypy/jit/backend/arm/regalloc.py
--- a/pypy/jit/backend/arm/regalloc.py
+++ b/pypy/jit/backend/arm/regalloc.py
@@ -8,8 +8,8 @@
prepare_op_ri,
prepare_cmp_op,
_check_imm_arg)
-from pypy.jit.metainterp.history import (Const, ConstInt, ConstPtr, Box,
- BoxInt, BoxPtr, AbstractFailDescr,
+from pypy.jit.metainterp.history import (Const, ConstInt, ConstFloat, ConstPtr,
+ Box, BoxInt, BoxPtr, AbstractFailDescr,
INT, REF, FLOAT, LoopToken)
from pypy.jit.metainterp.resoperation import rop
from pypy.jit.backend.llsupport.descr import BaseFieldDescr, BaseArrayDescr
@@ -23,11 +23,19 @@
def __repr__(self):
return "<TempInt at %s>" % (id(self),)
+
class TempPtr(TempBox):
type = REF
+
def __repr__(self):
return "<TempPtr at %s>" % (id(self),)
+class TempFloat(TempBox):
+ type = FLOAT
+
+ def __repr__(self):
+ return "<TempFloat at %s>" % (id(self),)
+
class ARMFrameManager(FrameManager):
def __init__(self):
FrameManager.__init__(self)
@@ -41,48 +49,31 @@
def void(self, op, fcond):
return []
-class ARMRegisterManager(RegisterManager):
+class VFPRegisterManager(RegisterManager):
+ all_regs = r.all_vfp_regs
+ box_types = [FLOAT]
+ save_around_call_regs = all_regs
+
+ def convert_to_imm(self, c):
+ adr = self.assembler.datablockwrapper.malloc_aligned(8, 8)
+ rffi.cast(rffi.CArrayPtr(rffi.DOUBLE), adr)[0] = c.getfloat()
+ return locations.ConstFloatLoc(adr)
+
+ def __init__(self, longevity, frame_manager=None, assembler=None):
+ RegisterManager.__init__(self, longevity, frame_manager, assembler)
+
+class ARMv7RegisterMananger(RegisterManager):
all_regs = r.all_regs
box_types = None # or a list of acceptable types
no_lower_byte_regs = all_regs
save_around_call_regs = r.caller_resp
def __init__(self, longevity, frame_manager=None, assembler=None):
- self.cpu = assembler.cpu
RegisterManager.__init__(self, longevity, frame_manager, assembler)
- def convert_to_imm(self, c):
- if isinstance(c, ConstInt):
- return locations.ImmLocation(c.value)
- else:
- assert isinstance(c, ConstPtr)
- return locations.ImmLocation(rffi.cast(lltype.Signed, c.value))
-
def call_result_location(self, v):
return r.r0
- def update_bindings(self, locs, frame_depth, inputargs):
- used = {}
- i = 0
- self.frame_manager.frame_depth = frame_depth
- for loc in locs:
- arg = inputargs[i]
- i += 1
- if loc.is_reg():
- self.reg_bindings[arg] = loc
- else:
- self.frame_manager.frame_bindings[arg] = loc
- used[loc] = None
-
- # XXX combine with x86 code and move to llsupport
- self.free_regs = []
- for reg in self.all_regs:
- if reg not in used:
- self.free_regs.append(reg)
- # note: we need to make a copy of inputargs because possibly_free_vars
- # is also used on op args, which is a non-resizable list
- self.possibly_free_vars(list(inputargs))
-
def before_call(self, force_store=[], save_all_regs=False):
for v, reg in self.reg_bindings.items():
if(reg in self.save_around_call_regs and v not in force_store and
@@ -98,6 +89,88 @@
del self.reg_bindings[v]
self.free_regs.append(reg)
+ def convert_to_imm(self, c):
+ if isinstance(c, ConstInt):
+ return locations.ImmLocation(c.value)
+ else:
+ assert isinstance(c, ConstPtr)
+ return locations.ImmLocation(rffi.cast(lltype.Signed, c.value))
+
+class Regalloc(object):
+
+ def __init__(self, longevity, frame_manager=None, assembler=None):
+ self.cpu = assembler.cpu
+ self.longevity = longevity
+ self.frame_manager = frame_manager
+ self.assembler = assembler
+ self.vfprm = VFPRegisterManager(longevity, frame_manager, assembler)
+ self.rm = ARMv7RegisterMananger(longevity, frame_manager, assembler)
+
+ def loc(self, var):
+ if var.type == FLOAT:
+ return self.vfprm.loc(var)
+ else:
+ return self.rm.loc(var)
+
+ def force_allocate_reg(self, var, forbidden_vars=[], selected_reg=None,
+ need_lower_byte=False):
+ if var.type == FLOAT:
+ return self.vfprm.force_allocate_reg(var, forbidden_vars,
+ selected_reg, need_lower_byte)
+ else:
+ return self.rm.force_allocate_reg(var, forbidden_vars,
+ selected_reg, need_lower_byte)
+ def possibly_free_var(self, var):
+ if var.type == FLOAT:
+ self.vfprm.possibly_free_var(var)
+ else:
+ self.rm.possibly_free_var(var)
+
+ def possibly_free_vars_for_op(self, op):
+ for i in range(op.numargs()):
+ var = op.getarg(i)
+ if var is not None: # xxx kludgy
+ self.possibly_free_var(var)
+
+ def possibly_free_vars(self, vars):
+ for var in vars:
+ if var is not None: # xxx kludgy
+ self.possibly_free_var(var)
+
+ def make_sure_var_in_reg(self, var, forbidden_vars=[],
+ selected_reg=None, need_lower_byte=False):
+ if var.type == FLOAT:
+ return self.vfprm.make_sure_var_in_reg(var, forbidden_vars,
+ selected_reg, need_lower_byte)
+ else:
+ return self.rm.make_sure_var_in_reg(var, forbidden_vars,
+ selected_reg, need_lower_byte)
+
+
+ def update_bindings(self, locs, frame_depth, inputargs):
+ used = {}
+ i = 0
+ self.frame_manager.frame_depth = frame_depth
+ for loc in locs:
+ arg = inputargs[i]
+ i += 1
+ if loc.is_reg():
+ self.reg_bindings[arg] = loc
+ #XXX add float
+ else:
+ self.frame_manager.frame_bindings[arg] = loc
+ used[loc] = None
+
+ # XXX combine with x86 code and move to llsupport
+ self.free_regs = []
+ for reg in self.all_regs:
+ if reg not in used:
+ self.free_regs.append(reg)
+ # note: we need to make a copy of inputargs because possibly_free_vars
+ # is also used on op args, which is a non-resizable list
+ self.possibly_free_vars(list(inputargs))
+
+
def force_spill_var(self, var):
self._sync_var(var)
try:
@@ -119,11 +192,16 @@
box = TempInt()
elif isinstance(thing, ConstPtr):
box = TempPtr()
+ elif isinstance(thing, ConstFloat):
+ box = TempFloat()
else:
box = TempBox()
loc = self.force_allocate_reg(box,
forbidden_vars=forbidden_vars)
- imm = self.convert_to_imm(thing)
+ if isinstance(thing, ConstFloat):
+ imm = self.vfprm.convert_to_imm(thing)
+ else:
+ imm = self.rm.convert_to_imm(thing)
self.assembler.load(loc, imm)
else:
loc = self.make_sure_var_in_reg(thing,
@@ -703,6 +781,14 @@
return size, scale, ofs, ofs_length, ptr
+ def prepare_op_float_add(self, op, fcond):
+ loc1, box1 = self._ensure_value_is_boxed(op.getarg(0))
+ loc2, box2 = self._ensure_value_is_boxed(op.getarg(1))
+ self.vfprm.possibly_free_var(box1)
+ self.vfprm.possibly_free_var(box2)
+ res = self.vfprm.force_allocate_reg(op.result)
+ self.vfprm.possibly_free_var(op.result)
+ return [loc1, loc2, res]
def make_operation_list():
def notimplemented(self, op, fcond):
@@ -714,8 +800,8 @@
if key.startswith('_'):
continue
methname = 'prepare_op_%s' % key
- if hasattr(ARMRegisterManager, methname):
- func = getattr(ARMRegisterManager, methname).im_func
+ if hasattr(Regalloc, methname):
+ func = getattr(Regalloc, methname).im_func
else:
func = notimplemented
operations[value] = func
@@ -730,10 +816,10 @@
if key.startswith('_'):
continue
methname = 'prepare_guard_%s' % key
- if hasattr(ARMRegisterManager, methname):
- func = getattr(ARMRegisterManager, methname).im_func
+ if hasattr(Regalloc, methname):
+ func = getattr(Regalloc, methname).im_func
guard_operations[value] = func
return guard_operations
-ARMRegisterManager.operations = make_operation_list()
-ARMRegisterManager.operations_with_guard = make_guard_operation_list()
+Regalloc.operations = make_operation_list()
+Regalloc.operations_with_guard = make_guard_operation_list()
More information about the Pypy-commit
mailing list