[pypy-svn] pypy arm-backed-float: Finish implementing float operations

Wed Feb 2 13:49:47 CET 2011

Author: David Schneider <david.schneider at picle.org>
Branch: arm-backed-float
Changeset: r41544:5c8e9dd21de5
Date: 2011-01-18 18:08 +0100
http://bitbucket.org/pypy/pypy/changeset/5c8e9dd21de5/

Log:	Finish implementing float operations

diff --git a/pypy/jit/backend/arm/instruction_builder.py b/pypy/jit/backend/arm/instruction_builder.py
--- a/pypy/jit/backend/arm/instruction_builder.py
+++ b/pypy/jit/backend/arm/instruction_builder.py
@@ -303,15 +303,36 @@
     n = (0xE << 24
         | 0x5 << 9
         | 0x1 << 8 # 64 bit flag
-        | (table['opc1'] & 0xF) << 20
         | (table['opc3'] & 0x3) << 6)
-    def f(self, dd, dn, dm, cond=cond.AL):
-        instr = (n
-                | (cond & 0xF) << 28
-                | (dn & 0xF) << 16
-                | (dd & 0xF) << 12
-                | (dm & 0xF))
-        self.write32(instr)
+
+    if 'opc1' in table:
+        n |= (table['opc1'] & 0xF) << 20
+    if 'opc2' in table:
+        n |= (table['opc2'] & 0xF) << 16
+
+    if 'result' in table and not table['result']:
+        def f(self, dd, dm, cond=cond.AL):
+            instr = (n
+                    | (cond & 0xF) << 28
+                    | 0x4 << 16
+                    | (dd & 0xF) << 12
+                    | (dm & 0xF))
+            self.write32(instr)
+    elif 'base' in table and not table['base']:
+        def f(self, dd, dm, cond=cond.AL):
+            instr = (n
+                    | (cond & 0xF) << 28
+                    | (dd & 0xF) << 12
+                    | (dm & 0xF))
+            self.write32(instr)
+    else:
+        def f(self, dd, dn, dm, cond=cond.AL):
+            instr = (n
+                    | (cond & 0xF) << 28
+                    | (dn & 0xF) << 16
+                    | (dd & 0xF) << 12
+                    | (dm & 0xF))
+            self.write32(instr)
     return f
 
 def imm_operation(rt, rn, imm):

diff --git a/pypy/jit/backend/arm/instructions.py b/pypy/jit/backend/arm/instructions.py
--- a/pypy/jit/backend/arm/instructions.py
+++ b/pypy/jit/backend/arm/instructions.py
@@ -129,8 +129,12 @@
 # based on encoding from A7.5	VFP data-processing instructions
 # opc2 is one of the parameters and therefore ignored here
 float64_data_proc_instructions = {
-    'VADD' : {'opc1':0x3, 'opc3':0},
-    'VSUB' : {'opc1':0x3, 'opc3':1},
-    'VMUL' : {'opc1':0x2, 'opc3':0},
-    'VDIV' : {'opc1':0x8, 'opc3':0},
+    'VADD' : {'opc1':0x3, 'opc3':0x0},
+    'VSUB' : {'opc1':0x3, 'opc3':0x1},
+    'VMUL' : {'opc1':0x2, 'opc3':0x0},
+    'VDIV' : {'opc1':0x8, 'opc3':0x0},
+    'VCMP' : {'opc1':0xB, 'opc2':0x4, 'opc3':0x1, 'result': False},
+    'VNEG' : {'opc1':0xB, 'opc2':0x1, 'opc3':0x1, 'base': False},
+    'VABS' : {'opc1':0xB, 'opc2':0x0, 'opc3':0x3, 'base': False},
+    #'VCVT' : {'opc1':0xB, 'opc2':0xE, 'opc3':0x1, 'base': False},
 }

diff --git a/pypy/jit/backend/arm/test/test_instr_codebuilder.py b/pypy/jit/backend/arm/test/test_instr_codebuilder.py
--- a/pypy/jit/backend/arm/test/test_instr_codebuilder.py
+++ b/pypy/jit/backend/arm/test/test_instr_codebuilder.py
@@ -152,6 +152,10 @@
     def test_vstr_offset(self):
         assert py.test.raises(AssertionError, 'self.cb.VSTR(r.d1, r.r4, 3)')
 
+    def test_vmrs(self):
+        self.cb.VMRS(conditions.AL)
+        self.assert_equal("vmrs APSR_nzcv, fpscr")
+
     def test_pop_raises_on_lr(self):
         assert py.test.raises(AssertionError, 'self.cb.POP([r.lr.value])')
 
@@ -175,8 +179,15 @@
     tests = []
     for c,v in [('EQ', conditions.EQ), ('LE', conditions.LE), ('AL', conditions.AL)]:
         for reg in range(16):
-            asm = 'd%d, d1, d2' % reg
-            tests.append((asm, (reg, r.d1.value, r.d2.value), {}, '.F64'))
+            if 'result' in table and not table['result']:
+                asm = 'd%d, d2' % reg
+                tests.append((asm, (reg, r.d2.value), {}, '.F64'))
+            elif 'base' in table and not table['base']:
+                asm = 'd%d, d2' % reg
+                tests.append((asm, (reg, r.d2.value), {}, '.F64'))
+            else:
+                asm = 'd%d, d1, d2' % reg
+                tests.append((asm, (reg, r.d1.value, r.d2.value), {}, '.F64'))
     return tests
 
 def gen_test_data_proc_imm_func(name, table):

diff --git a/pypy/jit/backend/arm/codebuilder.py b/pypy/jit/backend/arm/codebuilder.py
--- a/pypy/jit/backend/arm/codebuilder.py
+++ b/pypy/jit/backend/arm/codebuilder.py
@@ -44,20 +44,60 @@
         self.write32(instr)
 
     def VPUSH(self, regs, cond=cond.AL):
-        nregs = len(regs) 
+        nregs = len(regs)
         assert nregs > 0 and nregs <= 16
         freg = regs[0]
         D = (freg & 0x10) >> 4
         Dd = (freg & 0xF)
-        nregs *= 2 
-        instr = (cond << 28 
-                | 0xD2D << 16 
-                | D << 22 
+        nregs *= 2
+        instr = (cond << 28
+                | 0xD2D << 16
+                | D << 22
                 | Dd << 12
                 | 0xB << 8
                 | nregs)
         self.write32(instr)
 
+    def VPOP(self, regs, cond=cond.AL):
+        nregs = len(regs)
+        assert nregs > 0 and nregs <= 16
+        freg = regs[0]
+        D = (freg & 0x10) >> 4
+        Dd = (freg & 0xF)
+        nregs *= 2
+        instr = (cond << 28
+                | 0xCBD << 16
+                | D << 22
+                | Dd << 12
+                | 0xB << 8
+                | nregs)
+        self.write32(instr)
+
+    def VCVT_float_to_int(self, target, source, cond=cond.AL):
+        opc2 = 0x5
+        sz = 1
+        self._VCVT(target, source, cond, opc2, sz)
+
+    def VCVT_int_to_float(self, target, source, cond=cond.AL):
+        self._VCVT(target, source, cond, 0, 1)
+
+    def _VCVT(self, target, source, cond, opc2, sz):
+        D = 0x0
+        M = 0
+        op = 1
+        instr = (cond << 28
+                | 0xEB8 << 16
+                | D << 22
+                | opc2 << 16
+                | (target & 0xF) << 12
+                | 0x5 << 9
+                | sz << 8
+                | op << 7
+                | 1 << 6
+                | M << 5
+                | (source & 0xF))
+        self.write32(instr)
+
     def POP(self, regs, cond=cond.AL):
         assert reg.lr.value not in regs
         instr = self._encode_reg_list(cond << 28 | 0x8BD << 16, regs)
@@ -66,6 +106,10 @@
     def BKPT(self, cond=cond.AL):
         self.write32(cond << 28 | 0x1200070)
 
+    # corresponds to the instruction vmrs APSR_nzcv, fpscr
+    def VMRS(self, cond=cond.AL):
+        self.write32(cond << 28 | 0xEF1FA10)
+
     def B(self, target, c=cond.AL):
         if c == cond.AL:
             self.LDR_ri(reg.pc.value, reg.pc.value, -arch.PC_OFFSET/2)

diff --git a/pypy/jit/backend/arm/registers.py b/pypy/jit/backend/arm/registers.py
--- a/pypy/jit/backend/arm/registers.py
+++ b/pypy/jit/backend/arm/registers.py
@@ -1,10 +1,11 @@
-from pypy.jit.backend.arm.locations import RegisterLocation
+from pypy.jit.backend.arm.locations import RegisterLocation, VFPRegisterLocation
 
 registers = [RegisterLocation(i) for i in range(16)]
+vfpregisters = [VFPRegisterLocation(i) for i in range(16)]
 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15 = registers
 
 #vfp registers interpreted as 64-bit registers
-d0, d1, d2, d3, d4, d5, d6, d7, d8, d9, d10, d11, d12, d13, d14, d15 = registers
+d0, d1, d2, d3, d4, d5, d6, d7, d8, d9, d10, d11, d12, d13, d14, d15 = vfpregisters
 
 # aliases for registers
 fp = r11
@@ -14,7 +15,7 @@
 pc = r15
 
 all_regs = [r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10]
-all_vfp_regs = registers
+all_vfp_regs = vfpregisters
 
 caller_resp = [r0, r1, r2, r3]
 callee_resp = [r4, r5, r6, r7, r8, r9, r10, fp]

diff --git a/pypy/jit/backend/arm/assembler.py b/pypy/jit/backend/arm/assembler.py
--- a/pypy/jit/backend/arm/assembler.py
+++ b/pypy/jit/backend/arm/assembler.py
@@ -139,7 +139,7 @@
             i += 1
             res = enc[i]
             if res == self.IMM_LOC:
-                assert group == self.INT_TYPE
+                assert group == self.INT_TYPE or group == self.REF_TYPE
                 # imm value
                 value = self.decode32(enc, i+1)
                 i += 4
@@ -210,7 +210,7 @@
         low = self.decode32(mem, index)
         index += 4
         high = self.decode32(mem, index)
-        return r_longlong(r_uint(low) | (r_longlong(high << 32)))
+        return r_longlong(high << 32) | r_longlong(r_uint(low))
 
     def encode32(self, mem, i, n):
         mem[i] = chr(n & 0xFF)
@@ -273,7 +273,9 @@
                     mem[j] = chr(loc.value)
                     j += 1
                 elif loc.is_imm():
-                    assert arg.type == INT or arg.type == REF
+                    if not arg.type == INT or arg.type == REF:
+                        print "Expected INT or REF values"
+                        assert 0
                     mem[j] = self.IMM_LOC
                     self.encode32(mem, j+1, loc.getint())
                     j += 5

diff --git a/pypy/jit/backend/arm/opassembler.py b/pypy/jit/backend/arm/opassembler.py
--- a/pypy/jit/backend/arm/opassembler.py
+++ b/pypy/jit/backend/arm/opassembler.py
@@ -10,7 +10,9 @@
                                                     gen_emit_op_unary_cmp,
                                                     gen_emit_op_ri,
                                                     gen_emit_cmp_op,
-                                                    gen_emit_float_op)
+                                                    gen_emit_float_op,
+                                                    gen_emit_float_cmp_op,
+                                                    gen_emit_unary_float_op)
 from pypy.jit.backend.arm.codebuilder import ARMv7Builder, OverwritingBuilder
 from pypy.jit.backend.arm.jump import remap_frame_layout
 from pypy.jit.backend.arm.regalloc import Regalloc
@@ -779,6 +781,29 @@
     emit_op_float_mul = gen_emit_float_op('VMUL')
     emit_op_float_truediv = gen_emit_float_op('VDIV')
 
+    emit_op_float_neg = gen_emit_unary_float_op('VNEG')
+    emit_op_float_abs = gen_emit_unary_float_op('VABS')
+
+    emit_op_float_lt = gen_emit_float_cmp_op(c.LT)
+    emit_op_float_le = gen_emit_float_cmp_op(c.LE)
+    emit_op_float_eq = gen_emit_float_cmp_op(c.EQ)
+    emit_op_float_ne = gen_emit_float_cmp_op(c.NE)
+    emit_op_float_gt = gen_emit_float_cmp_op(c.GT)
+    emit_op_float_ge = gen_emit_float_cmp_op(c.GE)
+
+    def emit_op_cast_float_to_int(self, op, arglocs, regalloc, fcond):
+        arg, temp, res = arglocs
+        self.mc.VCVT_float_to_int(temp.value, arg.value)
+        self.mc.VPUSH([temp.value])
+        # res is a lower-numbered register than r.ip, so POP fills res first
+        self.mc.POP([res.value, r.ip.value])
+
+    def emit_op_cast_int_to_float(self, op, arglocs, regalloc, fcond):
+        arg, temp, res = arglocs
+        self.mc.PUSH([arg.value, r.ip.value])
+        self.mc.VPOP([temp.value])
+        self.mc.VCVT_int_to_float(res.value, temp.value)
+
 class ResOpAssembler(GuardOpAssembler, IntOpAsslember,
                     OpAssembler, UnaryIntOpAssembler,
                     FieldOpAssembler, ArrayOpAssember,

diff --git a/pypy/jit/backend/arm/helper/assembler.py b/pypy/jit/backend/arm/helper/assembler.py
--- a/pypy/jit/backend/arm/helper/assembler.py
+++ b/pypy/jit/backend/arm/helper/assembler.py
@@ -56,3 +56,20 @@
         arg1, arg2, result = arglocs
         op_rr(self.mc, result.value, arg1.value, arg2.value)
     return f
+def gen_emit_unary_float_op(opname):
+    op_rr = getattr(AbstractARMv7Builder, opname)
+    def f(self, op, arglocs, regalloc, fcon):
+        arg1, result = arglocs
+        op_rr(self.mc, result.value, arg1.value)
+    return f
+
+def gen_emit_float_cmp_op(cond):
+    def f(self, op, arglocs, regalloc, fcond):
+        arg1, arg2, res = arglocs
+        inv = c.get_opposite_of(cond)
+        self.mc.VCMP(arg1.value, arg2.value)
+        self.mc.VMRS(cond=fcond)
+        self.mc.MOV_ri(res.value, 1, cond=cond)
+        self.mc.MOV_ri(res.value, 0, cond=inv)
+        return fcond
+    return f

diff --git a/pypy/jit/backend/arm/helper/regalloc.py b/pypy/jit/backend/arm/helper/regalloc.py
--- a/pypy/jit/backend/arm/helper/regalloc.py
+++ b/pypy/jit/backend/arm/helper/regalloc.py
@@ -51,15 +51,24 @@
         return [l0, l1, res]
     return f
 
-def prepare_float_op():
+def prepare_float_op(base=True, float_result=True):
     def f(self, op, fcond):
+        locs = []
         loc1, box1 = self._ensure_value_is_boxed(op.getarg(0))
-        loc2, box2 = self._ensure_value_is_boxed(op.getarg(1))
+        locs.append(loc1)
         self.vfprm.possibly_free_var(box1)
-        self.vfprm.possibly_free_var(box2)
-        res  = self.vfprm.force_allocate_reg(op.result)
-        self.vfprm.possibly_free_var(op.result)
-        return [loc1, loc2, res]
+        if base:
+            loc2, box2 = self._ensure_value_is_boxed(op.getarg(1))
+            locs.append(loc2)
+            self.vfprm.possibly_free_var(box2)
+        if float_result:
+            res  = self.vfprm.force_allocate_reg(op.result)
+            self.vfprm.possibly_free_var(op.result)
+        else:
+            res  = self.rm.force_allocate_reg(op.result)
+            self.rm.possibly_free_var(op.result)
+        locs.append(res)
+        return locs
     return f
 
 def prepare_op_by_helper_call():

diff --git a/pypy/jit/backend/arm/locations.py b/pypy/jit/backend/arm/locations.py
--- a/pypy/jit/backend/arm/locations.py
+++ b/pypy/jit/backend/arm/locations.py
@@ -33,6 +33,15 @@
     def as_key(self):
         return self.value
 
+class VFPRegisterLocation(RegisterLocation):
+    _immutable_ = True
+
+    def get_single_precision_regs(self):
+        return [VFPRegisterLocation(i) for i in [self.value*2, self.value*2+1]]
+
+    def __repr__(self):
+        return 'f%d' % self.value
+
 class ImmLocation(AssemblerLocation):
     _immutable_ = True
 

diff --git a/pypy/jit/backend/arm/regalloc.py b/pypy/jit/backend/arm/regalloc.py
--- a/pypy/jit/backend/arm/regalloc.py
+++ b/pypy/jit/backend/arm/regalloc.py
@@ -53,7 +53,7 @@
 class VFPRegisterManager(RegisterManager):
     all_regs = r.all_vfp_regs
     box_types = [FLOAT]
-    save_around_call_regs = all_regs
+    save_around_call_regs = r.all_vfp_regs
 
     def convert_to_imm(self, c):
         adr = self.assembler.datablockwrapper.malloc_aligned(8, 8)
@@ -786,6 +786,50 @@
     prepare_op_float_sub = prepare_float_op()
     prepare_op_float_mul = prepare_float_op()
     prepare_op_float_truediv = prepare_float_op()
+    prepare_op_float_lt = prepare_float_op(float_result=False)
+    prepare_op_float_le = prepare_float_op(float_result=False)
+    prepare_op_float_eq = prepare_float_op(float_result=False)
+    prepare_op_float_ne = prepare_float_op(float_result=False)
+    prepare_op_float_gt = prepare_float_op(float_result=False)
+    prepare_op_float_ge = prepare_float_op(float_result=False)
+    prepare_op_float_neg = prepare_float_op(base=False)
+    prepare_op_float_abs = prepare_float_op(base=False)
+
+    def prepare_op_cast_float_to_int(self, op, fcond):
+        locs = []
+
+        loc1, box1 = self._ensure_value_is_boxed(op.getarg(0))
+        locs.append(loc1)
+        self.possibly_free_var(box1)
+
+        t = TempFloat()
+        temp_loc = self.vfprm.force_allocate_reg(t)
+        locs.append(temp_loc)
+        self.possibly_free_var(t)
+
+        res  = self.rm.force_allocate_reg(op.result)
+        self.possibly_free_var(op.result)
+        locs.append(res)
+
+        return locs
+
+    def prepare_op_cast_int_to_float(self, op, fcond):
+        locs = []
+
+        loc1, box1 = self._ensure_value_is_boxed(op.getarg(0))
+        locs.append(loc1)
+        self.possibly_free_var(box1)
+
+        t = TempFloat()
+        temp_loc = self.vfprm.force_allocate_reg(t)
+        locs.append(temp_loc)
+        self.possibly_free_var(t)
+
+        res  = self.vfprm.force_allocate_reg(op.result)
+        self.possibly_free_var(op.result)
+        locs.append(res)
+
+        return locs
 
 def make_operation_list():
     def notimplemented(self, op, fcond):