[pypy-commit] pypy backend-vector-ops: Good. First go at vectorized operations - support double reading writing

fijal noreply at buildbot.pypy.org
Wed Feb 1 13:43:25 CET 2012


Author: Maciej Fijalkowski <fijall at gmail.com>
Branch: backend-vector-ops
Changeset: r52010:b60d7a3bcf8f
Date: 2012-02-01 14:42 +0200
http://bitbucket.org/pypy/pypy/changeset/b60d7a3bcf8f/

Log:	Good. First go at vectorized operations - support reading, writing
	and adding doubles in the x86 backend. No spilling so far.

diff --git a/pypy/jit/backend/model.py b/pypy/jit/backend/model.py
--- a/pypy/jit/backend/model.py
+++ b/pypy/jit/backend/model.py
@@ -10,6 +10,8 @@
     # longlongs are supported by the JIT, but stored as doubles.
     # Boxes and Consts are BoxFloats and ConstFloats.
     supports_singlefloats = False
+    supports_vector_ops = False
+    # SSE and similar
 
     done_with_this_frame_void_v = -1
     done_with_this_frame_int_v = -1
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -3141,10 +3141,29 @@
         assert fail.identifier == 42
 
     def test_vector_ops(self):
-        ops = """
-        [p0]
-        guard_array_aligned(p0) []
-        """
+        if not self.cpu.supports_vector_ops:
+            py.test.skip("unsupported vector ops")
+        
+        A = lltype.Array(lltype.Float, hints={'nolength': True,
+                                               'memory_position_alignment': 16})
+        descr0 = self.cpu.arraydescrof(A)
+        looptoken = JitCellToken()
+        ops = parse("""
+        [p0, p1]
+        vec0 = getarrayitem_vector_raw(p0, 0, descr=descr0)
+        vec1 = getarrayitem_vector_raw(p1, 0, descr=descr0)
+        vec2 = float_vector_add(vec0, vec1)
+        setarrayitem_vector_raw(p0, 0, vec2, descr=descr0)
+        finish()
+        """, namespace=locals())
+        self.cpu.compile_loop(ops.inputargs, ops.operations, looptoken)
+        a = lltype.malloc(A, 10, flavor='raw')
+        a[0] = 13.0
+        a[1] = 15.0
+        self.cpu.execute_token(looptoken, a, a)
+        assert a[0] == 26
+        assert a[1] == 30
+        lltype.free(a, flavor='raw')
 
 class OOtypeBackendTest(BaseBackendTest):
 
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -45,6 +45,7 @@
 # darwin requires the stack to be 16 bytes aligned on calls. Same for gcc 4.5.0,
 # better safe than sorry
 CALL_ALIGN = 16 // WORD
+FLOAT_VECTOR_SIZE = 1 # multiply by 2
 
 def align_stack_words(words):
     return (words + CALL_ALIGN - 1) & ~(CALL_ALIGN-1)
@@ -1164,6 +1165,7 @@
     genop_int_rshift = _binaryop("SAR")
     genop_uint_rshift = _binaryop("SHR")
     genop_float_add = _binaryop("ADDSD", True)
+    genop_float_vector_add = _binaryop("ADDPD", True)
     genop_float_sub = _binaryop('SUBSD')
     genop_float_mul = _binaryop('MULSD', True)
     genop_float_truediv = _binaryop('DIVSD')
@@ -1458,6 +1460,13 @@
     genop_getarrayitem_gc_pure = genop_getarrayitem_gc
     genop_getarrayitem_raw = genop_getarrayitem_gc
 
+    def genop_getarrayitem_vector_raw(self, op, arglocs, resloc):
+        base_loc, ofs_loc, size_loc, _, sign_loc = arglocs
+        assert isinstance(size_loc, ImmedLoc)
+        scale = _get_scale(size_loc.value)
+        src_addr = addr_add(base_loc, ofs_loc, 0, scale)
+        self.mc.MOVDQA(resloc, src_addr)
+
     def _get_interiorfield_addr(self, temp_loc, index_loc, itemsize_loc,
                                 base_loc, ofs_loc):
         assert isinstance(itemsize_loc, ImmedLoc)
@@ -1510,6 +1519,13 @@
         dest_addr = AddressLoc(base_loc, ofs_loc, scale, baseofs.value)
         self.save_into_mem(dest_addr, value_loc, size_loc)
 
+    def genop_discard_setarrayitem_vector_raw(self, op, arglocs):
+        base_loc, ofs_loc, value_loc, size_loc, _ = arglocs
+        assert isinstance(size_loc, ImmedLoc)
+        scale = _get_scale(size_loc.value)
+        dest_addr = AddressLoc(base_loc, ofs_loc, scale, 0)
+        self.mc.MOVDQA(dest_addr, value_loc)
+
     def genop_discard_strsetitem(self, op, arglocs):
         base_loc, ofs_loc, val_loc = arglocs
         basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.STR,
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -5,7 +5,7 @@
 import os
 from pypy.jit.metainterp.history import (Box, Const, ConstInt, ConstPtr,
                                          ResOperation, BoxPtr, ConstFloat,
-                                         BoxFloat, INT, REF, FLOAT,
+                                         BoxFloat, INT, REF, FLOAT, VECTOR,
                                          TargetToken, JitCellToken)
 from pypy.jit.backend.x86.regloc import *
 from pypy.rpython.lltypesystem import lltype, rffi, rstr
@@ -87,7 +87,7 @@
 
 class X86XMMRegisterManager(RegisterManager):
 
-    box_types = [FLOAT]
+    box_types = [FLOAT, VECTOR]
     all_regs = [xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7]
     # we never need lower byte I hope
     save_around_call_regs = all_regs
@@ -256,7 +256,7 @@
         return pass_on_stack
 
     def possibly_free_var(self, var):
-        if var.type == FLOAT:
+        if var.type in self.xrm.box_types:
             self.xrm.possibly_free_var(var)
         else:
             self.rm.possibly_free_var(var)
@@ -274,7 +274,7 @@
 
     def make_sure_var_in_reg(self, var, forbidden_vars=[],
                              selected_reg=None, need_lower_byte=False):
-        if var.type == FLOAT:
+        if var.type in self.xrm.box_types:
             if isinstance(var, ConstFloat):
                 return FloatImmedLoc(var.getfloatstorage())
             return self.xrm.make_sure_var_in_reg(var, forbidden_vars,
@@ -285,7 +285,7 @@
 
     def force_allocate_reg(self, var, forbidden_vars=[], selected_reg=None,
                            need_lower_byte=False):
-        if var.type == FLOAT:
+        if var.type in self.xrm.box_types:
             return self.xrm.force_allocate_reg(var, forbidden_vars,
                                                selected_reg, need_lower_byte)
         else:
@@ -293,7 +293,7 @@
                                               selected_reg, need_lower_byte)
 
     def force_spill_var(self, var):
-        if var.type == FLOAT:
+        if var.type in self.xrm.box_types:
             return self.xrm.force_spill_var(var)
         else:
             return self.rm.force_spill_var(var)
@@ -530,7 +530,7 @@
     def loc(self, v):
         if v is None: # xxx kludgy
             return None
-        if v.type == FLOAT:
+        if v.type in self.xrm.box_types:
             return self.xrm.loc(v)
         return self.rm.loc(v)
 
@@ -701,6 +701,7 @@
         self.xrm.possibly_free_vars_for_op(op)
 
     consider_float_add = _consider_float_op
+    consider_float_vector_add = _consider_float_op
     consider_float_sub = _consider_float_op
     consider_float_mul = _consider_float_op
     consider_float_truediv = _consider_float_op
@@ -1080,6 +1081,7 @@
                                  imm(itemsize), imm(ofs)])
 
     consider_setarrayitem_raw = consider_setarrayitem_gc
+    consider_setarrayitem_vector_raw = consider_setarrayitem_gc
 
     def consider_getfield_gc(self, op):
         ofs_loc, size_loc, sign = self._unpack_fielddescr(op.getdescr())
@@ -1112,6 +1114,7 @@
                           sign_loc], result_loc)
 
     consider_getarrayitem_raw = consider_getarrayitem_gc
+    consider_getarrayitem_vector_raw = consider_getarrayitem_gc
     consider_getarrayitem_gc_pure = consider_getarrayitem_gc
 
     def consider_getinteriorfield_gc(self, op):
diff --git a/pypy/jit/backend/x86/regloc.py b/pypy/jit/backend/x86/regloc.py
--- a/pypy/jit/backend/x86/regloc.py
+++ b/pypy/jit/backend/x86/regloc.py
@@ -556,6 +556,7 @@
 
     MOVSD = _binaryop('MOVSD')
     MOVAPD = _binaryop('MOVAPD')
+    MOVDQA = _binaryop('MOVDQA')
     ADDSD = _binaryop('ADDSD')
     ADDPD = _binaryop('ADDPD')
     SUBSD = _binaryop('SUBSD')
diff --git a/pypy/jit/backend/x86/runner.py b/pypy/jit/backend/x86/runner.py
--- a/pypy/jit/backend/x86/runner.py
+++ b/pypy/jit/backend/x86/runner.py
@@ -21,6 +21,7 @@
     debug = True
     supports_floats = True
     supports_singlefloats = True
+    supports_vector_ops = True
 
     dont_keepalive_stuff = False # for tests
     with_threads = False
diff --git a/pypy/jit/backend/x86/rx86.py b/pypy/jit/backend/x86/rx86.py
--- a/pypy/jit/backend/x86/rx86.py
+++ b/pypy/jit/backend/x86/rx86.py
@@ -714,12 +714,18 @@
 define_modrm_modes('MOVSX16_r*', [rex_w, '\x0F\xBF', register(1, 8)])
 define_modrm_modes('MOVSX32_r*', [rex_w, '\x63', register(1, 8)])
 
-define_modrm_modes('MOVSD_x*', ['\xF2', rex_nw, '\x0F\x10', register(1,8)], regtype='XMM')
-define_modrm_modes('MOVSD_*x', ['\xF2', rex_nw, '\x0F\x11', register(2,8)], regtype='XMM')
+define_modrm_modes('MOVSD_x*', ['\xF2', rex_nw, '\x0F\x10', register(1,8)],
+                   regtype='XMM')
+define_modrm_modes('MOVSD_*x', ['\xF2', rex_nw, '\x0F\x11', register(2,8)],
+                   regtype='XMM')
 define_modrm_modes('MOVAPD_x*', ['\x66', rex_nw, '\x0F\x28', register(1,8)],
                    regtype='XMM')
 define_modrm_modes('MOVAPD_*x', ['\x66', rex_nw, '\x0F\x29', register(2,8)],
                    regtype='XMM')
+define_modrm_modes('MOVDQA_x*', ['\x66', rex_nw, '\x0F\x6F', register(1, 8)],
+                   regtype='XMM')
+define_modrm_modes('MOVDQA_*x', ['\x66', rex_nw, '\x0F\x7F', register(2, 8)],
+                   regtype='XMM')
 
 define_modrm_modes('SQRTSD_x*', ['\xF2', rex_nw, '\x0F\x51', register(1,8)], regtype='XMM')
 
diff --git a/pypy/jit/metainterp/executor.py b/pypy/jit/metainterp/executor.py
--- a/pypy/jit/metainterp/executor.py
+++ b/pypy/jit/metainterp/executor.py
@@ -273,6 +273,9 @@
 # ____________________________________________________________
 
 
+IGNORED = ['FLOAT_VECTOR_ADD', 'GETARRAYITEM_VECTOR_RAW',
+           'SETARRAYITEM_VECTOR_RAW']
+
 def _make_execute_list():
     if 0:     # enable this to trace calls to do_xxx
         def wrap(fn):
@@ -349,7 +352,8 @@
                          rop.LABEL,
                          ):      # list of opcodes never executed by pyjitpl
                 continue
-            raise AssertionError("missing %r" % (key,))
+            if not key in IGNORED:
+                raise AssertionError("missing %r" % (key,))
     return execute_by_num_args
 
 def make_execute_function_with_boxes(name, func):
diff --git a/pypy/jit/metainterp/history.py b/pypy/jit/metainterp/history.py
--- a/pypy/jit/metainterp/history.py
+++ b/pypy/jit/metainterp/history.py
@@ -482,17 +482,14 @@
     def repr_rpython(self):
         return repr_rpython(self, 'bi')
 
-class BoxFloatVector(Box):
+class BoxVector(Box):
     type = VECTOR
 
-    def __init__(self, floats):
-        self.floats = floats
+    def __init__(self):
+        pass
 
-class BoxIntVector(Box):
-    type = VECTOR
-
-    def __init__(self, ints):
-        self.ints = ints
+    def _getrepr_(self):
+        return ''
 
 class BoxFloat(Box):
     type = FLOAT
diff --git a/pypy/jit/metainterp/resoperation.py b/pypy/jit/metainterp/resoperation.py
--- a/pypy/jit/metainterp/resoperation.py
+++ b/pypy/jit/metainterp/resoperation.py
@@ -489,7 +489,7 @@
 
     'SETARRAYITEM_GC/3d',
     'SETARRAYITEM_RAW/3d',
-    'SETARRAYITEM_VECTOR_RAW/2d',
+    'SETARRAYITEM_VECTOR_RAW/3d',
     'SETINTERIORFIELD_GC/3d',
     'SETINTERIORFIELD_RAW/3d',
     'SETFIELD_GC/2d',
diff --git a/pypy/jit/tool/oparser.py b/pypy/jit/tool/oparser.py
--- a/pypy/jit/tool/oparser.py
+++ b/pypy/jit/tool/oparser.py
@@ -114,6 +114,9 @@
         elif elem.startswith('f'):
             box = self.model.BoxFloat()
             _box_counter_more_than(self.model, elem[1:])
+        elif elem.startswith('vec'):
+            box = self.model.BoxVector()
+            _box_counter_more_than(self.model, elem[3:])
         elif elem.startswith('p'):
             # pointer
             ts = getattr(self.cpu, 'ts', self.model.llhelper)
diff --git a/pypy/jit/tool/oparser_model.py b/pypy/jit/tool/oparser_model.py
--- a/pypy/jit/tool/oparser_model.py
+++ b/pypy/jit/tool/oparser_model.py
@@ -4,7 +4,7 @@
 def get_real_model():
     class LoopModel(object):
         from pypy.jit.metainterp.history import TreeLoop, JitCellToken
-        from pypy.jit.metainterp.history import Box, BoxInt, BoxFloat
+        from pypy.jit.metainterp.history import Box, BoxInt, BoxFloat, BoxVector
         from pypy.jit.metainterp.history import ConstInt, ConstObj, ConstPtr, ConstFloat
         from pypy.jit.metainterp.history import BasicFailDescr, TargetToken
         from pypy.jit.metainterp.typesystem import llhelper
@@ -76,6 +76,9 @@
         class BoxRef(Box):
             type = 'p'
 
+        class BoxVector(Box):
+            type = 'e'
+
         class Const(object):
             def __init__(self, value=None):
                 self.value = value


More information about the pypy-commit mailing list