[pypy-commit] pypy vecopt: typed expand to float/int
plan_rich
noreply at buildbot.pypy.org
Fri May 15 18:40:24 CEST 2015
Author: Richard Plangger <rich at pasra.at>
Branch: vecopt
Changeset: r77340:f82a4d3d1831
Date: 2015-05-15 18:40 +0200
http://bitbucket.org/pypy/pypy/changeset/f82a4d3d1831/
Log: typed expand to float/int; added extract/insert integer x86 ops; added
missing add/sub/mul/div float ops; new unpack/pack impl for int (to be
tested); test_zjit numbers can be suffixed with i/f to show their
type.
diff --git a/pypy/module/micronumpy/compile.py b/pypy/module/micronumpy/compile.py
--- a/pypy/module/micronumpy/compile.py
+++ b/pypy/module/micronumpy/compile.py
@@ -538,9 +538,16 @@
def __repr__(self):
return '(%r %s %r)' % (self.lhs, self.name, self.rhs)
-class FloatConstant(Node):
+class NumberConstant(Node):
def __init__(self, v):
- self.v = float(v)
+ assert len(v) > 0
+ c = v[-1]
+ if c == 'f':
+ self.v = float(v[:-1])
+ elif c == 'i':
+ self.v = int(v[:-1])
+ else:
+ self.v = float(v)
def __repr__(self):
return "Const(%s)" % self.v
@@ -766,7 +773,7 @@
return W_NDimArray.new_scalar(interp.space, dtype, w_res)
_REGEXES = [
- ('-?[\d\.]+', 'number'),
+ ('-?[\d\.]+(i|f)?', 'number'),
('\[', 'array_left'),
(':', 'colon'),
('\w+', 'identifier'),
@@ -840,7 +847,7 @@
start = 0
else:
if tokens.get(0).name != 'colon':
- return FloatConstant(start_tok.v)
+ return NumberConstant(start_tok.v)
start = int(start_tok.v)
tokens.pop()
if not tokens.get(0).name in ['colon', 'number']:
@@ -938,7 +945,7 @@
while True:
token = tokens.pop()
if token.name == 'number':
- elems.append(FloatConstant(token.v))
+ elems.append(NumberConstant(token.v))
elif token.name == 'array_left':
elems.append(ArrayConstant(self.parse_array_const(tokens)))
elif token.name == 'paren_left':
diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -150,6 +150,40 @@
self.assert_float_equal(result, 29.0 + 25.5)
self.check_vectorized(1, 1)
+ def define_int_add_const():
+ return """
+ a = astype(|30|, int)
+ b = a + 1i
+ c = a + 2.0
+ x1 = b -> 7
+ x2 = b -> 8
+ x3 = c -> 11
+ x4 = c -> 12
+ x1 + x2 + x3 + x4
+ """
+ def test_int_add_const(self):
+ result = self.run("int_add_const")
+ assert int(result) == 7+1+8+1+11+2+12+2
+ self.check_vectorized(1, 1)
+
+ def define_int32_add_const():
+ return """
+ a = astype(|30|, int32)
+ b = a + 1i
+ c = a + 2.0
+ x1 = b -> 7
+ x2 = b -> 8
+ x3 = c -> 11
+ x4 = c -> 12
+ x1 + x2 + x3 + x4
+ """
+ def test_int32_add_const(self):
+ result = self.run("int32_add_const")
+ assert int(result) == 7+1+8+1+11+2+12+2
+ self.check_vectorized(1, 1)
+
+
+
def define_pow():
return """
a = |30| ** 2
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -2544,9 +2544,19 @@
del genop_vec_float_arith
def genop_vec_int_signext(self, op, arglocs, resloc):
- pass
+ srcloc, sizeloc, tosizeloc = arglocs
+ size = sizeloc.value
+ tosize = tosizeloc.value
+ if size == 8 and tosize == 4:
+ # is there a better sequence to move them?
+ self.mc.MOVDQU(resloc, srcloc)
+ self.mc.PSRLDQ(srcloc, 8)
+ self.mc.PUNPCKLDQ(resloc, srcloc)
+ else:
+ py.test.set_trace()
+ raise NotImplementedError("sign ext missing")
- def genop_vec_expand(self, op, arglocs, resloc):
+ def genop_vec_float_expand(self, op, arglocs, resloc):
loc0, countloc = arglocs
count = countloc.value
if count == 1:
@@ -2554,24 +2564,6 @@
elif count == 2:
self.mc.MOVDDUP(resloc, loc0)
- def genop_vec_float_unpack(self, op, arglocs, resloc):
- loc0, tmploc, indexloc, countloc = arglocs
- count = countloc.value
- index = indexloc.value
- box = op.getarg(0)
- assert isinstance(box, BoxVector)
- item_type = box.item_type
- size = box.item_size
- if size == 4:
- tmploc = self._shuffle_by_index(loc0, tmploc, item_type, size, index, count)
- self.mc.MOVD32_rx(resloc.value, tmploc.value)
- elif size == 8:
- pass
- #if index == 1:
- # self.mc.SHUFPD_xxi(resloc, loc0, 0|(1<<2))
- #else:
- # self.mc.UNPCKHPD(resloc, loc0)
-
def _shuffle_by_index(self, src_loc, tmp_loc, item_type, size, index, count):
if index == 0 and count == 1:
return src_loc
@@ -2586,13 +2578,10 @@
self.mc.SHUFPS_xxi(tmp_loc.value, tmp_loc.value, select)
return tmp_loc
else:
- py.test.set_trace()
raise NotImplementedError("shuffle by index for float64 not impl")
else:
- py.test.set_trace()
raise NotImplementedError("shuffle by index for non floats")
-
def genop_vec_float_pack(self, op, arglocs, resloc):
resultloc, fromloc, tmploc = arglocs
result = op.result
@@ -2622,9 +2611,73 @@
elif size == 8:
raise NotImplementedError("pack: float double pack")
+ def genop_vec_int_pack(self, op, arglocs, resloc):
+ resultloc, sourceloc, residxloc, srcidxloc, countloc, sizeloc = arglocs
+ size = sizeloc.value
+ srcidx = srcidxloc.value
+ residx = residxloc.value
+ count = countloc.value
+ si = srcidx
+ ri = residx
+ k = count
+ while k > 0:
+ if size == 8:
+ if resultloc.is_xmm:
+ self.mc.PEXTRQ_rxi(X86_64_SCRATCH_REG.value, sourceloc.value, si)
+ self.mc.PINSRQ_xri(resloc.value, X86_64_SCRATCH_REG.value, ri)
+ else:
+ self.mc.PEXTRQ_rxi(resloc.value, sourceloc.value, si)
+ elif size == 4:
+ if resultloc.is_xmm:
+ self.mc.PEXTRD_rxi(X86_64_SCRATCH_REG.value, sourceloc.value, si)
+ self.mc.PINSRD_xri(resloc.value, X86_64_SCRATCH_REG.value, ri)
+ else:
+ self.mc.PEXTRD_rxi(resloc.value, sourceloc.value, si)
+ elif size == 2:
+ if resultloc.is_xmm:
+ self.mc.PEXTRW_rxi(X86_64_SCRATCH_REG.value, sourceloc.value, si)
+ self.mc.PINSRW_xri(resloc.value, X86_64_SCRATCH_REG.value, ri)
+ else:
+ self.mc.PEXTRW_rxi(resloc.value, sourceloc.value, si)
+ elif size == 1:
+ if resultloc.is_xmm:
+ self.mc.PEXTRB_rxi(X86_64_SCRATCH_REG.value, sourceloc.value, si)
+ self.mc.PINSRB_xri(resloc.value, X86_64_SCRATCH_REG.value, ri)
+ else:
+ self.mc.PEXTRB_rxi(resloc.value, sourceloc.value, si)
+ si += 1
+ ri += 1
+ k -= 1
+
+ genop_vec_int_unpack = genop_vec_int_pack
+
+ def genop_vec_float_unpack(self, op, arglocs, resloc):
+ loc0, tmploc, indexloc, countloc = arglocs
+ count = countloc.value
+ index = indexloc.value
+ box = op.getarg(0)
+ assert isinstance(box, BoxVector)
+ item_type = box.item_type
+ size = box.item_size
+ if size == 4:
+ tmploc = self._shuffle_by_index(loc0, tmploc, item_type, size, index, count)
+ self.mc.MOVD32_rx(resloc.value, tmploc.value)
+ elif size == 8:
+ pass
+ #if index == 1:
+ # self.mc.SHUFPD_xxi(resloc, loc0, 0|(1<<2))
+ #else:
+ # self.mc.UNPCKHPD(resloc, loc0)
+
+
def genop_vec_cast_float_to_singlefloat(self, op, arglocs, resloc):
- argloc, _ = arglocs
- self.mc.CVTPD2PS(resloc, argloc)
+ self.mc.CVTPD2PS(resloc, arglocs[0])
+
+ def genop_vec_cast_float_to_int(self, op, arglocs, resloc):
+ self.mc.CVTPD2DQ(resloc, arglocs[0])
+
+ def genop_vec_cast_int_to_float(self, op, arglocs, resloc):
+ self.mc.CVTDQ2PD(resloc, arglocs[0])
def genop_vec_cast_singlefloat_to_float(self, op, arglocs, resloc):
loc0, tmploc, indexloc = arglocs
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -22,7 +22,8 @@
from rpython.jit.codewriter import longlong
from rpython.jit.codewriter.effectinfo import EffectInfo
from rpython.jit.metainterp.history import (Box, Const, ConstInt, ConstPtr,
- ConstFloat, BoxInt, BoxFloat, INT, REF, FLOAT, VECTOR, TargetToken)
+ ConstFloat, BoxInt, BoxFloat, BoxVector, INT, REF, FLOAT, VECTOR,
+ TargetToken)
from rpython.jit.metainterp.resoperation import rop, ResOperation
from rpython.rlib import rgc
from rpython.rlib.objectmodel import we_are_translated
@@ -1556,23 +1557,57 @@
self.xrm.possibly_free_var(tmpxvar)
self.perform(op, [loc0, tmploc, imm(index.value), imm(count.value)], result)
- def consider_vec_expand(self, op):
- count = op.getarg(1)
+ def consider_vec_int_pack(self, op):
+ index = op.getarg(2)
+ count = op.getarg(3)
+ assert isinstance(index, ConstInt)
+ assert isinstance(count, ConstInt)
args = op.getarglist()
- loc0 = self.make_sure_var_in_reg(op.getarg(0), args)
- result = self.force_allocate_reg(op.result, args)
- self.perform(op, [loc0, imm(count.value)], result)
+ srcloc = self.make_sure_var_in_reg(op.getarg(1), args)
+ resloc = self.xrm.force_result_in_reg(op.result, op.getarg(0), args)
+ residx = 0
+ assert isinstance(op.result, BoxVector)
+ args = op.getarglist()
+ size = op.result.item_size
+ arglocs = [resloc, srcloc, imm(residx), imm(index.value), imm(count.value), imm(size)]
+ self.perform(op, arglocs, resloc)
+
+ def consider_vec_int_unpack(self, op):
+ index = op.getarg(1)
+ count = op.getarg(2)
+ assert isinstance(index, ConstInt)
+ assert isinstance(count, ConstInt)
+ args = op.getarglist()
+ srcloc = self.make_sure_var_in_reg(op.getarg(0), args)
+ resloc = self.xrm.force_result_in_reg(op.result, op.getarg(0), args)
+ residx = 0
+ assert isinstance(op.result, BoxVector)
+ args = op.getarglist()
+ size = op.result.item_size
+ arglocs = [resloc, srcloc, imm(residx), imm(index.value), imm(count.value), imm(size)]
+ self.perform(op, arglocs, resloc)
+
+ def consider_vec_float_expand(self, op):
+ args = op.getarglist()
+ srcloc = self.make_sure_var_in_reg(op.getarg(0), args)
+ resloc = self.force_allocate_reg(op.result, args)
+ vres = op.result
+ assert isinstance(vres, BoxVector)
+ count = vres.item_count
+ size = vres.item_size
+ self.perform(op, [srcloc, imm(size), imm(count)], resloc)
def consider_vec_int_signext(self, op):
- # there is not much we can do in this case. arithmetic is
- # done on the vector register, if there is a wrap around,
- # it is lost, because the register does not have enough bits
- # to save it.
- #argloc = self.loc(op.getarg(0))
- self.xrm.force_result_in_reg(op.result, op.getarg(0))
- #if op.getarg(1).value != op.getarg(2).value:
- # raise NotImplementedError("signext not implemented")
-
+ args = op.getarglist()
+ srcloc = self.make_sure_var_in_reg(op.getarg(0), args)
+ resloc = self.xrm.force_result_in_reg(op.result, op.getarg(0), args)
+ sizearg = op.getarg(0)
+ result = op.result
+ assert isinstance(sizearg, BoxVector)
+ assert isinstance(result, BoxVector)
+ size = sizearg.item_size
+ tosize = result.item_size
+ self.perform(op, [srcloc, imm(size), imm(tosize)], resloc)
def consider_vec_box(self, op):
# pseudo instruction, needed to create a new variable
@@ -1583,6 +1618,7 @@
def consider_vec_cast_float_to_singlefloat(self, op):
count = op.getarg(1)
+ assert isinstance(count, ConstInt)
args = op.getarglist()
loc0 = self.make_sure_var_in_reg(op.getarg(0), args)
result = self.xrm.force_result_in_reg(op.result, op.getarg(0), args)
@@ -1590,6 +1626,7 @@
def consider_vec_cast_singlefloat_to_float(self, op):
index = op.getarg(1)
+ assert isinstance(index, ConstInt)
args = op.getarglist()
loc0 = self.make_sure_var_in_reg(op.getarg(0), args)
result = self.force_allocate_reg(op.result, args)
@@ -1598,6 +1635,16 @@
self.xrm.possibly_free_var(tmpxvar)
self.perform(op, [loc0, tmploc, imm(index.value)], result)
+ def consider_vec_cast_float_to_int(self, op):
+ count = op.getarg(1)
+ assert isinstance(count, ConstInt)
+ args = op.getarglist()
+ loc0 = self.make_sure_var_in_reg(op.getarg(0), args)
+ result = self.xrm.force_result_in_reg(op.result, op.getarg(0), args)
+ self.perform(op, [loc0, imm(count.value)], result)
+
+ consider_vec_cast_int_to_float = consider_vec_cast_float_to_int
+
# ________________________________________
def not_implemented_op(self, op):
diff --git a/rpython/jit/backend/x86/regloc.py b/rpython/jit/backend/x86/regloc.py
--- a/rpython/jit/backend/x86/regloc.py
+++ b/rpython/jit/backend/x86/regloc.py
@@ -648,10 +648,20 @@
MOVUPS = _binaryop('MOVUPS')
MOVUPD = _binaryop('MOVUPD')
ADDSD = _binaryop('ADDSD')
- ADDPD = _binaryop('ADDPD')
SUBSD = _binaryop('SUBSD')
MULSD = _binaryop('MULSD')
DIVSD = _binaryop('DIVSD')
+
+ # packed
+ ADDPD = _binaryop('ADDPD')
+ ADDPS = _binaryop('ADDPS')
+ SUBPD = _binaryop('SUBPD')
+ SUBPS = _binaryop('SUBPS')
+ MULPD = _binaryop('MULPD')
+ MULPS = _binaryop('MULPS')
+ DIVPD = _binaryop('DIVPD')
+ DIVPS = _binaryop('DIVPS')
+
UCOMISD = _binaryop('UCOMISD')
CVTSI2SD = _binaryop('CVTSI2SD')
CVTTSD2SI = _binaryop('CVTTSD2SI')
@@ -659,6 +669,8 @@
CVTSS2SD = _binaryop('CVTSS2SD')
CVTPD2PS = _binaryop('CVTPD2PS')
CVTPS2PD = _binaryop('CVTPS2PD')
+ CVTPD2DQ = _binaryop('CVTPD2DQ')
+ CVTDQ2PD = _binaryop('CVTDQ2PD')
SQRTSD = _binaryop('SQRTSD')
@@ -670,10 +682,14 @@
PADDW = _binaryop('PADDW')
PADDB = _binaryop('PADDB')
PSUBQ = _binaryop('PSUBQ')
+ PSUBD = _binaryop('PSUBD')
+ PSUBW = _binaryop('PSUBW')
+ PSUBQ = _binaryop('PSUBQ')
PAND = _binaryop('PAND')
POR = _binaryop('POR')
PXOR = _binaryop('PXOR')
PCMPEQD = _binaryop('PCMPEQD')
+ PSRLDQ = _binaryop('PSRLDQ')
MOVDQ = _binaryop('MOVDQ')
MOVD32 = _binaryop('MOVD32')
@@ -685,6 +701,11 @@
UNPCKHPS = _binaryop('UNPCKHPS')
UNPCKLPS = _binaryop('UNPCKLPS')
+ PUNPCKLQDQ = _binaryop('PUNPCKLQDQ')
+ PUNPCKHQDQ = _binaryop('PUNPCKHQDQ')
+ PUNPCKLDQ = _binaryop('PUNPCKLDQ')
+ PUNPCKHDQ = _binaryop('PUNPCKHDQ')
+
CALL = _relative_unaryop('CALL')
JMP = _relative_unaryop('JMP')
diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py
--- a/rpython/jit/backend/x86/rx86.py
+++ b/rpython/jit/backend/x86/rx86.py
@@ -710,6 +710,8 @@
CVTPD2PS_xx = xmminsn('\x66', rex_nw, '\x0F\x5A', register(1, 8), register(2), '\xC0')
CVTPS2PD_xx = xmminsn(rex_nw, '\x0F\x5A', register(1, 8), register(2), '\xC0')
+ CVTDQ2PD_xx = xmminsn('\xF3', rex_nw, '\x0F\xE6', register(1, 8), register(2), '\xC0')
+ CVTPD2DQ_xx = xmminsn('\xF2', rex_nw, '\x0F\xE6', register(1, 8), register(2), '\xC0')
# These work on machine sized registers, so "MOVDQ" is MOVD when running
# on 32 bits and MOVQ when running on 64 bits. "MOVD32" is always 32-bit.
@@ -718,6 +720,7 @@
MOVDQ_rx = xmminsn('\x66', rex_w, '\x0F\x7E', register(2, 8), register(1), '\xC0')
MOVDQ_xr = xmminsn('\x66', rex_w, '\x0F\x6E', register(1, 8), register(2), '\xC0')
MOVDQ_xb = xmminsn('\x66', rex_w, '\x0F\x6E', register(1, 8), stack_bp(2))
+ MOVDQ_xx = xmminsn('\xF3', rex_nw, '\x0F\x7E', register(1, 8), register(2), '\xC0')
MOVD32_rx = xmminsn('\x66', rex_nw, '\x0F\x7E', register(2, 8), register(1), '\xC0')
MOVD32_sx = xmminsn('\x66', rex_nw, '\x0F\x7E', register(2, 8), stack_sp(1))
@@ -729,14 +732,26 @@
MOVSS_xx = xmminsn('\xF3', rex_nw, '\x0F\x10', register(1,8), register(2), '\xC0')
- PSRLDQ_xi = xmminsn('\x66\x0F\x73', orbyte(0xd8), mem_reg_plus_const(1))
+ PSRLDQ_xi = xmminsn('\x66', rex_nw, '\x0F\x73', register(1,8), immediate(2, 'b'))
UNPCKLPD_xx = xmminsn('\x66', rex_nw, '\x0F\x14', register(1, 8), register(2), '\xC0')
UNPCKHPD_xx = xmminsn('\x66', rex_nw, '\x0F\x15', register(1, 8), register(2), '\xC0')
UNPCKLPS_xx = xmminsn( rex_nw, '\x0F\x14', register(1, 8), register(2), '\xC0')
UNPCKHPS_xx = xmminsn( rex_nw, '\x0F\x15', register(1, 8), register(2), '\xC0')
MOVDDUP_xx = xmminsn('\xF2', rex_nw, '\x0F\x12', register(1, 8), register(2), '\xC0')
SHUFPS_xxi = xmminsn(rex_nw, '\x0F\xC6', register(1,8), register(2), '\xC0', immediate(3, 'b'))
- # SSE4.1 PEXTRDD_rxi = xmminsn('\x66', rex_nw, '\x0F\x3A\x14', register(1,8), register(2), immediate(3,'b'))
+
+ PSHUFD_xxi = xmminsn('\x66', rex_nw, '\x0F\x70', register(1,8), register(2), '\xC0', immediate(3, 'b'))
+
+ # following require SSE4_1
+ PEXTRQ_rxi = xmminsn('\x66', rex_w, '\x0F\x3A\x16', register(1,8), register(2), '\xC0', immediate(3, 'b'))
+ PEXTRD_rxi = xmminsn('\x66', rex_nw, '\x0F\x3A\x16', register(1,8), register(2), '\xC0', immediate(3, 'b'))
+ PEXTRW_rxi = xmminsn('\x66', rex_nw, '\x0F\xC4', register(1,8), register(2), '\xC0', immediate(3, 'b'))
+ PEXTRB_rxi = xmminsn('\x66', rex_nw, '\x0F\x3A\x14', register(1,8), register(2), '\xC0', immediate(3, 'b'))
+ PINSRQ_xri = xmminsn('\x66', rex_w, '\x0F\x3A\x22', register(1,8), register(2), '\xC0', immediate(3, 'b'))
+ PINSRD_xri = xmminsn('\x66', rex_nw, '\x0F\x3A\x22', register(1,8), register(2), '\xC0', immediate(3, 'b'))
+ PINSRW_xri = xmminsn('\x66', rex_nw, '\x0F\xC5', register(1,8), register(2), '\xC0', immediate(3, 'b'))
+ PINSRB_xri = xmminsn('\x66', rex_nw, '\x0F\x3A\x20', register(1,8), register(2), '\xC0', immediate(3, 'b'))
+
# ------------------------------------------------------------
Conditions = {
@@ -963,6 +978,9 @@
define_pxmm_insn('POR_x*', '\xEB')
define_pxmm_insn('PXOR_x*', '\xEF')
define_pxmm_insn('PUNPCKLDQ_x*', '\x62')
+define_pxmm_insn('PUNPCKHDQ_x*', '\x6A')
+define_pxmm_insn('PUNPCKLQDQ_x*', '\x6C')
+define_pxmm_insn('PUNPCKHQDQ_x*', '\x6D')
define_pxmm_insn('PCMPEQD_x*', '\x76')
# ____________________________________________________________
diff --git a/rpython/jit/metainterp/history.py b/rpython/jit/metainterp/history.py
--- a/rpython/jit/metainterp/history.py
+++ b/rpython/jit/metainterp/history.py
@@ -513,6 +513,8 @@
# ____________________________________________________________
class PrimitiveTypeMixin(object):
+ _mixin_ = True
+
def gettype(self):
raise NotImplementedError
def getsize(self):
@@ -547,6 +549,7 @@
_extended_display = False
def __init__(self, item_type=FLOAT, item_count=2, item_size=8, signed=True):
+ assert item_type in (FLOAT, INT)
self.item_type = item_type
self.item_count = item_count
self.item_size = item_size
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -405,7 +405,7 @@
cj = ConstInt(j)
ci = ConstInt(1)
opnum = rop.VEC_FLOAT_UNPACK
- if vbox.type == INT:
+ if vbox.item_type == INT:
opnum = rop.VEC_INT_UNPACK
unpack_op = ResOperation(opnum, [vbox, cj, ci], arg_cloned)
self.emit_operation(unpack_op)
@@ -534,6 +534,7 @@
UNKNOWN_TYPE = '-'
def __init__(self, type, size, signed):
+ assert type in (FLOAT, INT, PackType.UNKNOWN_TYPE)
self.type = type
self.size = size
self.signed = signed
@@ -563,7 +564,8 @@
def record_vbox(self, vbox):
if self.type == PackType.UNKNOWN_TYPE:
- self.type = vbox.type
+ self.type = vbox.item_type
+ assert self.type in (FLOAT, INT)
self.signed = vbox.signed
if vbox.item_size > self.size:
self.size = vbox.item_size
@@ -609,10 +611,10 @@
rop.VEC_RAW_STORE: PackArgs((2,), result=False),
rop.VEC_SETARRAYITEM_RAW: PackArgs((2,), result=False),
- rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT: PackArgs((0,), result_type=PackType(FLOAT, 4, True)),
- rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: PackArgs((0,), result_type=PackType(FLOAT, 8, True), index=1),
+ rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT: PackArgs((0,), result_type=PackType(FLOAT, 4, False)),
+ rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: PackArgs((0,), result_type=PackType(FLOAT, 8, False), index=1),
rop.VEC_CAST_FLOAT_TO_INT: PackArgs((0,), result_type=PackType(INT, 8, True)),
- rop.VEC_CAST_INT_TO_FLOAT: PackArgs((0,), result_type=PackType(FLOAT, 8, True)),
+ rop.VEC_CAST_INT_TO_FLOAT: PackArgs((0,), result_type=PackType(FLOAT, 8, False)),
}
@@ -722,15 +724,13 @@
ops = self.pack.operations
_, vbox = self.box_to_vbox.get(vop.getarg(argidx), (-1, None))
if not vbox:
- if expand:
- vbox = self.expand_box_to_vector_box(vop, argidx)
- else:
- assert False, "not allowed to expand" \
- ", but do not have a vector box as arg"
+ vbox = self.expand_box_to_vector_box(vop, argidx)
# vbox is a primitive type mixin
packable = self.vec_reg_size // self.pack.ptype.getsize()
packed = vbox.item_count
if packed < packable:
+ # due to casting problems values might be scattered along
+ # different vector boxes
args = [op.getoperation().getarg(argidx) for op in ops]
self.package(vbox, packed, args, packable)
_, vbox = self.box_to_vbox.get(vop.getarg(argidx), (-1, None))
@@ -745,7 +745,7 @@
v1/2 = [A,B,X,Y]
"""
opnum = rop.VEC_FLOAT_PACK
- if tgt_box.type == INT:
+ if tgt_box.item_type == INT:
opnum = rop.VEC_INT_PACK
arg_count = len(args)
i = index
@@ -801,9 +801,16 @@
break
i += 1
- vbox = BoxVector(arg.type, self.pack_ops)
+ box_type = arg.type
+ if isinstance(arg, BoxVector):
+ box_type = arg.item_type
+ expand_opnum = rop.VEC_FLOAT_EXPAND
+ if box_type == INT:
+ expand_opnum = rop.VEC_INT_EXPAND
+
+ vbox = BoxVector(box_type, self.pack_ops)
if all_same_box:
- expand_op = ResOperation(rop.VEC_EXPAND, [arg, ConstInt(self.pack_ops)], vbox)
+ expand_op = ResOperation(expand_opnum, [arg], vbox)
self.preamble_ops.append(expand_op)
else:
resop = ResOperation(rop.VEC_BOX, [ConstInt(self.pack_ops)], vbox)
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -475,7 +475,8 @@
'VEC_FLOAT_PACK/4', # VEC_FLOAT_PACK(vX, var/const, index, item_count)
'VEC_INT_UNPACK/3', # iX|fX = VEC_INT_UNPACK(vX, index, item_count)
'VEC_INT_PACK/4', # VEC_INT_PACK(vX, var/const, index, item_count)
- 'VEC_EXPAND/2', # vX = VEC_EXPAND(var/const, item_count)
+ 'VEC_FLOAT_EXPAND/1', # vX = VEC_FLOAT_EXPAND(var/const, item_count)
+ 'VEC_INT_EXPAND/1', # vX = VEC_INT_EXPAND(var/const, item_count)
'VEC_BOX/1',
'_VEC_PURE_LAST',
#
More information about the pypy-commit
mailing list