[pypy-commit] pypy vecopt: resolved an issue that would generate wrong packing immediates for shufps.
plan_rich
noreply at buildbot.pypy.org
Fri May 15 09:39:19 CEST 2015
Author: Richard Plangger <rich at pasra.at>
Branch: vecopt
Changeset: r77328:2a8cae0c7c8e
Date: 2015-05-15 09:39 +0200
http://bitbucket.org/pypy/pypy/changeset/2a8cae0c7c8e/
Log: Resolved an issue that would generate wrong packing immediates for
shufps. A better approach in the assembler is needed to handle these
pack/unpack instructions.
diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -6,10 +6,16 @@
from rpython.jit.metainterp.test.support import LLJitMixin
from rpython.jit.backend.x86.test.test_basic import Jit386Mixin
from rpython.jit.metainterp.warmspot import reset_jit, get_stats
+from rpython.jit.metainterp.jitprof import Profiler
+from rpython.rlib.jit import Counters
from pypy.module.micronumpy import boxes
from pypy.module.micronumpy.compile import FakeSpace, Parser, InterpreterState
from pypy.module.micronumpy.base import W_NDimArray
+def get_profiler():
+ from rpython.jit.metainterp import pyjitpl
+ return pyjitpl._warmrunnerdesc.metainterp_sd.profiler
+
class TestNumpyJit(Jit386Mixin):
graph = None
interp = None
@@ -79,12 +85,23 @@
listcomp=True,
backendopt=True,
graph_and_interp_only=True,
+ ProfilerClass=Profiler,
vectorize=True)
self.__class__.interp = interp
self.__class__.graph = graph
+ def check_vectorized(self, expected_tried, expected_success):
+ profiler = get_profiler()
+ tried = profiler.get_counter(Counters.OPT_VECTORIZE_TRY)
+ success = profiler.get_counter(Counters.OPT_VECTORIZED)
+ assert tried >= success
+ assert tried == expected_tried
+ assert success == expected_success
+
def run(self, name):
self.compile_graph()
+ profiler = get_profiler()
+ profiler.start()
reset_jit()
i = self.code_mapping[name]
retval = self.interp.eval_graph(self.graph, [i])
@@ -92,23 +109,25 @@
def define_float32_add():
return """
- a = |30|
+ a = astype(|30|, float32)
b = a + a
b -> 15
"""
def test_float32_add(self):
result = self.run("float32_add")
self.assert_float_equal(result, 15.0 + 15.0)
+ self.check_vectorized(2, 2)
def define_float_add():
return """
- a = astype(|30|, float32)
+ a = |30|
b = a + a
- b -> 17
+ b -> 15
"""
def test_float_add(self):
result = self.run("float_add")
self.assert_float_equal(result, 17.0 + 17.0)
+ self.check_vectorized(1, 1)
def define_float32_add_const():
return """
@@ -119,6 +138,7 @@
def test_float32_add_const(self):
result = self.run("float32_add_const")
self.assert_float_equal(result, 29.0 + 77.345)
+ self.check_vectorized(2, 2)
def define_float_add_const():
return """
@@ -128,6 +148,7 @@
def test_float_add_const(self):
result = self.run("float_add_const")
self.assert_float_equal(result, 29.0 + 25.5)
+ self.check_vectorized(1, 1)
def define_pow():
return """
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -7,7 +7,7 @@
DEBUG_COUNTER, debug_bridge)
from rpython.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
from rpython.jit.backend.llsupport.gcmap import allocate_gcmap
-from rpython.jit.metainterp.history import Const, Box, VOID, BoxVector
+from rpython.jit.metainterp.history import Const, Box, VOID, BoxVector, ConstInt
from rpython.jit.metainterp.history import AbstractFailDescr, INT, REF, FLOAT
from rpython.rtyper.lltypesystem import lltype, rffi, rstr, llmemory
from rpython.rtyper.lltypesystem.lloperation import llop
@@ -2576,30 +2576,36 @@
return src_loc
select = 0
if item_type == FLOAT:
- self.mc.MOVSS(tmp_loc, src_loc)
- i = 0
- while i < count:
- select |= (index+i<<(i*2))
- i += 1
- self.mc.SHUFPS_xxi(tmp_loc.value, tmp_loc.value, select)
- return tmp_loc
+ if size == 4:
+ self.mc.MOVUPS(tmp_loc, src_loc) # TODO could be aligned if xx
+ i = 0
+ while i < count:
+ select |= (index+i<<(i*2))
+ i += 1
+ self.mc.SHUFPS_xxi(tmp_loc.value, tmp_loc.value, select)
+ return tmp_loc
+ else:
+ py.test.set_trace()
+ raise NotImplementedError("shuffle by index for float64 not impl")
else:
py.test.set_trace()
raise NotImplementedError("shuffle by index for non floats")
def genop_vec_box_pack(self, op, arglocs, resloc):
- toloc, fromloc, indexloc, sizeloc = arglocs
- toarg = op.getarg(0)
- index = indexloc.value
- size = sizeloc.value
+ toloc, fromloc, tmploc = arglocs
+ result = op.result
+ indexarg = op.getarg(2)
+ assert isinstance(result, BoxVector)
+ assert isinstance(indexarg, ConstInt)
+ index = indexarg.value
+ size = result.item_size
+ #py.test.set_trace()
if size == 4:
- select = 0
+ select = (1 << 2) # move 0 -> 0, 1 -> 1 for toloc
+ # TODO
if index == 2:
- select |= (1<<0)
- select |= (2<<2)
- select |= (3<<4)
- select |= (4<<6)
+ select |= (1<<6) # move 0 -> 2, 1 -> 3 for fromloc
else:
raise NotImplementedError("index is not equal to 2")
@@ -2621,7 +2627,7 @@
self.mc.CVTPS2PD(resloc, loc0)
else:
assert index == 2
- self.mc.MOVSS_xx(tmploc.value, loc0.value)
+ self.mc.MOVUPS(tmploc, loc0) # TODO could be aligned if xx
select = (2<<0)|(3<<2) # move pos 2->0,3->1
self.mc.SHUFPS_xxi(tmploc.value, tmploc.value, select)
self.mc.CVTPS2PD(resloc, tmploc) # expand
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -1477,7 +1477,7 @@
assert not descr.is_array_of_pointers() and \
not descr.is_array_of_structs()
itemsize, ofs, _ = unpack_arraydescr(descr)
- integer = not descr.is_array_of_floats()
+ integer = not (descr.is_array_of_floats() or descr.concrete_type == FLOAT)
aligned = False
args = op.getarglist()
base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
@@ -1498,7 +1498,7 @@
value_loc = self.make_sure_var_in_reg(op.getarg(2), args)
ofs_loc = self.rm.make_sure_var_in_reg(op.getarg(1), args)
- integer = not descr.is_array_of_floats()
+ integer = not (descr.is_array_of_floats() or descr.concrete_type == FLOAT)
aligned = False
self.perform_discard(op, [base_loc, ofs_loc, value_loc,
imm(itemsize), imm(ofs), imm(integer), imm(aligned)])
@@ -1536,15 +1536,13 @@
del consider_vec_logic
def consider_vec_box_pack(self, op):
- count = op.getarg(3)
- index = op.getarg(2)
- assert isinstance(count, ConstInt)
- assert isinstance(index, ConstInt)
- itemsize = self.assembler.cpu.vector_register_size // count.value
args = op.getarglist()
- loc0 = self.make_sure_var_in_reg(op.getarg(0), args)
loc1 = self.make_sure_var_in_reg(op.getarg(1), args)
- self.perform(op, [loc0, loc1, imm(index.value), imm(itemsize)], None)
+ result = self.xrm.force_result_in_reg(op.result, op.getarg(0), args)
+ tmpxvar = TempBox()
+ tmploc = self.xrm.force_allocate_reg(tmpxvar)
+ self.xrm.possibly_free_var(tmpxvar)
+ self.perform(op, [result, loc1, tmploc], result)
def consider_vec_box_unpack(self, op):
count = op.getarg(2)
diff --git a/rpython/jit/metainterp/history.py b/rpython/jit/metainterp/history.py
--- a/rpython/jit/metainterp/history.py
+++ b/rpython/jit/metainterp/history.py
@@ -563,7 +563,7 @@
raise NotImplementedError("cannot forget value of vector")
def clonebox(self):
- return BoxVector(self.item_type, self.item_count)
+ return BoxVector(self.item_type, self.item_count, self.item_size, self.signed)
def constbox(self):
raise NotImplementedError("not possible to have a constant vector box")
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -1192,8 +1192,8 @@
v224 = vec_float_add(v219, v222, 2)
v225 = vec_cast_float_to_singlefloat(v223, 2)
v226 = vec_cast_float_to_singlefloat(v224, 2)
- vec_box_pack(v225, v226, 2, 2)
- vec_raw_store(p2, i4, v225, 4, descr=singlefloatarraydescr)
+ v227 = vec_box_pack(v225, v226, 2, 2)
+ vec_raw_store(p2, i4, v227, 4, descr=singlefloatarraydescr)
jump(p0, p1, p2, i210, i189)
"""
vopt = self.vectorize(self.parse_loop(ops))
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -13,6 +13,7 @@
from rpython.jit.metainterp.resoperation import (rop, ResOperation, GuardResOp)
from rpython.rlib.objectmodel import we_are_translated
from rpython.rlib.debug import debug_print, debug_start, debug_stop
+from rpython.rlib.jit import Counters
from rpython.rtyper.lltypesystem import lltype, rffi
class NotAVectorizeableLoop(JitException):
@@ -42,10 +43,10 @@
inline_short_preamble, start_state, False)
orig_ops = loop.operations
try:
- jitdriver_sd.profiler.count(Counters.OPT_VECTORIZE_TRY)
+ metainterp_sd.profiler.count(Counters.OPT_VECTORIZE_TRY)
opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, loop, optimizations)
opt.propagate_all_forward()
- jitdriver_sd.profiler.count(Counters.OPT_VECTORIZED)
+ metainterp_sd.profiler.count(Counters.OPT_VECTORIZED)
except NotAVectorizeableLoop:
# vectorization is not possible, propagate only normal optimizations
loop.operations = orig_ops
@@ -690,8 +691,6 @@
else:
# vbox of a variable/constant is not present here
pass
- if not we_are_translated():
- assert ptype.is_valid()
self.pack.ptype = ptype
def vector_result(self, vop, packargs):
@@ -731,6 +730,7 @@
if packed < packable:
args = [op.getoperation().getarg(argidx) for op in ops]
self.package(vbox, packed, args, packable)
+ _, vbox = self.box_to_vbox.get(vop.getarg(argidx), (-1, None))
vop.setarg(argidx, vbox)
return vbox
@@ -749,13 +749,40 @@
if pos == -1:
i += 1
continue
+ new_box = tgt_box.clonebox()
+ new_box.item_count += src_box.item_count
op = ResOperation(rop.VEC_BOX_PACK,
[tgt_box, src_box, ConstInt(i),
- ConstInt(src_box.item_count)], None)
+ ConstInt(src_box.item_count)], new_box)
self.preamble_ops.append(op)
- tgt_box.item_count += src_box.item_count
+ self._check_vec_pack(op)
i += src_box.item_count
+ # overwrite the new positions, arguments now live in new_box
+ # at a new position
+ for j in range(i):
+ arg = args[j]
+ self.box_to_vbox[arg] = (j, new_box)
+
+ def _check_vec_pack(self, op):
+ result = op.result
+ arg0 = op.getarg(0)
+ arg1 = op.getarg(1)
+ index = op.getarg(2)
+ count = op.getarg(3)
+ assert isinstance(result, BoxVector)
+ assert isinstance(arg0, BoxVector)
+ assert isinstance(index, ConstInt)
+ assert isinstance(count, ConstInt)
+ assert arg0.item_size == result.item_size
+ if isinstance(arg1, BoxVector):
+ assert arg1.item_size == result.item_size
+ else:
+ assert count.value == 1
+ assert index.value < result.item_size
+ assert index.value + count.value <= result.item_size
+ assert result.item_count > arg0.item_count
+
def expand_box_to_vector_box(self, vop, argidx):
arg = vop.getarg(argidx)
all_same_box = True
More information about the pypy-commit
mailing list