[pypy-commit] pypy vecopt2: added a new test to collapse guards, I plan to restructure this and make it on the level of a dependency graph
plan_rich
noreply at buildbot.pypy.org
Tue May 5 09:46:22 CEST 2015
Author: Richard Plangger <rich at pasra.at>
Branch: vecopt2
Changeset: r77127:8f307136e6c5
Date: 2015-04-30 09:54 +0200
http://bitbucket.org/pypy/pypy/changeset/8f307136e6c5/
Log: Added a new test to collapse guards. I plan to restructure this and
make it work on the level of a dependency graph. Each Node class now has
the scheduled index as a property.
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -1497,7 +1497,7 @@
def consider_vec_int_add(self, op):
count = op.getarg(2)
assert isinstance(count, ConstInt)
- itemsize = 16 // count.value
+ itemsize = self.assembler.cpu.vector_register_size // count.value
args = op.getarglist()
loc1 = self.xrm.make_sure_var_in_reg(op.getarg(1), args)
loc0 = self.xrm.force_result_in_reg(op.result, op.getarg(0), args)
@@ -1508,8 +1508,10 @@
# done on the vector register, if there is a wrap around,
# it is lost, because the register does not have enough bits
# to save it.
- argloc = self.loc(op.getarg(0))
- self.force_allocate_reg(op.result, selected_reg=argloc)
+ #argloc = self.loc(op.getarg(0))
+ self.xrm.force_result_in_reg(op.result, op.getarg(0))
+ if op.getarg(1).value != op.getarg(2).value:
+ raise NotImplementedError("signext not implemented")
def consider_guard_early_exit(self, op):
pass
diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py
--- a/rpython/jit/metainterp/optimizeopt/dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/dependency.py
@@ -74,6 +74,7 @@
self.memory_ref = None
self.pack = None
self.emitted = False
+ self.schedule_position = -1
def getoperation(self):
return self.op
@@ -643,16 +644,17 @@
del self.schedulable_nodes[index]
self.schedulable_nodes.append(node)
- def schedule_all(self, opindices):
+ def schedule_all(self, opindices, position):
while len(opindices) > 0:
opidx = opindices.pop()
for i,node in enumerate(self.schedulable_nodes):
if node == opidx:
- self.schedule(i)
+ self.schedule(i, position)
break
- def schedule(self, index):
+ def schedule(self, index, position):
node = self.schedulable_nodes[index]
+ node.schedule_position = position
del self.schedulable_nodes[index]
to_del = []
for dep in node.provides()[:]: # COPY
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -973,5 +973,27 @@
vopt = self.schedule(self.parse_loop(ops),1)
self.assert_equal(vopt.loop, self.parse_loop(opt))
+ def test_collapse_index_guard_1(self):
+ ops = """
+ [p0,i0]
+ guard_early_exit() []
+ i1 = getarrayitem_raw(p0, i0, descr=intarraydescr)
+ i2 = int_add(i0, 1)
+ i3 = int_lt(i2, 102)
+ guard_true(i3) [p0,i0]
+ jump(p0,i2)
+ """
+ opt="""
+ [p0,i0]
+ i2 = int_add(i0, 16)
+ i3 = int_lt(i2, 102)
+ guard_true(i3) [p0,i0]
+ i1 = vec_getarrayitem_raw(p0, i0, 16, descr=intarraydescr)
+ jump(p0,i2)
+ """
+ vopt = self.schedule(self.parse_loop(ops),15)
+ self.assert_equal(vopt.loop, self.parse_loop(opt))
+
+
class TestLLtype(BaseTestVectorize, LLtypeMixin):
pass
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -96,10 +96,12 @@
self.find_adjacent_memory_refs()
self.extend_packset()
self.combine_packset()
+ self.collapse_index_guards()
self.schedule()
def emit_operation(self, op):
- if op.getopnum() == rop.GUARD_EARLY_EXIT:
+ if op.getopnum() == rop.GUARD_EARLY_EXIT or \
+ op.getopnum() == rop.DEBUG_MERGE_POINT:
return
self._last_emitted_op = op
self._newoperations.append(op)
@@ -261,8 +263,8 @@
operations = loop.operations
self.packset = PackSet(self.dependency_graph, operations,
- self.unroll_count,
- self.smallest_type_bytes)
+ self.unroll_count,
+ self.smallest_type_bytes)
memory_refs = self.dependency_graph.memory_refs.items()
# initialize the pack set
for node_a,memref_a in memory_refs:
@@ -354,20 +356,22 @@
pack = candidate.pack
if scheduler.schedulable(pack.operations):
vop = scheduler.sched_data.as_vector_operation(pack)
+ position = len(self._newoperations)
self.emit_operation(vop)
- scheduler.schedule_all(pack.operations)
+ scheduler.schedule_all(pack.operations, position)
else:
scheduler.schedule_later(0)
else:
+ if candidate.getopnum() == rop.GUARD_EARLY_EXIT:
+ pass
+ position = len(self._newoperations)
self.emit_operation(candidate.getoperation())
- scheduler.schedule(0)
+ scheduler.schedule(0, position)
if not we_are_translated():
for node in self.dependency_graph.nodes:
assert node.emitted
self.loop.operations = self._newoperations[:]
- #self.collapse_index_guards()
- #self.clear_newoperations()
def relax_index_guards(self):
label_idx = 0
@@ -411,24 +415,25 @@
guard_node.relax_guard_to(self.future_condition)
def collapse_index_guards(self):
- final_ops = []
- last_guard = None
- is_after_relax = False
- for op in self._newoperations:
- if op.getopnum() == rop.GUARD_EARLY_EXIT:
- assert last_guard is not None
- final_ops.append(last_guard)
- is_after_relax = True
- continue
- if not is_after_relax:
- if op.is_guard():
- last_guard = op
- else:
- final_ops.append(op)
- else:
- final_ops.append(op)
- assert is_after_relax
- return final_ops
+ pass
+ #final_ops = []
+ #last_guard = None
+ #is_after_relax = False
+ #for op in self._newoperations:
+ # if op.getopnum() == rop.GUARD_EARLY_EXIT:
+ # assert last_guard is not None
+ # final_ops.append(last_guard)
+ # is_after_relax = True
+ # continue
+ # if not is_after_relax:
+ # if op.is_guard():
+ # last_guard = op
+ # else:
+ # final_ops.append(op)
+ # else:
+ # final_ops.append(op)
+ #assert is_after_relax
+ #return final_ops
def must_unpack_result_to_exec(op, target_op):
diff --git a/rpython/jit/metainterp/test/test_vectorize.py b/rpython/jit/metainterp/test/test_vectorize.py
--- a/rpython/jit/metainterp/test/test_vectorize.py
+++ b/rpython/jit/metainterp/test/test_vectorize.py
@@ -64,23 +64,23 @@
myjitdriver = JitDriver(greens = [],
reds = ['i','d','va','vb','vc'],
vectorize=True)
- ET = rffi.SIGNED
- T = lltype.Array(ET, hints={'nolength': True})
+ T = lltype.Array(rffi.INT, hints={'nolength': True})
def f(d):
i = 0
va = lltype.malloc(T, d, flavor='raw', zero=True)
vb = lltype.malloc(T, d, flavor='raw', zero=True)
vc = lltype.malloc(T, d, flavor='raw', zero=True)
for j in range(d):
- va[j] = j
- vb[j] = j
+ va[j] = rffi.r_int(j)
+ vb[j] = rffi.r_int(j)
while i < d:
myjitdriver.can_enter_jit(i=i, d=d, va=va, vb=vb, vc=vc)
myjitdriver.jit_merge_point(i=i, d=d, va=va, vb=vb, vc=vc)
a = va[i]
b = vb[i]
- vc[i] = a+b
+ ec = intmask(a)+intmask(b)
+ vc[i] = rffi.r_int(ec)
i += 1
res = 0
More information about the pypy-commit
mailing list