[pypy-commit] pypy vecopt2: forgot to attach the new files, added another unrolling test case and added some comments to the test cases
plan_rich
noreply at buildbot.pypy.org
Tue May 5 09:45:13 CEST 2015
Author: Richard Plangger <rich at pasra.at>
Branch: vecopt2
Changeset: r77067:83d24715296e
Date: 2015-03-10 18:05 +0100
http://bitbucket.org/pypy/pypy/changeset/83d24715296e/
Log: forgot to attach the new files, added another unrolling test case
and added some comments to the test cases
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
deleted file mode 100644
--- a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
+++ /dev/null
@@ -1,105 +0,0 @@
-import py
-from rpython.rlib.objectmodel import instantiate
-from rpython.jit.metainterp.optimizeopt.test.test_util import (
- LLtypeMixin, BaseTest, FakeMetaInterpStaticData, convert_old_style_to_targets)
-from rpython.jit.metainterp.history import TargetToken, JitCellToken, TreeLoop
-from rpython.jit.metainterp.optimizeopt import optimize_trace
-import rpython.jit.metainterp.optimizeopt.optimizer as optimizeopt
-import rpython.jit.metainterp.optimizeopt.virtualize as virtualize
-from rpython.jit.metainterp.optimizeopt.dependency import DependencyGraph
-from rpython.jit.metainterp.optimizeopt.unroll import Inliner
-from rpython.jit.metainterp.optimizeopt.unfold import OptUnfold
-from rpython.jit.metainterp.optimize import InvalidLoop
-from rpython.jit.metainterp.history import ConstInt, BoxInt, get_const_ptr_for_string
-from rpython.jit.metainterp import executor, compile, resume
-from rpython.jit.metainterp.resoperation import rop, ResOperation
-from rpython.rlib.rarithmetic import LONG_BIT
-
-class DepTestHelper(BaseTest):
-
- enable_opts = "intbounds:rewrite:virtualize:string:earlyforce:pure:heap:unfold"
-
- def build_dependency(self, ops):
- loop = self.parse_loop(ops)
- return DependencyGraph(None, loop)
-
- def parse_loop(self, ops):
- loop = self.parse(ops, postprocess=self.postprocess)
- token = JitCellToken()
- loop.operations = [ResOperation(rop.LABEL, loop.inputargs, None,
- descr=TargetToken(token))] + loop.operations
- if loop.operations[-1].getopnum() == rop.JUMP:
- loop.operations[-1].setdescr(token)
- return loop
-
- def assert_unfold_loop(self, loop, unroll_factor, unfolded_loop, call_pure_results=None):
- OptUnfold.force_unroll_factor = unroll_factor
- optloop = self._do_optimize_loop(loop, call_pure_results, export_state=True)
- self.assert_equal(optloop, unfolded_loop)
-
- def assert_def_use(self, graph, from_instr_index, to_instr_index):
- assert graph.instr_dependency(from_instr_index,
- to_instr_index) is not None, \
- " it is expected that instruction at index" + \
- " %d depend on instr on index %d but it is not" \
- % (from_instr_index, to_instr_index)
-
-class BaseTestDependencyGraph(DepTestHelper):
- def test_simple(self):
- ops = """
- []
- i1 = int_add(1,1)
- i2 = int_add(i1,1)
- guard_value(i2,3) []
- jump()
- """
- dep_graph = self.build_dependency(ops)
- self.assert_def_use(dep_graph, 1, 2)
- self.assert_def_use(dep_graph, 2, 3)
-
- def test_label_def(self):
- ops = """
- [i3]
- i1 = int_add(i3,1)
- guard_value(i1,0) []
- jump(i1)
- """
- dep_graph = self.build_dependency(ops)
- self.assert_def_use(dep_graph, 0, 1)
- self.assert_def_use(dep_graph, 1, 2)
- self.assert_def_use(dep_graph, 1, 3)
-
- def test_unroll(self):
- ops = """
- [p0,p1,p2,i0]
- i1 = raw_load(p1, i0, descr=floatarraydescr)
- i2 = raw_load(p2, i0, descr=floatarraydescr)
- i3 = int_add(i1,i2)
- raw_store(p0, i0, i3, descr=floatarraydescr)
- i4 = int_add(i0, 1)
- i5 = int_le(i4, 10)
- guard_true(i5) []
- jump(p0,p1,p2,i4)
- """
- unfolded_ops = """
- [p0,p1,p2,i0]
- i1 = raw_load(p1, i0, descr=floatarraydescr)
- i2 = raw_load(p2, i0, descr=floatarraydescr)
- i3 = int_add(i1,i2)
- raw_store(p0, i0, i3, descr=floatarraydescr)
- i4 = int_add(i0, 1)
- i5 = int_le(i4, 10)
- guard_true(i5) []
- i6 = raw_load(p1, i4, descr=floatarraydescr)
- i7 = raw_load(p2, i4, descr=floatarraydescr)
- i8 = int_add(i6,i7)
- raw_store(p0, i4, i8, descr=floatarraydescr)
- i9 = int_add(i4, 1)
- i10 = int_le(i9, 10)
- guard_true(i10) []
- jump(p0,p1,p2,i9)
- """
- self.assert_unfold_loop(self.parse_loop(ops), 2, self.parse_loop(unfolded_ops))
-
-class TestLLtype(BaseTestDependencyGraph, LLtypeMixin):
- pass
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -0,0 +1,219 @@
+import py
+from rpython.rlib.objectmodel import instantiate
+from rpython.jit.metainterp.optimizeopt.test.test_util import (
+ LLtypeMixin, BaseTest, FakeMetaInterpStaticData, convert_old_style_to_targets)
+from rpython.jit.metainterp.history import TargetToken, JitCellToken, TreeLoop
+from rpython.jit.metainterp.optimizeopt import optimize_trace
+import rpython.jit.metainterp.optimizeopt.optimizer as optimizeopt
+import rpython.jit.metainterp.optimizeopt.virtualize as virtualize
+from rpython.jit.metainterp.optimizeopt.dependency import DependencyGraph
+from rpython.jit.metainterp.optimizeopt.unroll import Inliner
+from rpython.jit.metainterp.optimizeopt.vectorize import OptVectorize
+from rpython.jit.metainterp.optimize import InvalidLoop
+from rpython.jit.metainterp.history import ConstInt, BoxInt, get_const_ptr_for_string
+from rpython.jit.metainterp import executor, compile, resume
+from rpython.jit.metainterp.resoperation import rop, ResOperation
+from rpython.rlib.rarithmetic import LONG_BIT
+
+class FakeJitDriverStaticData(object):
+ vectorize=True
+
+class DepTestHelper(BaseTest):
+
+ enable_opts = "intbounds:rewrite:virtualize:string:earlyforce:pure:heap:unfold"
+
+
+ jitdriver_sd = FakeJitDriverStaticData()
+
+ def build_dependency(self, ops):
+ loop = self.parse_loop(ops)
+ return DependencyGraph(None, loop)
+
+ def parse_loop(self, ops):
+ loop = self.parse(ops, postprocess=self.postprocess)
+ token = JitCellToken()
+ loop.operations = [ResOperation(rop.LABEL, loop.inputargs, None,
+ descr=TargetToken(token))] + loop.operations
+ if loop.operations[-1].getopnum() == rop.JUMP:
+ loop.operations[-1].setdescr(token)
+ return loop
+
+ def assert_vectorize(self, loop, unfolded_loop, call_pure_results=None):
+ optloop = self._do_optimize_loop(loop, call_pure_results, export_state=True)
+ self.assert_equal(optloop, unfolded_loop)
+
+ def assert_unroll_loop_equals(self, loop, expected_loop, \
+ unroll_factor = -1, call_pure_results=None):
+ metainterp_sd = FakeMetaInterpStaticData(self.cpu)
+ jitdriver_sd = FakeJitDriverStaticData()
+ opt = OptVectorize(metainterp_sd, jitdriver_sd, loop, [])
+ if unroll_factor == -1:
+ opt._gather_trace_information(loop)
+ unroll_factor = opt.get_estimated_unroll_factor()
+ opt_loop = opt.unroll_loop_iterations(loop, unroll_factor)
+ self.assert_equal(opt_loop, expected_loop)
+
+ def assert_def_use(self, graph, from_instr_index, to_instr_index):
+ assert graph.instr_dependency(from_instr_index,
+ to_instr_index) is not None, \
+ " it is expected that instruction at index" + \
+ " %d depend on instr on index %d but it is not" \
+ % (from_instr_index, to_instr_index)
+
+class BaseTestDependencyGraph(DepTestHelper):
+ def test_simple(self):
+ ops = """
+ []
+ i1 = int_add(1,1)
+ i2 = int_add(i1,1)
+ guard_value(i2,3) []
+ jump()
+ """
+ dep_graph = self.build_dependency(ops)
+ self.assert_def_use(dep_graph, 1, 2)
+ self.assert_def_use(dep_graph, 2, 3)
+
+ def test_label_def_use_jump_use_def(self):
+ ops = """
+ [i3]
+ i1 = int_add(i3,1)
+ guard_value(i1,0) []
+ jump(i1)
+ """
+ dep_graph = self.build_dependency(ops)
+ self.assert_def_use(dep_graph, 0, 1)
+ self.assert_def_use(dep_graph, 1, 2)
+ self.assert_def_use(dep_graph, 1, 3)
+
+ def test_vectorize_skip_impossible_1(self):
+ """ this trace does not contain a raw load / raw store from an array """
+ ops = """
+ [p0,i0]
+ i1 = int_add(i0,1)
+ i2 = int_le(i1, 10)
+ guard_true(i2) []
+ jump(p0,i1)
+ """
+ self.assert_vectorize(self.parse_loop(ops), self.parse_loop(ops))
+
+ def test_unroll_empty_stays_empty(self):
+ """ has no operations in this trace, thus it stays empty
+ after unrolling it 2 times """
+ ops = """
+ []
+ jump()
+ """
+ self.assert_unroll_loop_equals(self.parse_loop(ops), self.parse_loop(ops), 2)
+
+ def test_unroll_empty_stays_empty_parameter(self):
+ """ same as test_unroll_empty_stays_empty but with a parameter """
+ ops = """
+ [i0]
+ jump(i0)
+ """
+ self.assert_unroll_loop_equals(self.parse_loop(ops), self.parse_loop(ops), 2)
+
+ def test_vect_pointer_fails(self):
+ """ it currently rejects pointer arrays """
+ ops = """
+ [p0,i0]
+ raw_load(p0,i0,descr=arraydescr2)
+ jump(p0,i0)
+ """
+ self.assert_vectorize(self.parse_loop(ops), self.parse_loop(ops))
+
+ def test_vect_unroll_char(self):
+ """ a 16 byte vector register can hold 16 bytes thus
+ it is unrolled 16 times. (it is the smallest type in the trace) """
+ ops = """
+ [p0,i0]
+ raw_load(p0,i0,descr=chararraydescr)
+ jump(p0,i0)
+ """
+ opt_ops = """
+ [p0,i0]
+ {}
+ jump(p0,i0)
+ """.format(('\n' + ' ' *8).join(['raw_load(p0,i0,descr=chararraydescr)'] * 16))
+ self.assert_unroll_loop_equals(self.parse_loop(ops), self.parse_loop(opt_ops))
+
+ def test_unroll_vector_addition(self):
+ """ a more complex trace doing vector addition (smallest type is float
+ 8 byte) """
+ ops = """
+ [p0,p1,p2,i0]
+ i1 = raw_load(p1, i0, descr=floatarraydescr)
+ i2 = raw_load(p2, i0, descr=floatarraydescr)
+ i3 = int_add(i1,i2)
+ raw_store(p0, i0, i3, descr=floatarraydescr)
+ i4 = int_add(i0, 1)
+ i5 = int_le(i4, 10)
+ guard_true(i5) []
+ jump(p0,p1,p2,i4)
+ """
+ opt_ops = """
+ [p0,p1,p2,i0]
+ i1 = raw_load(p1, i0, descr=floatarraydescr)
+ i2 = raw_load(p2, i0, descr=floatarraydescr)
+ i3 = int_add(i1,i2)
+ raw_store(p0, i0, i3, descr=floatarraydescr)
+ i4 = int_add(i0, 1)
+ i5 = int_le(i4, 10)
+ guard_true(i5) []
+ i6 = raw_load(p1, i4, descr=floatarraydescr)
+ i7 = raw_load(p2, i4, descr=floatarraydescr)
+ i8 = int_add(i6,i7)
+ raw_store(p0, i4, i8, descr=floatarraydescr)
+ i9 = int_add(i4, 1)
+ i10 = int_le(i9, 10)
+ guard_true(i10) []
+ jump(p0,p1,p2,i9)
+ """
+ self.assert_unroll_loop_equals(self.parse_loop(ops), self.parse_loop(opt_ops), 2)
+
+class TestLLtype(BaseTestDependencyGraph, LLtypeMixin):
+ pass
+
+#class BaseTestVectorize(BaseTest):
+#
+# # vector instructions are not produced by the interpreter
+# # the optimization vectorize produces them
+# # load from from aligned memory example:
+# # vec = vec_aligned_raw_load(dst, index, sizeinbytes, descr)
+# # 'VEC_ALIGNED_RAW_LOAD/3d',
+# # store to aligned memory. example:
+# # vec_aligned_raw_store(dst, index, vector, sizeinbytes, descr)
+# # 'VEC_ALIGNED_RAW_STORE/4d',
+# # a list of operations on vectors
+# # add a vector: vec_int_add(v1, v2, 16)
+# # 'VEC_INT_ADD/3',
+#
+#class TestVectorize(BaseTestVectorize):
+#
+# def test_simple(self):
+# ops = """
+# [ia,ib,ic,i0]
+# ibi = raw_load(ib, i0, descr=arraydescr)
+# ici = raw_load(ic, i0, descr=arraydescr)
+# iai = int_add(ibi, ici)
+# raw_store(ia, i0, iai, descr=arraydescr)
+# i1 = int_add(i0,1)
+# ie = int_ge(i1,8)
+# guard_false(ie) [ia,ib,ic,i1]
+# jump(ia,ib,ic,i1)
+# """
+# expected = """
+# [ia,ib,ic,i0]
+# ibv = vec_raw_load(ib, i0, 16, descr=arraydescr)
+# icv = vec_raw_load(ic, i0, 16, descr=arraydescr)
+# iav = vec_int_add(ibi, ici, 16)
+# vec_raw_store(ia, i0, iai, 16, descr=arraydescr)
+# i1 = int_add(i0,4)
+# ie = int_ge(i1,8)
+# guard_false(ie) [ia,ib,ic,i1]
+# jump(ia,ib,ic,i1)
+# """
+# self.optimize_loop(ops, expected)
+#
+#class TestLLtype(TestVectorize, LLtypeMixin):
+# pass
diff --git a/rpython/jit/metainterp/optimizeopt/unfold.py b/rpython/jit/metainterp/optimizeopt/unfold.py
deleted file mode 100644
--- a/rpython/jit/metainterp/optimizeopt/unfold.py
+++ /dev/null
@@ -1,680 +0,0 @@
-import sys
-
-from rpython.jit.metainterp.history import TargetToken, JitCellToken, Const
-from rpython.jit.metainterp.inliner import Inliner
-from rpython.jit.metainterp.optimize import InvalidLoop
-from rpython.jit.metainterp.optimizeopt.generalize import KillHugeIntBounds
-from rpython.jit.metainterp.optimizeopt.optimizer import Optimizer, Optimization
-from rpython.jit.metainterp.optimizeopt.virtualstate import (VirtualStateConstructor,
- ShortBoxes, BadVirtualState, VirtualStatesCantMatch)
-from rpython.jit.metainterp.resoperation import rop, ResOperation, GuardResOp
-from rpython.jit.metainterp.resume import Snapshot
-from rpython.jit.metainterp import compile
-from rpython.rlib.debug import debug_print, debug_start, debug_stop
-
-
-def optimize_unfold(metainterp_sd, jitdriver_sd, loop, optimizations, start_state=None,
- export_state=True, unroll_factor=-1):
- opt = OptUnfold(metainterp_sd, jitdriver_sd, loop, optimizations, unroll_factor)
- return opt.propagate_all_forward(start_state, export_state)
-
-
-class UnfoldOptimizer(Optimizer):
- def setup(self):
- pass
-
-class OptUnfold(Optimization):
- """ In contrast to the loop unroll optimization this optimization
- unrolls the loop many times instead of just peeling off one trace.
- """
-
- inline_short_preamble = True
-
- def __init__(self, metainterp_sd, jitdriver_sd, loop, optimizations,
- unroll_factor):
- self.force_unroll_factor = unroll_factor
- self.optimizer = UnfoldOptimizer(metainterp_sd, jitdriver_sd,
- loop, optimizations)
-
- def _rename_arguments_ssa(self, rename_map, label_args, jump_args):
- # fill the map with the renaming boxes. keys are boxes from the label
- # values are the target boxes.
- for la,ja in zip(label_args, jump_args):
- if la != ja:
- rename_map[la] = ja
-
- def propagate_all_forward(self, starting_state, export_state=True):
-
- unroll_factor = self.force_unroll_factor
- if unroll_factor == -1:
- unroll_factor = 2 # TODO find a sensible factor. think about loop type?
-
- self.optimizer.exporting_state = export_state
- loop = self.optimizer.loop
- self.optimizer.clear_newoperations()
-
- label_op = loop.operations[0]
- jump_op = loop.operations[-1]
- operations = loop.operations[1:-1]
- loop.operations = []
-
- iterations = [[op.clone() for op in operations]]
- label_op_args = [self.getvalue(box).get_key_box() for box in label_op.getarglist()]
- values = [self.getvalue(box) for box in label_op.getarglist()]
- values[0].make_nonnull(self.optimizer)
-
- jump_op_args = jump_op.getarglist()
-
- rename_map = {}
- for unroll_i in range(2, unroll_factor+1):
- # for each unrolling factor the boxes are renamed.
- self._rename_arguments_ssa(rename_map, label_op_args, jump_op_args)
- iteration_ops = []
- for op in operations:
- copied_op = op.clone()
-
- if copied_op.result is not None:
- # every result assigns a new box, thus creates an entry
- # to the rename map.
- new_assigned_box = copied_op.result.clonebox()
- rename_map[copied_op.result] = new_assigned_box
- copied_op.result = new_assigned_box
-
- args = copied_op.getarglist()
- for i, arg in enumerate(args):
- try:
- value = rename_map[arg]
- copied_op.setarg(i, value)
- except KeyError:
- pass
-
- iteration_ops.append(copied_op)
-
- # the jump arguments have been changed
- # if label(iX) ... jump(i(X+1)) is called, at the next unrolled loop
- # must look like this: label(i(X+1)) ... jump(i(X+2))
-
- args = jump_op.getarglist()
- for i, arg in enumerate(args):
- try:
- value = rename_map[arg]
- jump_op.setarg(i, value)
- except KeyError:
- pass
- # map will be rebuilt, the jump operation has been updated already
- rename_map.clear()
-
- iterations.append(iteration_ops)
-
- # unwrap the loop nesting.
- loop.operations.append(label_op)
- for iteration in iterations:
- for op in iteration:
- loop.operations.append(op)
- loop.operations.append(jump_op)
-
-
- #start_label = loop.operations[0]
- #if start_label.getopnum() == rop.LABEL:
- # loop.operations = loop.operations[1:]
- # # We need to emit the label op before import_state() as emitting it
- # # will clear heap caches
- # self.optimizer.send_extra_operation(start_label)
- #else:
- # start_label = None
-
- #patchguardop = None
- #if len(loop.operations) > 1:
- # patchguardop = loop.operations[-2]
- # if patchguardop.getopnum() != rop.GUARD_FUTURE_CONDITION:
- # patchguardop = None
-
- #jumpop = loop.operations[-1]
- #if jumpop.getopnum() == rop.JUMP or jumpop.getopnum() == rop.LABEL:
- # loop.operations = loop.operations[:-1]
- #else:
- # jumpop = None
-
- #self.import_state(start_label, starting_state)
- #self.optimizer.propagate_all_forward(clear=False)
-
- #if not jumpop:
- # return
-
- #cell_token = jumpop.getdescr()
- #assert isinstance(cell_token, JitCellToken)
- #stop_label = ResOperation(rop.LABEL, jumpop.getarglist(), None, TargetToken(cell_token))
-
- #if jumpop.getopnum() == rop.JUMP:
- # if self.jump_to_already_compiled_trace(jumpop, patchguardop):
- # # Found a compiled trace to jump to
- # if self.short:
- # # Construct our short preamble
- # assert start_label
- # self.close_bridge(start_label)
- # return
-
- # if start_label and self.jump_to_start_label(start_label, stop_label):
- # # Initial label matches, jump to it
- # jumpop = ResOperation(rop.JUMP, stop_label.getarglist(), None,
- # descr=start_label.getdescr())
- # if self.short:
- # # Construct our short preamble
- # self.close_loop(start_label, jumpop, patchguardop)
- # else:
- # self.optimizer.send_extra_operation(jumpop)
- # return
-
- # if cell_token.target_tokens:
- # limit = self.optimizer.metainterp_sd.warmrunnerdesc.memory_manager.retrace_limit
- # if cell_token.retraced_count < limit:
- # cell_token.retraced_count += 1
- # debug_print('Retracing (%d/%d)' % (cell_token.retraced_count, limit))
- # else:
- # debug_print("Retrace count reached, jumping to preamble")
- # assert cell_token.target_tokens[0].virtual_state is None
- # jumpop = jumpop.clone()
- # jumpop.setdescr(cell_token.target_tokens[0])
- # self.optimizer.send_extra_operation(jumpop)
- # return
-
- ## Found nothing to jump to, emit a label instead
-
- #if self.short:
- # # Construct our short preamble
- # assert start_label
- # self.close_bridge(start_label)
-
- #self.optimizer.flush()
- #if export_state:
- # KillHugeIntBounds(self.optimizer).apply()
-
- #loop.operations = self.optimizer.get_newoperations()
- #if export_state:
- # final_state = self.export_state(stop_label)
- #else:
- # final_state = None
- #loop.operations.append(stop_label)
- #return final_state
- return loop
-
- def get_virtual_state(self, args):
- modifier = VirtualStateConstructor(self.optimizer)
- return modifier.get_virtual_state(args)
-
- def fix_snapshot(self, jump_args, snapshot):
- if snapshot is None:
- return None
- snapshot_args = snapshot.boxes
- new_snapshot_args = []
- for a in snapshot_args:
- a = self.getvalue(a).get_key_box()
- new_snapshot_args.append(a)
- prev = self.fix_snapshot(jump_args, snapshot.prev)
- return Snapshot(prev, new_snapshot_args)
-
-
- def jump_to_start_label(self, start_label, stop_label):
- if not start_label or not stop_label:
- return False
-
- stop_target = stop_label.getdescr()
- start_target = start_label.getdescr()
- assert isinstance(stop_target, TargetToken)
- assert isinstance(start_target, TargetToken)
- return stop_target.targeting_jitcell_token is start_target.targeting_jitcell_token
-
-
- def export_state(self, targetop):
- original_jump_args = targetop.getarglist()
- jump_args = [self.getvalue(a).get_key_box() for a in original_jump_args]
-
- virtual_state = self.get_virtual_state(jump_args)
-
- values = [self.getvalue(arg) for arg in jump_args]
- inputargs = virtual_state.make_inputargs(values, self.optimizer)
- short_inputargs = virtual_state.make_inputargs(values, self.optimizer, keyboxes=True)
-
- if self.boxes_created_this_iteration is not None:
- for box in self.inputargs:
- self.boxes_created_this_iteration[box] = None
-
- short_boxes = ShortBoxes(self.optimizer, inputargs)
-
- self.optimizer.clear_newoperations()
- for i in range(len(original_jump_args)):
- srcbox = jump_args[i]
- if values[i].is_virtual():
- srcbox = values[i].force_box(self.optimizer)
- if original_jump_args[i] is not srcbox:
- op = ResOperation(rop.SAME_AS, [srcbox], original_jump_args[i])
- self.optimizer.emit_operation(op)
- inputarg_setup_ops = self.optimizer.get_newoperations()
-
- target_token = targetop.getdescr()
- assert isinstance(target_token, TargetToken)
- targetop.initarglist(inputargs)
- target_token.virtual_state = virtual_state
- target_token.short_preamble = [ResOperation(rop.LABEL, short_inputargs, None)]
-
- exported_values = {}
- for box in inputargs:
- exported_values[box] = self.optimizer.getvalue(box)
- for op in short_boxes.operations():
- if op and op.result:
- box = op.result
- exported_values[box] = self.optimizer.getvalue(box)
-
- return ExportedState(short_boxes, inputarg_setup_ops, exported_values)
-
- def import_state(self, targetop, exported_state):
- if not targetop: # Trace did not start with a label
- self.inputargs = self.optimizer.loop.inputargs
- self.short = None
- self.initial_virtual_state = None
- return
-
- self.inputargs = targetop.getarglist()
- target_token = targetop.getdescr()
- assert isinstance(target_token, TargetToken)
- if not exported_state:
- # No state exported, construct one without virtuals
- self.short = None
- virtual_state = self.get_virtual_state(self.inputargs)
- self.initial_virtual_state = virtual_state
- return
-
- self.short = target_token.short_preamble[:]
- self.short_seen = {}
- self.short_boxes = exported_state.short_boxes
- self.initial_virtual_state = target_token.virtual_state
-
- for box in self.inputargs:
- preamble_value = exported_state.exported_values[box]
- value = self.optimizer.getvalue(box)
- value.import_from(preamble_value, self.optimizer)
-
- # Setup the state of the new optimizer by emiting the
- # short operations and discarding the result
- self.optimizer.emitting_dissabled = True
- for op in exported_state.inputarg_setup_ops:
- self.optimizer.send_extra_operation(op)
-
- seen = {}
- for op in self.short_boxes.operations():
- self.ensure_short_op_emitted(op, self.optimizer, seen)
- if op and op.result:
- preamble_value = exported_state.exported_values[op.result]
- value = self.optimizer.getvalue(op.result)
- if not value.is_virtual() and not value.is_constant():
- imp = ValueImporter(self, preamble_value, op)
- self.optimizer.importable_values[value] = imp
- newvalue = self.optimizer.getvalue(op.result)
- newresult = newvalue.get_key_box()
- # note that emitting here SAME_AS should not happen, but
- # in case it does, we would prefer to be suboptimal in asm
- # to a fatal RPython exception.
- if newresult is not op.result and \
- not self.short_boxes.has_producer(newresult) and \
- not newvalue.is_constant():
- op = ResOperation(rop.SAME_AS, [op.result], newresult)
- self.optimizer._newoperations.append(op)
- #if self.optimizer.loop.logops:
- # debug_print(' Falling back to add extra: ' +
- # self.optimizer.loop.logops.repr_of_resop(op))
-
- self.optimizer.flush()
- self.optimizer.emitting_dissabled = False
-
- def close_bridge(self, start_label):
- inputargs = self.inputargs
- short_jumpargs = inputargs[:]
-
- # We dont need to inline the short preamble we are creating as we are conneting
- # the bridge to a different trace with a different short preamble
- self.short_inliner = None
-
- newoperations = self.optimizer.get_newoperations()
- self.boxes_created_this_iteration = {}
- i = 0
- while i < len(newoperations):
- self._import_op(newoperations[i], inputargs, short_jumpargs, [])
- i += 1
- newoperations = self.optimizer.get_newoperations()
- self.short.append(ResOperation(rop.JUMP, short_jumpargs, None, descr=start_label.getdescr()))
- self.finalize_short_preamble(start_label)
-
- def close_loop(self, start_label, jumpop, patchguardop):
- virtual_state = self.initial_virtual_state
- short_inputargs = self.short[0].getarglist()
- inputargs = self.inputargs
- short_jumpargs = inputargs[:]
-
- # Construct jumpargs from the virtual state
- original_jumpargs = jumpop.getarglist()[:]
- values = [self.getvalue(arg) for arg in jumpop.getarglist()]
- try:
- jumpargs = virtual_state.make_inputargs(values, self.optimizer)
- except BadVirtualState:
- raise InvalidLoop('The state of the optimizer at the end of ' +
- 'peeled loop is inconsistent with the ' +
- 'VirtualState at the beginning of the peeled ' +
- 'loop')
- jumpop.initarglist(jumpargs)
-
- # Inline the short preamble at the end of the loop
- jmp_to_short_args = virtual_state.make_inputargs(values,
- self.optimizer,
- keyboxes=True)
- assert len(short_inputargs) == len(jmp_to_short_args)
- args = {}
- for i in range(len(short_inputargs)):
- if short_inputargs[i] in args:
- if args[short_inputargs[i]] != jmp_to_short_args[i]:
- raise InvalidLoop('The short preamble wants the ' +
- 'same box passed to multiple of its ' +
- 'inputargs, but the jump at the ' +
- 'end of this bridge does not do that.')
-
- args[short_inputargs[i]] = jmp_to_short_args[i]
- self.short_inliner = Inliner(short_inputargs, jmp_to_short_args)
- self._inline_short_preamble(self.short, self.short_inliner,
- patchguardop, self.short_boxes.assumed_classes)
-
- # Import boxes produced in the preamble but used in the loop
- newoperations = self.optimizer.get_newoperations()
- self.boxes_created_this_iteration = {}
- i = j = 0
- while i < len(newoperations) or j < len(jumpargs):
- if i == len(newoperations):
- while j < len(jumpargs):
- a = jumpargs[j]
- #if self.optimizer.loop.logops:
- # debug_print('J: ' + self.optimizer.loop.logops.repr_of_arg(a))
- self.import_box(a, inputargs, short_jumpargs, jumpargs)
- j += 1
- else:
- self._import_op(newoperations[i], inputargs, short_jumpargs, jumpargs)
- i += 1
- newoperations = self.optimizer.get_newoperations()
-
- jumpop.initarglist(jumpargs)
- self.optimizer.send_extra_operation(jumpop)
- self.short.append(ResOperation(rop.JUMP, short_jumpargs, None, descr=jumpop.getdescr()))
-
- # Verify that the virtual state at the end of the loop is one
- # that is compatible with the virtual state at the start of the loop
- final_virtual_state = self.get_virtual_state(original_jumpargs)
- #debug_start('jit-log-virtualstate')
- #virtual_state.debug_print('Closed loop with ')
- bad = {}
- if not virtual_state.generalization_of(final_virtual_state, bad,
- cpu=self.optimizer.cpu):
- # We ended up with a virtual state that is not compatible
- # and we are thus unable to jump to the start of the loop
- #final_virtual_state.debug_print("Bad virtual state at end of loop, ",
- # bad)
- #debug_stop('jit-log-virtualstate')
- raise InvalidLoop('The virtual state at the end of the peeled ' +
- 'loop is not compatible with the virtual ' +
- 'state at the start of the loop which makes ' +
- 'it impossible to close the loop')
-
- #debug_stop('jit-log-virtualstate')
-
- maxguards = self.optimizer.metainterp_sd.warmrunnerdesc.memory_manager.max_retrace_guards
- if self.optimizer.emitted_guards > maxguards:
- target_token = jumpop.getdescr()
- assert isinstance(target_token, TargetToken)
- target_token.targeting_jitcell_token.retraced_count = sys.maxint
-
- self.finalize_short_preamble(start_label)
-
- def finalize_short_preamble(self, start_label):
- short = self.short
- assert short[-1].getopnum() == rop.JUMP
- target_token = start_label.getdescr()
- assert isinstance(target_token, TargetToken)
-
- # Turn guards into conditional jumps to the preamble
- for i in range(len(short)):
- op = short[i]
- if op.is_guard():
- op = op.clone()
- op.setfailargs(None)
- op.setdescr(None) # will be set to a proper descr when the preamble is used
- short[i] = op
-
- # Clone ops and boxes to get private versions and
- short_inputargs = short[0].getarglist()
- boxmap = {}
- newargs = [None] * len(short_inputargs)
- for i in range(len(short_inputargs)):
- a = short_inputargs[i]
- if a in boxmap:
- newargs[i] = boxmap[a]
- else:
- newargs[i] = a.clonebox()
- boxmap[a] = newargs[i]
- inliner = Inliner(short_inputargs, newargs)
- target_token.assumed_classes = {}
- for i in range(len(short)):
- op = short[i]
- newop = inliner.inline_op(op)
- if op.result and op.result in self.short_boxes.assumed_classes:
- target_token.assumed_classes[newop.result] = self.short_boxes.assumed_classes[op.result]
- short[i] = newop
-
- # Forget the values to allow them to be freed
- for box in short[0].getarglist():
- box.forget_value()
- for op in short:
- if op.result:
- op.result.forget_value()
- target_token.short_preamble = self.short
-
- def ensure_short_op_emitted(self, op, optimizer, seen):
- if op is None:
- return
- if op.result is not None and op.result in seen:
- return
- for a in op.getarglist():
- if not isinstance(a, Const) and a not in seen:
- self.ensure_short_op_emitted(self.short_boxes.producer(a), optimizer,
- seen)
-
- #if self.optimizer.loop.logops:
- # debug_print(' Emitting short op: ' +
- # self.optimizer.loop.logops.repr_of_resop(op))
-
- optimizer.send_extra_operation(op)
- seen[op.result] = None
- if op.is_ovf():
- guard = ResOperation(rop.GUARD_NO_OVERFLOW, [], None)
- optimizer.send_extra_operation(guard)
-
- def add_op_to_short(self, op, emit=True, guards_needed=False):
- if op is None:
- return None
- if op.result is not None and op.result in self.short_seen:
- if emit and self.short_inliner:
- return self.short_inliner.inline_arg(op.result)
- else:
- return None
-
- for a in op.getarglist():
- if not isinstance(a, Const) and a not in self.short_seen:
- self.add_op_to_short(self.short_boxes.producer(a), emit, guards_needed)
- if op.is_guard():
- op.setdescr(None) # will be set to a proper descr when the preamble is used
-
- if guards_needed and self.short_boxes.has_producer(op.result):
- value_guards = self.getvalue(op.result).make_guards(op.result)
- else:
- value_guards = []
-
- self.short.append(op)
- self.short_seen[op.result] = None
- if emit and self.short_inliner:
- newop = self.short_inliner.inline_op(op)
- self.optimizer.send_extra_operation(newop)
- else:
- newop = None
-
- if op.is_ovf():
- # FIXME: ensure that GUARD_OVERFLOW:ed ops not end up here
- guard = ResOperation(rop.GUARD_NO_OVERFLOW, [], None)
- self.add_op_to_short(guard, emit, guards_needed)
- for guard in value_guards:
- self.add_op_to_short(guard, emit, guards_needed)
-
- if newop:
- return newop.result
- return None
-
- def import_box(self, box, inputargs, short_jumpargs, jumpargs):
- if isinstance(box, Const) or box in inputargs:
- return
- if box in self.boxes_created_this_iteration:
- return
-
- short_op = self.short_boxes.producer(box)
- newresult = self.add_op_to_short(short_op)
-
- short_jumpargs.append(short_op.result)
- inputargs.append(box)
- box = newresult
- if box in self.optimizer.values:
- box = self.optimizer.values[box].force_box(self.optimizer)
- jumpargs.append(box)
-
-
- def _import_op(self, op, inputargs, short_jumpargs, jumpargs):
- self.boxes_created_this_iteration[op.result] = None
- args = op.getarglist()
- if op.is_guard():
- args = args + op.getfailargs()
-
- for a in args:
- self.import_box(a, inputargs, short_jumpargs, jumpargs)
-
- def jump_to_already_compiled_trace(self, jumpop, patchguardop):
- jumpop = jumpop.clone()
- assert jumpop.getopnum() == rop.JUMP
- cell_token = jumpop.getdescr()
-
- assert isinstance(cell_token, JitCellToken)
- if not cell_token.target_tokens:
- return False
-
- if not self.inline_short_preamble:
- assert cell_token.target_tokens[0].virtual_state is None
- jumpop.setdescr(cell_token.target_tokens[0])
- self.optimizer.send_extra_operation(jumpop)
- return True
-
- args = jumpop.getarglist()
- virtual_state = self.get_virtual_state(args)
- values = [self.getvalue(arg)
- for arg in jumpop.getarglist()]
- debug_start('jit-log-virtualstate')
- virtual_state.debug_print("Looking for ", metainterp_sd=self.optimizer.metainterp_sd)
-
- for target in cell_token.target_tokens:
- if not target.virtual_state:
- continue
- extra_guards = []
-
- try:
- cpu = self.optimizer.cpu
- state = target.virtual_state.generate_guards(virtual_state,
- values,
- cpu)
-
- extra_guards = state.extra_guards
- if extra_guards:
- debugmsg = 'Guarded to match '
- else:
- debugmsg = 'Matched '
- except VirtualStatesCantMatch, e:
- debugmsg = 'Did not match:\n%s\n' % (e.msg, )
- target.virtual_state.debug_print(debugmsg, e.state.bad, metainterp_sd=self.optimizer.metainterp_sd)
- continue
-
- assert patchguardop is not None or (extra_guards == [] and len(target.short_preamble) == 1)
-
- target.virtual_state.debug_print(debugmsg, {})
-
- debug_stop('jit-log-virtualstate')
-
- args = target.virtual_state.make_inputargs(values, self.optimizer,
- keyboxes=True)
- short_inputargs = target.short_preamble[0].getarglist()
- inliner = Inliner(short_inputargs, args)
-
- for guard in extra_guards:
- if guard.is_guard():
- assert isinstance(patchguardop, GuardResOp)
- assert isinstance(guard, GuardResOp)
- guard.rd_snapshot = patchguardop.rd_snapshot
- guard.rd_frame_info_list = patchguardop.rd_frame_info_list
- guard.setdescr(compile.ResumeAtPositionDescr())
- self.optimizer.send_extra_operation(guard)
-
- try:
- # NB: the short_preamble ends with a jump
- self._inline_short_preamble(target.short_preamble, inliner,
- patchguardop,
- target.assumed_classes)
- except InvalidLoop:
- #debug_print("Inlining failed unexpectedly",
- # "jumping to preamble instead")
- assert cell_token.target_tokens[0].virtual_state is None
- jumpop.setdescr(cell_token.target_tokens[0])
- self.optimizer.send_extra_operation(jumpop)
- return True
- debug_stop('jit-log-virtualstate')
- return False
-
- def _inline_short_preamble(self, short_preamble, inliner, patchguardop,
- assumed_classes):
- i = 1
- # XXX this is intentiontal :-(. short_preamble can change during the
- # loop in some cases
- while i < len(short_preamble):
- shop = short_preamble[i]
- newop = inliner.inline_op(shop)
- if newop.is_guard():
- if not patchguardop:
- raise InvalidLoop("would like to have short preamble, but it has a guard and there's no guard_future_condition")
- assert isinstance(newop, GuardResOp)
- assert isinstance(patchguardop, GuardResOp)
- newop.rd_snapshot = patchguardop.rd_snapshot
- newop.rd_frame_info_list = patchguardop.rd_frame_info_list
- newop.setdescr(compile.ResumeAtPositionDescr())
- self.optimizer.send_extra_operation(newop)
- if shop.result in assumed_classes:
- classbox = self.getvalue(newop.result).get_constant_class(self.optimizer.cpu)
- if not classbox or not classbox.same_constant(assumed_classes[shop.result]):
- raise InvalidLoop('The class of an opaque pointer before the jump ' +
- 'does not mach the class ' +
- 'it has at the start of the target loop')
- i += 1
-
-
-class ValueImporter(object):
- def __init__(self, unroll, value, op):
- self.unroll = unroll
- self.preamble_value = value
- self.op = op
-
- def import_value(self, value):
- value.import_from(self.preamble_value, self.unroll.optimizer)
- self.unroll.add_op_to_short(self.op, False, True)
-
-
-class ExportedState(object):
- def __init__(self, short_boxes, inputarg_setup_ops, exported_values):
- self.short_boxes = short_boxes
- self.inputarg_setup_ops = inputarg_setup_ops
- self.exported_values = exported_values
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -0,0 +1,170 @@
+import sys
+
+from rpython.rtyper.lltypesystem import lltype, rffi
+from rpython.jit.backend.llgraph.runner import ArrayDescr
+from rpython.jit.metainterp.history import TargetToken, JitCellToken, Const
+from rpython.jit.metainterp.inliner import Inliner
+from rpython.jit.metainterp.optimize import InvalidLoop
+from rpython.jit.metainterp.optimizeopt.optimizer import Optimizer, Optimization
+from rpython.jit.metainterp.optimizeopt.util import make_dispatcher_method
+from rpython.jit.metainterp.resoperation import rop, ResOperation, GuardResOp
+from rpython.jit.metainterp.resume import Snapshot
+from rpython.jit.metainterp import compile
+from rpython.rlib.debug import debug_print, debug_start, debug_stop
+
def optimize_vector(metainterp_sd, jitdriver_sd, loop, optimizations, start_state=None,
                    export_state=True):
    """Attempt to vectorize `loop`.

    Runs OptVectorize over the trace; if it reports success the
    vectorized loop is returned.  Otherwise the trace is re-optimized
    with the plain Optimizer pipeline and the original loop object is
    returned unchanged in identity.
    """
    vectorizer = OptVectorize(metainterp_sd, jitdriver_sd, loop, optimizations)
    vectorized_loop = vectorizer.propagate_all_forward(start_state, export_state)
    if vectorizer.vectorized:
        return vectorized_loop
    # vectorization is not possible, propagate only normal optimizations
    fallback = Optimizer(metainterp_sd, jitdriver_sd, loop, optimizations)
    fallback.propagate_all_forward()
    return loop
+
+
class VectorizeOptimizer(Optimizer):
    # Optimizer subclass used by OptVectorize.  setup() is overridden to
    # a no-op: the vectorizer drives its own passes and does not want the
    # base class's per-optimization setup to run.
    def setup(self):
        pass
+
class OptVectorize(Optimization):
    """ Try to unroll the loop and find instructions to group """

    inline_short_preamble = True

    def __init__(self, metainterp_sd, jitdriver_sd, loop, optimizations):
        self.optimizer = VectorizeOptimizer(metainterp_sd, jitdriver_sd,
                                            loop, optimizations)
        self.loop_vectorizer_checker = LoopVectorizeChecker()
        # set to True by propagate_all_forward once unrolling succeeded
        self.vectorized = False

    def _rename_arguments_ssa(self, rename_map, label_args, jump_args):
        # fill the map with the renaming boxes. keys are boxes from the label
        # values are the target boxes.
        for la, ja in zip(label_args, jump_args):
            if la != ja:
                rename_map[la] = ja

    def unroll_loop_iterations(self, loop, unroll_factor):
        """Unroll the body of `loop` in place so that it contains
        `unroll_factor` copies of the original iteration, keeping the
        trace in SSA form by renaming every result box of each copy.
        Returns the (mutated) loop.
        """
        label_op = loop.operations[0]
        jump_op = loop.operations[-1]
        operations = loop.operations[1:-1]
        loop.operations = []

        # the first iteration is a plain clone of the original body
        iterations = [[op.clone() for op in operations]]
        label_op_args = [self.getvalue(box).get_key_box()
                         for box in label_op.getarglist()]

        jump_op_args = jump_op.getarglist()

        rename_map = {}
        for unroll_i in range(2, unroll_factor + 1):
            # for each unrolling factor the boxes are renamed.
            self._rename_arguments_ssa(rename_map, label_op_args, jump_op_args)
            iteration_ops = []
            for op in operations:
                copied_op = op.clone()

                if copied_op.result is not None:
                    # every result assigns a new box, thus creates an entry
                    # to the rename map.
                    new_assigned_box = copied_op.result.clonebox()
                    rename_map[copied_op.result] = new_assigned_box
                    copied_op.result = new_assigned_box

                args = copied_op.getarglist()
                for i, arg in enumerate(args):
                    try:
                        value = rename_map[arg]
                        copied_op.setarg(i, value)
                    except KeyError:
                        # argument not produced inside the unrolled body;
                        # it keeps its original box
                        pass

                iteration_ops.append(copied_op)

            # the jump arguments have been changed
            # if label(iX) ... jump(i(X+1)) is called, at the next unrolled loop
            # must look like this: label(i(X+1)) ... jump(i(X+2))

            args = jump_op.getarglist()
            for i, arg in enumerate(args):
                try:
                    value = rename_map[arg]
                    jump_op.setarg(i, value)
                except KeyError:
                    pass
            # map will be rebuilt, the jump operation has been updated already
            rename_map.clear()

            iterations.append(iteration_ops)

        # unwrap the loop nesting: label, all unrolled bodies, single jump
        loop.operations.append(label_op)
        for iteration in iterations:
            for op in iteration:
                loop.operations.append(op)
        loop.operations.append(jump_op)

        return loop

    def _gather_trace_information(self, loop):
        # feed every operation to the checker to record the smallest
        # vectorizable element size seen in the trace
        for op in loop.operations:
            self.loop_vectorizer_checker.inspect_operation(op)

    def get_estimated_unroll_factor(self, force_reg_bytes=-1):
        """Return how many times the loop must be unrolled to fill one
        SIMD register with elements of the smallest type in the trace.
        `force_reg_bytes` overrides the register width (used for testing).
        """
        # this optimization is not opaque, and needs info about the CPU
        byte_count = self.loop_vectorizer_checker.smallest_type_bytes
        simd_vec_reg_bytes = 16  # TODO get from cpu
        if force_reg_bytes > 0:
            # bug fix: previously referenced the undefined name
            # `force_simd_vec_reg_bytes`, raising NameError here
            simd_vec_reg_bytes = force_reg_bytes
        unroll_factor = simd_vec_reg_bytes // byte_count
        return unroll_factor

    def propagate_all_forward(self, starting_state, export_state=True):
        """Inspect the trace and, when it contains at least one
        vectorizable load, unroll it; sets self.vectorized accordingly
        and returns the (possibly mutated) loop.
        """
        self.optimizer.exporting_state = export_state
        loop = self.optimizer.loop
        self.optimizer.clear_newoperations()

        # one pass over the trace is enough (previously the operations
        # were redundantly inspected a second time right after this call)
        self._gather_trace_information(loop)

        byte_count = self.loop_vectorizer_checker.smallest_type_bytes
        if byte_count == 0:
            # stop, there is no chance to vectorize this trace
            return loop

        unroll_factor = self.get_estimated_unroll_factor()

        self.unroll_loop_iterations(loop, unroll_factor)

        self.vectorized = True

        return loop
+
class LoopVectorizeChecker(object):
    # Scans a trace operation by operation and records the smallest
    # array-element size (in bytes) read by any RAW_LOAD; a value of 0
    # means no vectorization candidate was found.

    def __init__(self):
        # stays 0 until a RAW_LOAD of a non-pointer element type is seen
        self.smallest_type_bytes = 0

    def count_RAW_LOAD(self, op):
        # handler invoked by inspect_operation for RAW_LOAD operations
        descr = op.getdescr()
        assert isinstance(descr, ArrayDescr) # TODO prove this right
        # pointer loads are not vectorizable; only track primitive sizes
        if not isinstance(descr.A.OF, lltype.Ptr):
            byte_count = rffi.sizeof(descr.A.OF)
            if self.smallest_type_bytes == 0 \
               or byte_count < self.smallest_type_bytes:
                self.smallest_type_bytes = byte_count

    def default_count(self, operation):
        # fallback for operations without a dedicated count_* handler
        pass

# build inspect_operation: dispatches each op to count_<OPNAME> if such a
# method exists, otherwise to default_count
dispatch_opt = make_dispatcher_method(LoopVectorizeChecker, 'count_',
        default=LoopVectorizeChecker.default_count)
LoopVectorizeChecker.inspect_operation = dispatch_opt
More information about the pypy-commit
mailing list