[pypy-commit] pypy vecopt: refactored the dependency construction for guards
plan_rich
noreply at buildbot.pypy.org
Thu May 21 18:26:38 CEST 2015
Author: Richard Plangger <rich at pasra.at>
Branch: vecopt
Changeset: r77452:2cdfa2593741
Date: 2015-05-21 09:56 +0200
http://bitbucket.org/pypy/pypy/changeset/2cdfa2593741/
Log: refactored the dependency construction for guards guard relaxation
is now simpler and faster saving backwards edge for each dependency
object in the field backward last commit disabled vectorize for
test_zjit (still not working)
diff --git a/rpython/jit/backend/x86/test/test_vectorize.py b/rpython/jit/backend/x86/test/test_vectorize.py
--- a/rpython/jit/backend/x86/test/test_vectorize.py
+++ b/rpython/jit/backend/x86/test/test_vectorize.py
@@ -47,7 +47,7 @@
asm.mc.AND_rr(ecx.value, edx.value)
asm.mc.ADD(eax, ecx)
- asm.mc.PSRLDQ_xi((xmm7.value, 4))
+ asm.mc.PSRLDQ_xi(xmm7.value, 4)
asm.mc.MOVDQ_rx(ecx.value, xmm7.value)
asm.mc.AND_rr(ecx.value, edx.value)
asm.mc.ADD(eax, ecx)
diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py
--- a/rpython/jit/metainterp/optimizeopt/dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/dependency.py
@@ -61,7 +61,7 @@
def set_schedule_priority(self, p):
for node in self.path:
- node.priority = p
+ node.setpriority(p)
def walk(self, node):
self.path.append(node)
@@ -94,28 +94,12 @@
def getopname(self):
return self.op.getopname()
+ def setpriority(self, value):
+ self.priority = value
+
def can_be_relaxed(self):
return self.op.getopnum() in (rop.GUARD_TRUE, rop.GUARD_FALSE)
- def getfailarg_set(self):
- op = self.getoperation()
- assert isinstance(op, GuardResOp)
- args = {}
- if op.getfailargs():
- for arg in op.getfailargs():
- args[arg] = None
- return args.keys()
- elif op.rd_snapshot:
- ss = op.rd_snapshot
- assert isinstance(ss, Snapshot)
- while ss:
- for box in ss.boxes:
- args[box] = None
- ss = ss.prev
-
- return args.keys()
-
-
def relax_guard_to(self, guard):
""" Relaxes a guard operation to an earlier guard. """
# clone this operation object. if the vectorizer is
@@ -142,16 +126,17 @@
#if not we_are_translated():
tgt_op.setfailargs(op.getfailargs())
- def edge_to(self, to, arg=None, label=None):
+ def edge_to(self, to, arg=None, failarg=False, label=None):
if self is to:
print "debug: tried to put edge from: ", self.op, "to:", to.op
return
dep = self.depends_on(to)
if not dep:
#if force or self.independent(idx_from, idx_to):
- dep = Dependency(self, to, arg)
+ dep = Dependency(self, to, arg, failarg)
self.adjacent_list.append(dep)
- dep_back = Dependency(to, self, arg)
+ dep_back = Dependency(to, self, arg, failarg)
+ dep.backward = dep_back
to.adjacent_list_back.append(dep_back)
if not we_are_translated():
if label is None:
@@ -160,9 +145,14 @@
else:
if not dep.because_of(arg):
dep.add_dependency(self,to,arg)
+ # if a fail argument is overwritten by another normal
+ # dependency it will remove the failarg flag
+ if not (dep.is_failarg() and failarg):
+ dep.set_failarg(False)
if not we_are_translated() and label is not None:
_label = getattr(dep, 'label', '')
dep.label = _label + ", " + label
+ return dep
def clear_dependencies(self):
self.adjacent_list = []
@@ -226,7 +216,7 @@
any other case.
"""
for edge in self.adjacent_list:
- if edge.to == to:
+ if edge.to is to:
return edge
return None
@@ -333,14 +323,15 @@
class Dependency(object):
- def __init__(self, at, to, arg, flow=True):
+ def __init__(self, at, to, arg, failarg=False):
assert at != to
self.args = []
if arg is not None:
self.add_dependency(at, to, arg)
self.at = at
self.to = to
- self.flow = True
+ self.failarg = failarg
+ self.backward = None
def because_of(self, var):
for arg in self.args:
@@ -371,11 +362,13 @@
def add_dependency(self, at, to, arg):
self.args.append((at,arg))
- def set_flow(self, flow):
- self.flow = flow
+ def set_failarg(self, value):
+ self.failarg = value
+ if self.backward:
+ self.backward.failarg = value
- def get_flow(self):
- return self.flow
+ def is_failarg(self):
+ return self.failarg
def reverse_direction(self, ref):
""" if the parameter index is the same as idx_to then
@@ -505,13 +498,13 @@
for i,node in enumerate(self.nodes):
op = node.op
if op.is_always_pure():
- node.priority = 1
+ node.setpriority(1)
if op.is_guard():
- node.priority = 2
+ node.setpriority(2)
# the label operation defines all operations at the
# beginning of the loop
if op.getopnum() == rop.LABEL and i != jump_pos:
- node.priority = 100
+ node.setpriority(100)
label_pos = i
for arg in op.getarglist():
tracker.define(arg, node)
@@ -535,11 +528,11 @@
elif op.is_guard():
self.guards.append(node)
else:
- self._build_non_pure_dependencies(node, tracker)
+ self.build_non_pure_dependencies(node, tracker)
# pass 2 correct guard dependencies
for guard_node in self.guards:
op = guard_node.getoperation()
- self._build_guard_dependencies(guard_node, op.getopnum(), tracker)
+ self.build_guard_dependencies(guard_node, op.getopnum(), tracker)
# pass 3 find schedulable nodes
jump_node = self.nodes[jump_pos]
label_node = self.nodes[label_pos]
@@ -552,38 +545,69 @@
if node.provides_count() == 0:
node.edge_to(jump_node, None, label='jump')
- def _build_guard_dependencies(self, guard_node, guard_opnum, tracker):
+ def guard_argument_protection(self, guard_node, tracker):
+ """ the parameters the guard protects are an indicator for
+ dependencies. Consider the example:
+ i3 = ptr_eq(p1,p2)
+ guard_true(i3) [...]
+
+ guard_true|false are exceptions because they do not directly
+ protect the arguments, but a comparison function does.
+ """
+ guard_op = guard_node.getoperation()
+ guard_opnum = guard_op.getopnum()
+ if guard_opnum in (rop.GUARD_TRUE, rop.GUARD_FALSE):
+ for dep in guard_node.depends():
+ op = dep.to.getoperation()
+ for arg in op.getarglist():
+ if isinstance(arg, Box):
+ self.guard_exit_dependence(guard_node, arg, tracker)
+ elif guard_op.is_foldable_guard():
+ # these guards carry their protected variables directly as a parameter
+ for arg in guard_node.getoperation().getarglist():
+ if isinstance(arg, Box):
+ self.guard_exit_dependence(guard_node, arg, tracker)
+ elif guard_opnum == rop.GUARD_NOT_FORCED_2:
+ # must be emitted before finish, thus delayed the longest
+ guard_node.setpriority(-10)
+ elif guard_opnum in (rop.GUARD_OVERFLOW, rop.GUARD_NO_OVERFLOW):
+ # previous operation must be an ovf_operation
+ guard_node.setpriority(100)
+ prev_node = self.nodes[guard_node.getindex()-1]
+ assert prev_node.getoperation().is_ovf()
+ prev_node.edge_to(guard_node, None, label='overflow')
+ elif guard_opnum == rop.GUARD_NOT_FORCED:
+ # previous op must be one that can raise
+ guard_node.setpriority(100)
+ prev_node = self.nodes[guard_node.getindex()-1]
+ assert prev_node.getoperation().can_raise()
+ prev_node.edge_to(guard_node, None, label='forced')
+ elif guard_opnum in (rop.GUARD_NO_EXCEPTION, rop.GUARD_EXCEPTION):
+ # previous op must be one that can raise or a not forced guard
+ guard_node.setpriority(100)
+ prev_node = self.nodes[guard_node.getindex()-1]
+ prev_node.edge_to(guard_node, None, label='exception')
+ if not prev_node.getoperation().getopnum() == rop.GUARD_NOT_FORCED:
+ assert prev_node.getoperation().can_raise()
+ else:
+ pass # not invalidated, early exit, future condition!
+
+ def guard_exit_dependence(self, guard_node, var, tracker):
+ def_node = tracker.definition(var)
+ for dep in def_node.provides():
+ if guard_node.is_before(dep.to) and dep.because_of(var):
+ guard_node.edge_to(dep.to, var, label='guard_exit('+str(var)+')')
+
+ def build_guard_dependencies(self, guard_node, guard_opnum, tracker):
if guard_opnum >= rop.GUARD_NOT_INVALIDATED:
- # ignure invalidated & future condition guard
+ # ignore invalidated & future condition guard & early exit
return
- # 'GUARD_TRUE/1d',
- # 'GUARD_FALSE/1d',
- # 'GUARD_VALUE/2d',
- # 'GUARD_CLASS/2d',
- # 'GUARD_NONNULL/1d',
- # 'GUARD_ISNULL/1d',
- # 'GUARD_NONNULL_CLASS/2d',
+ # true dependencies
guard_op = guard_node.op
for arg in guard_op.getarglist():
tracker.depends_on_arg(arg, guard_node)
-
- variables = []
- for dep in guard_node.depends():
- op = dep.to.op
- for arg in op.getarglist():
- if isinstance(arg, Box):
- variables.append(arg)
- if op.result:
- variables.append(op.result)
- #
- for var in variables:
- try:
- def_node = tracker.definition(var)
- for dep in def_node.provides():
- if guard_node.is_before(dep.to) and dep.because_of(var):
- guard_node.edge_to(dep.to, var, label='prev('+str(var)+')')
- except KeyError:
- pass
+ # dependencies to uses of arguments it protects
+ self.guard_argument_protection(guard_node, tracker)
# handle fail args
if guard_op.getfailargs():
for arg in guard_op.getfailargs():
@@ -595,38 +619,11 @@
descr = guard_op.getdescr()
if at.is_before(guard_node) and \
not isinstance(descr, compile.ResumeAtLoopHeaderDescr):
- at.edge_to(guard_node, arg, label="fail")
+ at.edge_to(guard_node, arg, failarg=True, label="fail")
except KeyError:
assert False
- #
- # guards check overflow or raise are directly dependent
- # find the first non guard operation
- prev_op_idx = guard_node.opidx - 1
- while prev_op_idx > 0:
- prev_node = self.nodes[prev_op_idx]
- if prev_node.op.is_guard():
- prev_op_idx -= 1
- else:
- break
- prev_node = self.nodes[prev_op_idx]
- guard_op = guard_node.getoperation()
- prev_op = prev_node.getoperation()
- if guard_op.is_guard_exception() and prev_op.can_raise():
- self.guard_inhert(prev_node, guard_node)
- elif guard_op.is_guard_overflow() and prev_op.is_ovf():
- self.guard_inhert(prev_node, guard_node)
- elif guard_op.getopnum() == rop.GUARD_NOT_FORCED and prev_op.can_raise():
- self.guard_inhert(prev_node, guard_node)
- elif guard_op.getopnum() == rop.GUARD_NOT_FORCED_2 and prev_op.can_raise():
- self.guard_inhert(prev_node, guard_node)
- def guard_inhert(self, at, to):
- at.edge_to(to, None, label='inhert')
- for dep in at.provides():
- if to.is_before(dep.to):
- to.edge_to(dep.to, None, label='inhert')
-
- def _build_non_pure_dependencies(self, node, tracker):
+ def build_non_pure_dependencies(self, node, tracker):
op = node.op
if node.loads_from_complex_object():
# If this complex object load operation loads an index that has been
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
@@ -260,7 +260,7 @@
[p0, p1, i2] # 0: 1,2?,3?,4?,5?
i3 = int_add(i2,1) # 1: 2
i4 = call(p0, i3, descr=nonwritedescr) # 2: 3,4,5?
- guard_no_exception() [i2] # 3: 4,5?
+ guard_no_exception() [i2] # 3: 4?,5?
p2 = getarrayitem_gc(p1,i3,descr=intarraydescr) # 4: 5
jump(p2, p1, i3) # 5:
"""
@@ -271,12 +271,26 @@
[p0, p1, i2, i5] # 0: 1,2?,3?,4?,5?
i3 = int_add(i2,1) # 1: 2
i4 = call(i5, i3, descr=nonwritedescr) # 2: 3,4,5?
- guard_no_exception() [i2] # 3: 4,5?
+ guard_no_exception() [i2] # 3: 5?
p2 = getarrayitem_gc(p1,i3,descr=chararraydescr) # 4: 5
jump(p2, p1, i3, i5) # 5:
"""
self.assert_dependencies(ops, full_check=True)
+ def test_not_forced(self):
+ ops="""
+ [p0, p1, i2, i5] # 0: 1,2,4?,5,6
+ i4 = call(i5, i2, descr=nonwritedescr) # 1: 2,4,6
+ guard_not_forced() [i2] # 2: 3
+ guard_no_exception() [] # 3: 6
+ i3 = int_add(i2,1) # 4: 5
+ p2 = getarrayitem_gc(p1,i3,descr=chararraydescr) # 5: 6
+ jump(p2, p1, i2, i5) # 6:
+ """
+ self.assert_dependencies(ops, full_check=True)
+ assert self.last_graph.nodes[2].priority == 100
+ assert self.last_graph.nodes[3].priority == 100
+
def test_setarrayitem_dependency(self):
ops="""
[p0, i1] # 0: 1,2?,3?,4?
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -1058,12 +1058,12 @@
guard_not_invalidated() [p38, p12, p9, p14, p39, i37, i44, f35, i40, p42, i43, f34, i28, p36, i41]
guard_early_exit() [p38, p12, p9, p14, p39, i37, i44, f35, i40, p42, i43, f34, i28, p36, i41]
i50 = int_add(i28, 1)
+ i46 = int_add(i44, 8)
i48 = int_add(i41, 8)
- i46 = int_add(i44, 8)
i51 = int_add(i37, 8)
i52 = int_ge(i50, i18)
+ i55 = int_add(i44, 16)
i54 = int_add(i41, 16)
- i55 = int_add(i44, 16)
i56 = int_add(i37, 16)
i53 = int_add(i28, 2)
i57 = int_ge(i53, i18)
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -21,6 +21,7 @@
return 'NotAVectorizeableLoop()'
def debug_print_operations(loop):
+ """ NOT_RPYTHON """
if not we_are_translated():
print('--- loop instr numbered ---')
def ps(snap):
@@ -43,21 +44,26 @@
inline_short_preamble, start_state, False)
orig_ops = loop.operations
try:
+ debug_start("vec-opt-loop")
+ metainterp_sd.logger_opt.log_loop(loop.inputargs, loop.operations, "unroll", -2, 0, "pre vectorize")
metainterp_sd.profiler.count(Counters.OPT_VECTORIZE_TRY)
opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, loop, optimizations)
opt.propagate_all_forward()
metainterp_sd.profiler.count(Counters.OPT_VECTORIZED)
+
+ metainterp_sd.logger_opt.log_loop(loop.inputargs, loop.operations, "vec", -2, 0, "post vectorize")
except NotAVectorizeableLoop:
# vectorization is not possible
loop.operations = orig_ops
except Exception as e:
loop.operations = orig_ops
- debug_start("failed to vec loop")
- metainterp_sd.logger_noopt.log_loop(loop.inputargs, loop.operations)
- from rpython.rtyper.lltypesystem import lltype
- from rpython.rtyper.lltypesystem.lloperation import llop
- llop.debug_print_traceback(lltype.Void)
- debug_stop("failed to vec loop")
+ debug_print("failed to vectorize loop. THIS IS A FATAL ERROR!")
+ if we_are_translated():
+ from rpython.rtyper.lltypesystem import lltype
+ from rpython.rtyper.lltypesystem.lloperation import llop
+ llop.debug_print_traceback(lltype.Void)
+ finally:
+ debug_stop("vec-opt-loop")
class VectorizingOptimizer(Optimizer):
""" Try to unroll the loop and find instructions to group """
@@ -427,37 +433,47 @@
label_node = graph.getnode(0)
ee_guard_node = graph.getnode(self.early_exit_idx)
guards = graph.guards
- fail_args = []
for guard_node in guards:
if guard_node is ee_guard_node:
continue
- del_deps = []
- pullup = []
+ modify_later = []
valid_trans = True
last_prev_node = None
for path in guard_node.iterate_paths(ee_guard_node, True):
prev_node = path.second()
- if fail_args_break_dependency(guard_node, prev_node, ee_guard_node):
+ dep = prev_node.depends_on(guard_node)
+ if dep.is_failarg():
+ # this dependency we are able to break because it is soley
+ # relevant due to one or multiple fail args
if prev_node == last_prev_node:
+ # ...
+ # o o
+ # \ /
+ # (a)
+ # |
+ # (g)
+ # this graph yields 2 paths from (g), thus (a) is
+ # remembered and skipped
continue
- del_deps.append((prev_node, guard_node))
+ modify_later.append((prev_node, guard_node))
else:
if path.has_no_side_effects(exclude_first=True, exclude_last=True):
- #index_guards[guard.getindex()] = IndexGuard(guard, path.path[:])
path.set_schedule_priority(10)
- pullup.append(path.last_but_one())
+ modify_later.append((path.last_but_one(), None))
else:
valid_trans = False
break
last_prev_node = prev_node
if valid_trans:
- for a,b in del_deps:
- a.remove_edge_to(b)
- for lbo in pullup:
- if lbo is ee_guard_node:
- continue
- ee_guard_node.remove_edge_to(lbo)
- label_node.edge_to(lbo, label='pullup')
+ for a,b in modify_later:
+ if b is not None:
+ a.remove_edge_to(b)
+ else:
+ last_but_one = a
+ if last_but_one is ee_guard_node:
+ continue
+ ee_guard_node.remove_edge_to(last_but_one)
+ label_node.edge_to(last_but_one, label='pullup')
# only the last guard needs a connection
guard_node.edge_to(ee_guard_node, label='pullup-last-guard')
guard_node.relax_guard_to(ee_guard_node)
@@ -516,26 +532,6 @@
return False
return True
-def fail_args_break_dependency(guard, prev_op, target_guard):
- failargs = guard.getfailarg_set()
- new_failargs = target_guard.getfailarg_set()
-
- op = prev_op.getoperation()
- if not op.is_always_pure(): # TODO has_no_side_effect():
- return True
- if op.result is not None:
- arg = op.result
- if arg not in failargs or \
- arg in failargs and arg in new_failargs:
- return False
- for arg in op.getarglist():
- if arg not in failargs or \
- arg in failargs and arg in new_failargs:
- return False
- # THINK about: increased index in fail arg, but normal index on arglist
- # this might be an indicator for edge removal
- return True
-
class PackType(PrimitiveTypeMixin):
UNKNOWN_TYPE = '-'
More information about the pypy-commit
mailing list