[pypy-commit] pypy vecopt2: making the dependency builder less conservative. added test for aliasing modification problem

Tue May 5 09:45:40 CEST 2015

Author: Richard Plangger <rich at pasra.at>
Branch: vecopt2
Changeset: r77091:36ba6737e164
Date: 2015-03-25 15:47 +0100
http://bitbucket.org/pypy/pypy/changeset/36ba6737e164/

Log:	making the dependency builder less conservative. added test for
	aliasing modification problem

diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py
--- a/rpython/jit/metainterp/optimizeopt/dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/dependency.py
@@ -4,20 +4,28 @@
 from rpython.rtyper.lltypesystem import llmemory
 from rpython.rlib.unroll import unrolling_iterable
 
-MODIFY_COMPLEX_OBJ = [ (rop.SETARRAYITEM_GC, 0)
-                     , (rop.SETARRAYITEM_RAW, 0)
-                     , (rop.RAW_STORE, 0)
-                     , (rop.SETINTERIORFIELD_GC, 0)
-                     , (rop.SETINTERIORFIELD_RAW, 0)
-                     , (rop.SETFIELD_GC, 0)
-                     , (rop.SETFIELD_RAW, 0)
-                     , (rop.ZERO_PTR_FIELD, 0)
-                     , (rop.ZERO_PTR_FIELD, 0)
-                     , (rop.ZERO_ARRAY, 0)
-                     , (rop.STRSETITEM, 0)
-                     , (rop.UNICODESETITEM, 0)
+MODIFY_COMPLEX_OBJ = [ (rop.SETARRAYITEM_GC, 0, 1)
+                     , (rop.SETARRAYITEM_RAW, 0, 1)
+                     , (rop.RAW_STORE, 0, 1)
+                     , (rop.SETINTERIORFIELD_GC, 0, -1)
+                     , (rop.SETINTERIORFIELD_RAW, 0, -1)
+                     , (rop.SETFIELD_GC, 0, -1)
+                     , (rop.SETFIELD_RAW, 0, -1)
+                     , (rop.ZERO_PTR_FIELD, 0, -1)
+                     , (rop.ZERO_PTR_FIELD, 0, -1)
+                     , (rop.ZERO_ARRAY, 0, -1)
+                     , (rop.STRSETITEM, 0, -1)
+                     , (rop.UNICODESETITEM, 0, -1)
                      ]
 
+LOAD_COMPLEX_OBJ = [ (rop.GETARRAYITEM_GC, 0, 1)
+                   , (rop.GETARRAYITEM_RAW, 0, 1)
+                   , (rop.GETINTERIORFIELD_GC, 0, 1)
+                   , (rop.RAW_LOAD, 0, 1)
+                   , (rop.GETFIELD_GC, 0, 1)
+                   , (rop.GETFIELD_RAW, 0, 1)
+                   ]
+
 class Dependency(object):
     def __init__(self, idx_from, idx_to, arg):
         assert idx_from != idx_to
@@ -55,6 +63,10 @@
 
         self.build_dependencies(self.operations)
 
+    def is_complex_object_load(self, op):
+        opnum = op.getopnum()
+        return rop._ALWAYS_PURE_LAST <= opnum and opnum <= rop._MALLOC_FIRST
+
     def build_dependencies(self, operations):
         """ This is basically building the definition-use chain and saving this
             information in a graph structure. This is the same as calculating
@@ -64,6 +76,7 @@
             the operations are in SSA form
         """
         defining_indices = {}
+        complex_indices = {}
 
         for i,op in enumerate(operations):
             # the label operation defines all operations at the
@@ -73,66 +86,115 @@
                     defining_indices[arg] = 0
                 continue # prevent adding edge to the label itself
 
-            # TODO what about a JUMP operation? it often has many parameters
-            # (10+) and uses  nearly every definition in the trace (for loops).
-            # Maybe we can skip this operation and let jump NEVER move...
-
             if op.result is not None:
-                # the trace is always in SSA form, thus it is neither possible to have a WAR
-                # not a WAW dependency
+                # the trace is always in SSA form, thus it is neither possible
+                # to have a WAR not a WAW dependency
                 defining_indices[op.result] = i
 
-            for arg in op.getarglist():
-                if arg in defining_indices:
-                    idx = defining_indices[arg]
-                    self._put_edge(idx, i, arg)
+            if self.is_complex_object_load(op):
+                self._reuse_complex_definitions(op, i, defining_indices, complex_indices)
+            elif op.getopnum() == rop.JUMP:
+                self._finish_building_graph(op, i, defining_indices, complex_indices)
+            else:
+                # normal case every arguments definition is set
+                for arg in op.getarglist():
+                    self._def_use(arg, i, defining_indices)
 
             if op.getfailargs():
                 for arg in op.getfailargs():
-                    if arg in defining_indices:
-                        idx = defining_indices[arg]
-                        self._put_edge(idx, i, arg)
+                    self._def_use(arg, i, defining_indices)
 
             # a trace has store operations on complex operations
             # (e.g. setarrayitem). in general only once cell is updated,
             # and in theroy it could be tracked but for simplicity, the
             # whole is marked as redefined, thus any later usage sees
             # only this definition.
-            self._redefine_if_complex_obj_is_modified(op, i, defining_indices)
+            self._redefine_complex_modification(op, i, defining_indices,
+                                                complex_indices)
             if op.is_guard() and i > 0:
                 self._guard_dependency(op, i, operations, defining_indices)
 
-    def _redefine_if_complex_obj_is_modified(self, op, index, defining_indices):
+    def _finish_building_graph(self, jumpop, orig_index, defining_indices, complex_indices):
+        assert jumpop.getopnum() == rop.JUMP
+        for (cobj, obj_index),index in complex_indices.items():
+            try:
+                old_idx = defining_indices[cobj]
+                if old_idx < index:
+                    defining_indices[cobj] = index
+            except KeyError:
+                defining_indices[cobj] = index
+
+        for arg in jumpop.getarglist():
+            self._def_use(arg, orig_index, defining_indices)
+
+    def _reuse_complex_definitions(self, op, index, defining_indices, complex_indices):
+        """ If this complex object load operation loads an index that has been
+        modified, the last modification should be used to put a def-use edge.
+        """
+        for opnum, i, j in unrolling_iterable(LOAD_COMPLEX_OBJ):
+            if opnum == op.getopnum():
+                cobj = op.getarg(i)
+                index_var = op.getarg(j)
+                try:
+                    cindex = complex_indices[(cobj, index_var)]
+                    self._put_edge(cindex, index, cobj)
+                except KeyError:
+                    # not redefined, edge to the label(...) definition
+                    self._def_use(cobj, index, defining_indices)
+
+                # def-use for the index variable
+                self._def_use(index_var, index, defining_indices)
+
+    def _def_use(self, param, index, defining_indices):
+        try:
+            def_idx = defining_indices[param]
+            self._put_edge(def_idx, index, param)
+        except KeyError:
+            pass
+
+    def _redefine_complex_modification(self, op, index, defining_indices, complex_indices):
         if not op.has_no_side_effect():
-            for arg in self._destroyed_arguments(op):
-                try:
-                    # put an edge from the definition and all later uses until this
-                    # instruction to this instruction
-                    def_idx = defining_indices[arg]
-                    for dep in self.instr_dependencies(def_idx):
-                        if dep.idx_to >= index:
-                            break
-                        self._put_edge(dep.idx_to, index, arg)
-                    self._put_edge(def_idx, index, arg)
-                except KeyError:
-                    pass
+            for cobj, arg in self._destroyed_arguments(op):
+                if arg is not None:
+                    # tracks the exact cell that is modified
+                    try:
+                        cindex = complex_indices[(cobj,arg)]
+                        self._put_edge(cindex, index, cobj)
+                    except KeyError:
+                        pass
+                    complex_indices[(cobj,arg)] = index
+                else:
+                    # we cannot prove that only a cell is modified, but we have
+                    # to assume that many of them are!
+                    try:
+                        # put an edge from the def. and all later uses until this
+                        # instruction to this instruction
+                        def_idx = defining_indices[cobj]
+                        for dep in self.instr_dependencies(def_idx):
+                            if dep.idx_to >= index:
+                                break
+                            self._put_edge(dep.idx_to, index, arg)
+                        self._put_edge(def_idx, index, arg)
+                    except KeyError:
+                        pass
 
     def _destroyed_arguments(self, op):
-        # conservative, if an item in array p0 is modified or a call
-        # contains a boxptr parameter, it is assumed that this is a
-        # new definition.
+        # if an item in array p0 is modified or a call contains an argument
+        # it can modify it is returned in the destroyed list.
         args = []
         if op.is_call() and op.getopnum() != rop.CALL_ASSEMBLER:
             # free destroys an argument -> connect all uses & def with it
             descr = op.getdescr()
             extrainfo = descr.get_extra_info()
             if extrainfo.oopspecindex == EffectInfo.OS_RAW_FREE:
-                args.append(op.getarg(1))
+                args.append((op.getarg(1),None))
         else:
-            for opnum, i in unrolling_iterable(MODIFY_COMPLEX_OBJ):
+            for opnum, i, j in unrolling_iterable(MODIFY_COMPLEX_OBJ):
                 if op.getopnum() == opnum:
-                    arg = op.getarg(i)
-                    args.append(arg)
+                    if j == -1:
+                        args.append((op.getarg(i), None))
+                    else:
+                        args.append((op.getarg(i), op.getarg(j)))
         return args
 
     def _guard_dependency(self, op, i, operations, defining_indices):
@@ -195,7 +257,7 @@
         edges = self.adjacent_list[idx]
         return edges
 
-    def independant(self, ai, bi):
+    def independent(self, ai, bi):
         """ An instruction depends on another if there is a dependency path from
         A to B. It is not enough to check only if A depends on B, because
         due to transitive relations.
@@ -216,7 +278,7 @@
                     continue
 
                 if dep.idx_from == ai:
-                    # dependant. There is a path from ai to bi
+                    # dependent. There is a path from ai to bi
                     return False
                 stmt_indices.append(dep.idx_from)
         return True
@@ -243,8 +305,14 @@
     def __repr__(self):
         graph = "graph([\n"
 
-        for l in self.adjacent_list:
-            graph += "       " + str([d.idx_to for d in l]) + "\n"
+        for i,l in enumerate(self.adjacent_list):
+            graph += "       "
+            for d in l:
+                if i == d.idx_from:
+                    graph += str(d.idx_to) + ","
+                else:
+                    graph += str(d.idx_from) + ","
+            graph += "\n"
 
         return graph + "      ])"
 
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
@@ -56,11 +56,12 @@
                    sorted([l.idx_to for l in lb])
             assert sorted([l.idx_from for l in la]) == \
                    sorted([l.idx_from for l in lb])
-    
+
     def assert_independent(self, a, b):
-        assert self.last_graph.independant(a,b), "{a} and {b} are dependant!".format(a=a,b=b)
+        assert self.last_graph.independent(a,b), "{a} and {b} are dependent!".format(a=a,b=b)
+
     def assert_dependent(self, a, b):
-        assert not self.last_graph.independant(a,b), "{a} and {b} are independant!".format(a=a,b=b)
+        assert not self.last_graph.independent(a,b), "{a} and {b} are independent!".format(a=a,b=b)
 
 class BaseTestDependencyGraph(DepTestHelper):
     def test_dependency_empty(self):
@@ -256,5 +257,30 @@
         self.assert_edges(dep_graph,
                 [ [1,2,3,4,5], [0,2,4,5], [0,1,3], [0,2], [0,1,5], [4,0,1] ])
 
+    def test_setarrayitem_dependency(self):
+        ops="""
+        [p0, i1]
+        setarrayitem_raw(p0, i1, 1, descr=floatarraydescr) # redef p0[i1]
+        i2 = getarrayitem_raw(p0, i1, descr=floatarraydescr) # use of redef above
+        setarrayitem_raw(p0, i1, 2, descr=floatarraydescr) # redef of p0[i1]
+        jump(p0, i2)
+        """
+        dep_graph = self.build_dependency(ops)
+        self.assert_edges(dep_graph,
+                [ [1,2,3], [0,2,3], [0,1,4], [0,1,4], [2,3] ])
+
+    def test_setarrayitem_alias_dependency(self):
+        # #1 depends on #2, i1 and i2 might alias, reordering would destroy
+        # coorectness
+        ops="""
+        [p0, i1, i2]
+        setarrayitem_raw(p0, i1, 1, descr=floatarraydescr) #1
+        setarrayitem_raw(p0, i2, 2, descr=floatarraydescr) #2
+        jump(p0, i1, i2)
+        """
+        dep_graph = self.build_dependency(ops)
+        self.assert_edges(dep_graph,
+                [ [1,2,3], [0,2], [0,1,3], [0,2] ])
+
 class TestLLtype(BaseTestDependencyGraph, LLtypeMixin):
     pass
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -98,7 +98,7 @@
         import itertools
         combintations = set(itertools.product(range(instr_count),
                                               range(instr_count)))
-        combintations -= set([(5,10),(4,9)])
+        combintations -= set(exceptions)
         for a,b in combintations:
             self.assert_packset_not_contains(packset, a, b)
 
@@ -583,7 +583,7 @@
         """
         loop = self.parse_loop(ops)
         vopt = self.init_pack_set(loop,1)
-        assert vopt.dependency_graph.independant(1,5)
+        assert vopt.dependency_graph.independent(1,5)
         assert vopt.pack_set is not None
         assert len(vopt.vec_info.memory_refs) == 2
         assert len(vopt.pack_set.packs) == 1
@@ -611,7 +611,7 @@
         for i in range(3):
             x = (i+1)*2
             y = x + 2
-            assert vopt.dependency_graph.independant(x,y)
+            assert vopt.dependency_graph.independent(x,y)
             self.assert_packset_contains(vopt.pack_set, x,y)
 
     def test_packset_init_2(self):
@@ -644,7 +644,7 @@
         for i in range(15):
             x = (i+1)*4
             y = x + 4
-            assert vopt.dependency_graph.independant(x,y)
+            assert vopt.dependency_graph.independent(x,y)
             self.assert_packset_contains(vopt.pack_set, x, y)
 
     def test_isomorphic_operations(self):
@@ -684,10 +684,32 @@
         vopt = self.extend_pack_set(loop,1)
         self.debug_print_operations(loop)
         assert len(vopt.vec_info.memory_refs) == 2
-        assert vopt.dependency_graph.independant(5,10) == True
+        assert vopt.dependency_graph.independent(5,10) == True
         assert len(vopt.pack_set.packs) == 2
         self.assert_packset_empty(vopt.pack_set, len(loop.operations),
                                   [(5,10), (4,9)])
 
+    def test_packset_extend_load_modify_store(self):
+        ops = """
+        [p0,i0]
+        i1 = int_add(i0, 1)
+        i2 = int_le(i1, 16)
+        guard_true(i2) [p0, i0]
+        i3 = getarrayitem_gc(p0, i1, descr=chararraydescr)
+        i4 = int_mul(i3, 2)
+        setarrayitem_gc(p0, i1, i4, descr=chararraydescr)
+        jump(p0,i1)
+        """
+        loop = self.parse_loop(ops)
+        vopt = self.extend_pack_set(loop,1)
+        self.debug_print_operations(loop)
+        assert len(vopt.vec_info.memory_refs) == 2
+        assert vopt.dependency_graph.independent(4,10)
+        assert vopt.dependency_graph.independent(5,11)
+        assert vopt.dependency_graph.independent(6,12)
+        assert len(vopt.pack_set.packs) == 3
+        self.assert_packset_empty(vopt.pack_set, len(loop.operations),
+                                  [(5,11), (4,10), (6,12)])
+
 class TestLLtype(BaseTestVectorize, LLtypeMixin):
     pass
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -309,7 +309,7 @@
         l_op = self.operations[lop_idx]
         r_op = self.operations[rop_idx]
         if isomorphic(l_op, r_op):
-            if self.dependency_graph.independant(lop_idx, rop_idx):
+            if self.dependency_graph.independent(lop_idx, rop_idx):
                 for pack in self.packs:
                     if pack.left.opidx == lop_idx or \
                        pack.right.opidx == rop_idx:
@@ -329,7 +329,7 @@
 class Pack(object):
     """ A pack is a set of n statements that are:
         * isomorphic
-        * independant
+        * independent
         Statements are named operations in the code.
     """
     def __init__(self, ops):