[pypy-commit] pypy stmgc-c7: more optimal read-barrier placement for non-jit. doesn't improve performance

Raemi noreply at buildbot.pypy.org
Thu Apr 17 17:28:26 CEST 2014


Author: Remi Meier <remi.meier at inf.ethz.ch>
Branch: stmgc-c7
Changeset: r70703:5e60afcef785
Date: 2014-04-17 17:25 +0200
http://bitbucket.org/pypy/pypy/changeset/5e60afcef785/

Log:	more optimal read-barrier placement for non-jit. doesn't improve
	performance measurably

diff --git a/rpython/translator/stm/readbarrier.py b/rpython/translator/stm/readbarrier.py
--- a/rpython/translator/stm/readbarrier.py
+++ b/rpython/translator/stm/readbarrier.py
@@ -1,14 +1,17 @@
 from rpython.flowspace.model import SpaceOperation, Constant, Variable
-from rpython.translator.unsimplify import varoftype
 from rpython.rtyper.lltypesystem import lltype
+from rpython.translator.unsimplify import varoftype, insert_empty_block
+from rpython.translator.unsimplify import insert_empty_startblock
+from rpython.translator.simplify import join_blocks
 
 
+MALLOCS = set([
+    'malloc', 'malloc_varsize',
+    'malloc_nonmovable', 'malloc_nonmovable_varsize',
+    ])
+
 READ_OPS = set(['getfield', 'getarrayitem', 'getinteriorfield', 'raw_load'])
 
-
-def is_gc_ptr(T):
-    return isinstance(T, lltype.Ptr) and T.TO._gckind == 'gc'
-
 def unwraplist(list_v):
     for v in list_v:
         if isinstance(v, Constant):
@@ -33,25 +36,38 @@
         return OUTER._immutable_interiorfield(unwraplist(op.args[1:-1]))
     if op.opname in ('raw_load', 'raw_store'):
         return False
+    raise AssertionError(op)
 
+def needs_barrier(frm, to):
+    return to > frm
 
-def insert_stm_read_barrier(transformer, graph):
-    # We need to put enough 'stm_read' in the graph so that any
-    # execution of a READ_OP on some GC object is guaranteed to also
-    # execute either 'stm_read' or 'stm_write' on the same GC object
-    # during the same transaction.
-    #
-    # XXX this can be optimized a lot, but for now we go with the
-    # simplest possible solution...
-    #
-    gcremovetypeptr = transformer.translator.config.translation.gcremovetypeptr
+def is_gc_ptr(T):
+    return isinstance(T, lltype.Ptr) and T.TO._gckind == 'gc'
 
-    for block in graph.iterblocks():
-        if not block.operations:
-            continue
-        newops = []
+
+
+
+class BlockTransformer(object):
+
+    def __init__(self, stmtransformer, block):
+        self.stmtransformer = stmtransformer
+        self.block = block
+        self.patch = None
+        self.inputargs_category = None
+        self.inputargs_category_per_link = {}
+
+    def init_start_block(self):
+        from_outside = ['A'] * len(self.block.inputargs)
+        self.inputargs_category_per_link[None] = from_outside
+        self.update_inputargs_category()
+
+
+    def analyze_inside_block(self, graph):
+        gcremovetypeptr = (
+            self.stmtransformer.translator.config.translation.gcremovetypeptr)
+        wants_a_barrier = {}
         stm_ignored = False
-        for op in block.operations:
+        for op in self.block.operations:
             is_getter = (op.opname in READ_OPS and
                          op.result.concretetype is not lltype.Void and
                          is_gc_ptr(op.args[0].concretetype))
@@ -61,26 +77,188 @@
                 op.args[0].concretetype.TO._hints.get('typeptr')):
                 # typeptr is always immutable
                 pass
-            elif ((op.opname in ('getarraysize', 'getinteriorarraysize') and
-                  is_gc_ptr(op.args[0].concretetype)) or
+            elif (op.opname in ('getarraysize', 'getinteriorarraysize') and
+                  is_gc_ptr(op.args[0].concretetype) or
                   (is_getter and is_immutable(op))):
                 # immutable getters
-                # 'weakref_deref': kind of immutable, but the GC has to see
-                #     which transactions read from a dying weakref, so we
-                #     need the barrier nonetheless...
                 pass
             elif is_getter:
+                # the non-immutable getfields need a regular read barrier
                 if not stm_ignored:
-                    v_none = varoftype(lltype.Void)
-                    newops.append(SpaceOperation('stm_read',
-                                                 [op.args[0]], v_none))
-                    transformer.read_barrier_counts += 1
+                    wants_a_barrier[op] = 'R'
+            elif op.opname == 'weakref_deref':
+                # 'weakref_deref' needs a read barrier if we want to work
+                # around the "weakref issue"
+                assert not stm_ignored
+                wants_a_barrier[op] = 'R'
             elif op.opname == 'stm_ignored_start':
-                assert stm_ignored == False
+                assert not stm_ignored, "nested 'with stm_ignored'"
                 stm_ignored = True
             elif op.opname == 'stm_ignored_stop':
-                assert stm_ignored == True
+                assert stm_ignored, "stm_ignored_stop without start?"
                 stm_ignored = False
-            newops.append(op)
-        assert stm_ignored == False
-        block.operations = newops
+        #
+        if stm_ignored:
+            raise Exception("%r: 'with stm_ignored:' code body too complex"
+                            % (graph,))
+        self.wants_a_barrier = wants_a_barrier
+
+
+    def flow_through_block(self):
+        def cat_fetch(v):
+            return categories.setdefault(v, 'A')
+
+        def get_category_or_null(v):
+            # 'v' is an original variable here, or a constant
+            if isinstance(v, Constant) and not v.value:    # a NULL constant
+                return 'Z'
+            if v in categories:
+                return categories[v]
+            return 'A'
+
+        newoperations = []
+        stmtransformer = self.stmtransformer
+        categories = {}
+
+        # make the initial trivial renamings needed to have some precise
+        # categories for the input args
+        for v, cat in zip(self.block.inputargs, self.inputargs_category):
+            if is_gc_ptr(v.concretetype):
+                assert cat is not None
+                categories[v] = cat
+
+        for op in self.block.operations:
+            if (op.opname in ('cast_pointer', 'same_as') and
+                    is_gc_ptr(op.result.concretetype)):
+                categories[op.result] = cat_fetch(op.args[0])
+                newoperations.append(op)
+                continue
+            #
+            to = self.wants_a_barrier.get(op)
+            if to is not None:
+                v = op.args[0]
+                frm = cat_fetch(v)
+                if needs_barrier(frm, to):
+                    stmtransformer.read_barrier_counts += 1
+                    v_none = varoftype(lltype.Void)
+                    newop = SpaceOperation('stm_read', [v], v_none)
+                    categories[v] = to
+                    newoperations.append(newop)
+            #
+            newoperations.append(op)
+            #
+            if stmtransformer.break_analyzer.analyze(op):
+                # this operation can perform a transaction break:
+                # all references are lowered to 'A' again
+                for v in categories:
+                    categories[v] = 'A'
+
+            if op.opname == 'debug_stm_flush_barrier':
+                for v in categories:
+                    categories[v] = 'A'
+
+            if op.opname in MALLOCS:
+                categories[op.result] = 'R'
+
+        blockoperations = newoperations
+        linkoperations = []
+        for link in self.block.exits:
+            output_categories = []
+            for v in link.args:
+                if is_gc_ptr(v.concretetype):
+                    cat = cat_fetch(v)
+                else:
+                    cat = None
+                output_categories.append(cat)
+            linkoperations.append(output_categories)
+        #
+        # Record how we'd like to patch the block, but don't do any
+        # patching yet
+        self.patch = (blockoperations, linkoperations)
+
+
+    def update_targets(self, block_transformers):
+        (_, linkoperations) = self.patch
+        assert len(linkoperations) == len(self.block.exits)
+        targetbts = []
+        for link, output_categories in zip(self.block.exits, linkoperations):
+            targetblock = link.target
+            if targetblock not in block_transformers:
+                continue      # ignore the exit block
+            targetbt = block_transformers[targetblock]
+            targetbt.inputargs_category_per_link[link] = output_categories
+            if targetbt.update_inputargs_category():
+                targetbts.append(targetbt)
+        return set(targetbts)
+
+    def update_inputargs_category(self):
+        values = self.inputargs_category_per_link.values()
+        newcats = []
+        for i, v in enumerate(self.block.inputargs):
+            if is_gc_ptr(v.concretetype):
+                cats = [output_categories[i] for output_categories in values]
+                assert None not in cats
+                newcats.append(min(cats))
+            else:
+                newcats.append(None)
+        if newcats != self.inputargs_category:
+            self.inputargs_category = newcats
+            return True
+        else:
+            return False
+
+
+    def patch_now(self):
+        if self.patch is None:
+            return
+        newoperations, linkoperations = self.patch
+        self.block.operations = newoperations
+        assert len(linkoperations) == len(self.block.exits)
+        # for link, (newargs, newoperations, _) in zip(self.block.exits,
+        #                                              linkoperations):
+        #     link.args[:] = newargs
+        #     if newoperations:
+        #         # must put them in a fresh block along the link
+        #         annotator = self.stmtransformer.translator.annotator
+        #         insert_empty_block(annotator, link, newoperations)
+
+
+def insert_stm_read_barrier(stmtransformer, graph):
+    """This function uses the following characters for 'categories':
+
+           * 'A': any general pointer
+           * 'R': the read barrier was applied
+           * 'Z': the null constant
+
+       The letters are chosen so that a barrier is needed to change a
+       pointer from category x to category y if and only if y > x.
+    """
+    # XXX: we currently don't use the information that any write
+    # operation on a gcptr will make it readable automatically
+    join_blocks(graph)
+    annotator = stmtransformer.translator.annotator
+    insert_empty_startblock(annotator, graph)
+
+    block_transformers = {}
+
+    for block in graph.iterblocks():
+        if block.operations == ():
+            continue
+        bt = BlockTransformer(stmtransformer, block)
+        bt.analyze_inside_block(graph)
+        block_transformers[block] = bt
+
+    bt = block_transformers[graph.startblock]
+    bt.init_start_block()
+    pending = set([bt])
+
+    while pending:
+        bt = pending.pop()
+        bt.flow_through_block()
+        pending |= bt.update_targets(block_transformers)
+
+    for bt in block_transformers.values():
+        bt.patch_now()
+
+    # needed only for some fragile test ztranslated.test_stm_ignored
+    join_blocks(graph)
diff --git a/rpython/translator/stm/test/test_readbarrier.py b/rpython/translator/stm/test/test_readbarrier.py
--- a/rpython/translator/stm/test/test_readbarrier.py
+++ b/rpython/translator/stm/test/test_readbarrier.py
@@ -1,7 +1,8 @@
 from rpython.rlib.objectmodel import stm_ignored
 from rpython.translator.stm.test.transform_support import BaseTestTransform
-from rpython.rtyper.lltypesystem import lltype
-
+from rpython.rtyper.lltypesystem import lltype, rffi
+from rpython.rlib.rstm import register_invoke_around_extcall
+from rpython.rtyper.lltypesystem.lloperation import llop
 
 class TestReadBarrier(BaseTestTransform):
     do_read_barrier = True
@@ -26,6 +27,127 @@
         assert res == 42
         assert self.read_barriers == [x1]
 
+    def test_array_size(self):
+        array_gc = lltype.GcArray(('z', lltype.Signed))
+        array_nongc = lltype.Array(('z', lltype.Signed))
+        Q = lltype.GcStruct('Q',
+                            ('gc', lltype.Ptr(array_gc)),
+                            ('raw', lltype.Ptr(array_nongc)))
+        q = lltype.malloc(Q, immortal=True)
+        q.gc = lltype.malloc(array_gc, n=3, flavor='gc', immortal=True)
+        q.raw = lltype.malloc(array_nongc, n=5, flavor='raw', immortal=True)
+        def f1(n):
+            if n == 1:
+                return len(q.gc)
+            else:
+                return len(q.raw)
+        self.interpret(f1, [1])
+        assert self.read_barriers == [q]
+        self.interpret(f1, [0])
+        assert self.read_barriers == [q]
+
+    def test_simple_read_2(self):
+        X = lltype.GcStruct('X', ('foo', lltype.Signed))
+        x2 = lltype.malloc(X, immortal=True)
+        x2.foo = 81
+        null = lltype.nullptr(X)
+
+        def f1(n):
+            if n < 1:
+                p = null
+            else:
+                p = x2
+            return p.foo
+
+        res = self.interpret(f1, [4])
+        assert res == 81
+        assert self.read_barriers == [x2]
+
+
+    def test_multiple_reads(self):
+        X = lltype.GcStruct('X', ('foo', lltype.Signed),
+                                 ('bar', lltype.Signed))
+        x1 = lltype.malloc(X, immortal=True)
+        x1.foo = 6
+        x1.bar = 7
+        x2 = lltype.malloc(X, immortal=True)
+        x2.foo = 81
+        x2.bar = -1
+
+        def f1(n):
+            if n > 1:
+                return x2.foo * x2.bar
+            else:
+                return x1.foo * x1.bar
+
+        res = self.interpret(f1, [4])
+        assert res == -81
+        assert self.read_barriers == [x2]
+
+    def test_malloc(self):
+        X = lltype.GcStruct('X', ('foo', lltype.Signed))
+        def f1(n):
+            p = lltype.malloc(X)
+            p.foo = n
+
+        self.interpret(f1, [4])
+        assert self.read_barriers == []
+
+    def test_repeat_read_barrier_after_malloc(self):
+        X = lltype.GcStruct('X', ('foo', lltype.Signed))
+        x1 = lltype.malloc(X, immortal=True)
+        x1.foo = 6
+        def f1(n):
+            i = x1.foo
+            lltype.malloc(X)
+            i = x1.foo + i
+            return i
+
+        self.interpret(f1, [4])
+        assert self.read_barriers == [x1]
+
+    def test_call_external_release_gil(self):
+        X = lltype.GcStruct('X', ('foo', lltype.Signed))
+        def f1(p):
+            register_invoke_around_extcall()
+            x1 = p.foo
+            external_release_gil()
+            x2 = p.foo
+            return x1 * x2
+
+        x = lltype.malloc(X, immortal=True); x.foo = 6
+        res = self.interpret(f1, [x])
+        assert res == 36
+        assert self.read_barriers == [x, x]
+
+    def test_call_external_any_gcobj(self):
+        X = lltype.GcStruct('X', ('foo', lltype.Signed))
+        def f1(p):
+            register_invoke_around_extcall()
+            x1 = p.foo
+            external_any_gcobj()
+            x2 = p.foo
+            return x1 * x2
+
+        x = lltype.malloc(X, immortal=True); x.foo = 6
+        res = self.interpret(f1, [x])
+        assert res == 36
+        assert self.read_barriers == [x]
+
+    def test_call_external_safest(self):
+        X = lltype.GcStruct('X', ('foo', lltype.Signed))
+        def f1(p):
+            register_invoke_around_extcall()
+            x1 = p.foo
+            external_safest()
+            x2 = p.foo
+            return x1 * x2
+
+        x = lltype.malloc(X, immortal=True); x.foo = 6
+        res = self.interpret(f1, [x])
+        assert res == 36
+        assert self.read_barriers == [x]
+
     def test_stm_ignored_read(self):
         X = lltype.GcStruct('X', ('foo', lltype.Signed))
         x1 = lltype.malloc(X, immortal=True)
@@ -48,3 +170,19 @@
         res = self.interpret(f1, [2])
         assert res == 42
         assert self.read_barriers == [x1]
+
+
+
+
+external_release_gil = rffi.llexternal('external_release_gil', [], lltype.Void,
+                                       _callable=lambda: None,
+                                       random_effects_on_gcobjs=True,
+                                       releasegil=True)   # GIL is released
+external_any_gcobj = rffi.llexternal('external_any_gcobj', [], lltype.Void,
+                                     _callable=lambda: None,
+                                     random_effects_on_gcobjs=True,
+                                     releasegil=False)   # GIL is not released
+external_safest = rffi.llexternal('external_safest', [], lltype.Void,
+                                  _callable=lambda: None,
+                                  random_effects_on_gcobjs=False,
+                                  releasegil=False)   # GIL is not released
diff --git a/rpython/translator/stm/test/transform_support.py b/rpython/translator/stm/test/transform_support.py
--- a/rpython/translator/stm/test/transform_support.py
+++ b/rpython/translator/stm/test/transform_support.py
@@ -103,12 +103,6 @@
 
     def op_setfield(self, obj, fieldname, fieldvalue):
         if obj._TYPE.TO._gckind == 'gc':
-            T = lltype.typeOf(fieldvalue)
-            if isinstance(T, lltype.Ptr) and T.TO._gckind == 'gc':
-                self.check_category(obj, 'W')
-            else:
-                self.check_category(obj, 'V')
-            # convert R -> Q all other pointers to the same object we can find
             for p in self.all_stm_ptrs():
                 if p._category == 'R' and p._T == obj._T and p == obj:
                     _stmptr._category.__set__(p, 'Q')
@@ -138,7 +132,6 @@
                 _stmptr._category.__set__(p, 'V')
         p = LLFrame.op_malloc(self, obj, flags)
         ptr2 = _stmptr(p, 'W')
-        self.llinterpreter.tester.writemode.add(ptr2._obj)
         return ptr2
 
     def transaction_break(self):
@@ -147,6 +140,13 @@
             if p._category > 'I':
                 _stmptr._category.__set__(p, 'I')
 
+
+    def op_stm_commit_if_not_atomic(self):
+        self.transaction_break()
+
+    def op_stm_start_inevitable_if_not_atomic(self):
+        self.transaction_break()
+
     def op_stm_commit_transaction(self):
         self.transaction_break()
 
diff --git a/rpython/translator/stm/transform.py b/rpython/translator/stm/transform.py
--- a/rpython/translator/stm/transform.py
+++ b/rpython/translator/stm/transform.py
@@ -2,6 +2,7 @@
 from rpython.translator.stm.readbarrier import insert_stm_read_barrier
 from rpython.translator.stm.jitdriver import reorganize_around_jit_driver
 from rpython.translator.stm.threadlocalref import transform_tlref
+from rpython.translator.stm.breakfinder import TransactionBreakAnalyzer
 from rpython.translator.c.support import log
 
 
@@ -29,10 +30,12 @@
         self.print_logs(3)
 
     def transform_read_barrier(self):
+        self.break_analyzer = TransactionBreakAnalyzer(self.translator)
         self.read_barrier_counts = 0
         for graph in self.translator.graphs:
             insert_stm_read_barrier(self, graph)
         log.info("%d read barriers inserted" % (self.read_barrier_counts,))
+        del self.break_analyzer
 
     def transform_turn_inevitable(self):
         for graph in self.translator.graphs:


More information about the pypy-commit mailing list