[pypy-commit] pypy stmgc-c7: more optimal read-barrier placement for non-jit. doesn't improve performance
Raemi
noreply at buildbot.pypy.org
Thu Apr 17 17:28:26 CEST 2014
Author: Remi Meier <remi.meier at inf.ethz.ch>
Branch: stmgc-c7
Changeset: r70703:5e60afcef785
Date: 2014-04-17 17:25 +0200
http://bitbucket.org/pypy/pypy/changeset/5e60afcef785/
Log: more optimal read-barrier placement for non-jit. doesn't improve
performance measurably
diff --git a/rpython/translator/stm/readbarrier.py b/rpython/translator/stm/readbarrier.py
--- a/rpython/translator/stm/readbarrier.py
+++ b/rpython/translator/stm/readbarrier.py
@@ -1,14 +1,17 @@
from rpython.flowspace.model import SpaceOperation, Constant, Variable
-from rpython.translator.unsimplify import varoftype
from rpython.rtyper.lltypesystem import lltype
+from rpython.translator.unsimplify import varoftype, insert_empty_block
+from rpython.translator.unsimplify import insert_empty_startblock
+from rpython.translator.simplify import join_blocks
+MALLOCS = set([
+ 'malloc', 'malloc_varsize',
+ 'malloc_nonmovable', 'malloc_nonmovable_varsize',
+ ])
+
READ_OPS = set(['getfield', 'getarrayitem', 'getinteriorfield', 'raw_load'])
-
-def is_gc_ptr(T):
- return isinstance(T, lltype.Ptr) and T.TO._gckind == 'gc'
-
def unwraplist(list_v):
for v in list_v:
if isinstance(v, Constant):
@@ -33,25 +36,38 @@
return OUTER._immutable_interiorfield(unwraplist(op.args[1:-1]))
if op.opname in ('raw_load', 'raw_store'):
return False
+ raise AssertionError(op)
+def needs_barrier(frm, to):
+ return to > frm
-def insert_stm_read_barrier(transformer, graph):
- # We need to put enough 'stm_read' in the graph so that any
- # execution of a READ_OP on some GC object is guaranteed to also
- # execute either 'stm_read' or 'stm_write' on the same GC object
- # during the same transaction.
- #
- # XXX this can be optimized a lot, but for now we go with the
- # simplest possible solution...
- #
- gcremovetypeptr = transformer.translator.config.translation.gcremovetypeptr
+def is_gc_ptr(T):
+ return isinstance(T, lltype.Ptr) and T.TO._gckind == 'gc'
- for block in graph.iterblocks():
- if not block.operations:
- continue
- newops = []
+
+
+
+class BlockTransformer(object):
+
+ def __init__(self, stmtransformer, block):
+ self.stmtransformer = stmtransformer
+ self.block = block
+ self.patch = None
+ self.inputargs_category = None
+ self.inputargs_category_per_link = {}
+
+ def init_start_block(self):
+ from_outside = ['A'] * len(self.block.inputargs)
+ self.inputargs_category_per_link[None] = from_outside
+ self.update_inputargs_category()
+
+
+ def analyze_inside_block(self, graph):
+ gcremovetypeptr = (
+ self.stmtransformer.translator.config.translation.gcremovetypeptr)
+ wants_a_barrier = {}
stm_ignored = False
- for op in block.operations:
+ for op in self.block.operations:
is_getter = (op.opname in READ_OPS and
op.result.concretetype is not lltype.Void and
is_gc_ptr(op.args[0].concretetype))
@@ -61,26 +77,188 @@
op.args[0].concretetype.TO._hints.get('typeptr')):
# typeptr is always immutable
pass
- elif ((op.opname in ('getarraysize', 'getinteriorarraysize') and
- is_gc_ptr(op.args[0].concretetype)) or
+ elif (op.opname in ('getarraysize', 'getinteriorarraysize') and
+ is_gc_ptr(op.args[0].concretetype) or
(is_getter and is_immutable(op))):
# immutable getters
- # 'weakref_deref': kind of immutable, but the GC has to see
- # which transactions read from a dying weakref, so we
- # need the barrier nonetheless...
pass
elif is_getter:
+ # the non-immutable getfields need a regular read barrier
if not stm_ignored:
- v_none = varoftype(lltype.Void)
- newops.append(SpaceOperation('stm_read',
- [op.args[0]], v_none))
- transformer.read_barrier_counts += 1
+ wants_a_barrier[op] = 'R'
+ elif op.opname == 'weakref_deref':
+ # 'weakref_deref' needs a read barrier if we want to work
+ # around the "weakref issue"
+ assert not stm_ignored
+ wants_a_barrier[op] = 'R'
elif op.opname == 'stm_ignored_start':
- assert stm_ignored == False
+ assert not stm_ignored, "nested 'with stm_ignored'"
stm_ignored = True
elif op.opname == 'stm_ignored_stop':
- assert stm_ignored == True
+ assert stm_ignored, "stm_ignored_stop without start?"
stm_ignored = False
- newops.append(op)
- assert stm_ignored == False
- block.operations = newops
+ #
+ if stm_ignored:
+ raise Exception("%r: 'with stm_ignored:' code body too complex"
+ % (graph,))
+ self.wants_a_barrier = wants_a_barrier
+
+
+ def flow_through_block(self):
+ def cat_fetch(v):
+ return categories.setdefault(v, 'A')
+
+ def get_category_or_null(v):
+ # 'v' is an original variable here, or a constant
+ if isinstance(v, Constant) and not v.value: # a NULL constant
+ return 'Z'
+ if v in categories:
+ return categories[v]
+ return 'A'
+
+ newoperations = []
+ stmtransformer = self.stmtransformer
+ categories = {}
+
+ # make the initial trivial renamings needed to have some precise
+ # categories for the input args
+ for v, cat in zip(self.block.inputargs, self.inputargs_category):
+ if is_gc_ptr(v.concretetype):
+ assert cat is not None
+ categories[v] = cat
+
+ for op in self.block.operations:
+ if (op.opname in ('cast_pointer', 'same_as') and
+ is_gc_ptr(op.result.concretetype)):
+ categories[op.result] = cat_fetch(op.args[0])
+ newoperations.append(op)
+ continue
+ #
+ to = self.wants_a_barrier.get(op)
+ if to is not None:
+ v = op.args[0]
+ frm = cat_fetch(v)
+ if needs_barrier(frm, to):
+ stmtransformer.read_barrier_counts += 1
+ v_none = varoftype(lltype.Void)
+ newop = SpaceOperation('stm_read', [v], v_none)
+ categories[v] = to
+ newoperations.append(newop)
+ #
+ newoperations.append(op)
+ #
+ if stmtransformer.break_analyzer.analyze(op):
+ # this operation can perform a transaction break:
+ # all references are lowered to 'A' again
+ for v in categories:
+ categories[v] = 'A'
+
+ if op.opname == 'debug_stm_flush_barrier':
+ for v in categories:
+ categories[v] = 'A'
+
+ if op.opname in MALLOCS:
+ categories[op.result] = 'R'
+
+ blockoperations = newoperations
+ linkoperations = []
+ for link in self.block.exits:
+ output_categories = []
+ for v in link.args:
+ if is_gc_ptr(v.concretetype):
+ cat = cat_fetch(v)
+ else:
+ cat = None
+ output_categories.append(cat)
+ linkoperations.append(output_categories)
+ #
+ # Record how we'd like to patch the block, but don't do any
+ # patching yet
+ self.patch = (blockoperations, linkoperations)
+
+
+ def update_targets(self, block_transformers):
+ (_, linkoperations) = self.patch
+ assert len(linkoperations) == len(self.block.exits)
+ targetbts = []
+ for link, output_categories in zip(self.block.exits, linkoperations):
+ targetblock = link.target
+ if targetblock not in block_transformers:
+ continue # ignore the exit block
+ targetbt = block_transformers[targetblock]
+ targetbt.inputargs_category_per_link[link] = output_categories
+ if targetbt.update_inputargs_category():
+ targetbts.append(targetbt)
+ return set(targetbts)
+
+ def update_inputargs_category(self):
+ values = self.inputargs_category_per_link.values()
+ newcats = []
+ for i, v in enumerate(self.block.inputargs):
+ if is_gc_ptr(v.concretetype):
+ cats = [output_categories[i] for output_categories in values]
+ assert None not in cats
+ newcats.append(min(cats))
+ else:
+ newcats.append(None)
+ if newcats != self.inputargs_category:
+ self.inputargs_category = newcats
+ return True
+ else:
+ return False
+
+
+ def patch_now(self):
+ if self.patch is None:
+ return
+ newoperations, linkoperations = self.patch
+ self.block.operations = newoperations
+ assert len(linkoperations) == len(self.block.exits)
+ # for link, (newargs, newoperations, _) in zip(self.block.exits,
+ # linkoperations):
+ # link.args[:] = newargs
+ # if newoperations:
+ # # must put them in a fresh block along the link
+ # annotator = self.stmtransformer.translator.annotator
+ # insert_empty_block(annotator, link, newoperations)
+
+
+def insert_stm_read_barrier(stmtransformer, graph):
+ """This function uses the following characters for 'categories':
+
+ * 'A': any general pointer
+ * 'R': the read barrier was applied
+ * 'Z': the null constant
+
+ The letters are chosen so that a barrier is needed to change a
+ pointer from category x to category y if and only if y > x.
+ """
+ # XXX: we currently don't use the information that any write
+ # operation on a gcptr will make it readable automatically
+ join_blocks(graph)
+ annotator = stmtransformer.translator.annotator
+ insert_empty_startblock(annotator, graph)
+
+ block_transformers = {}
+
+ for block in graph.iterblocks():
+ if block.operations == ():
+ continue
+ bt = BlockTransformer(stmtransformer, block)
+ bt.analyze_inside_block(graph)
+ block_transformers[block] = bt
+
+ bt = block_transformers[graph.startblock]
+ bt.init_start_block()
+ pending = set([bt])
+
+ while pending:
+ bt = pending.pop()
+ bt.flow_through_block()
+ pending |= bt.update_targets(block_transformers)
+
+ for bt in block_transformers.values():
+ bt.patch_now()
+
+ # needed only for some fragile test ztranslated.test_stm_ignored
+ join_blocks(graph)
diff --git a/rpython/translator/stm/test/test_readbarrier.py b/rpython/translator/stm/test/test_readbarrier.py
--- a/rpython/translator/stm/test/test_readbarrier.py
+++ b/rpython/translator/stm/test/test_readbarrier.py
@@ -1,7 +1,8 @@
from rpython.rlib.objectmodel import stm_ignored
from rpython.translator.stm.test.transform_support import BaseTestTransform
-from rpython.rtyper.lltypesystem import lltype
-
+from rpython.rtyper.lltypesystem import lltype, rffi
+from rpython.rlib.rstm import register_invoke_around_extcall
+from rpython.rtyper.lltypesystem.lloperation import llop
class TestReadBarrier(BaseTestTransform):
do_read_barrier = True
@@ -26,6 +27,127 @@
assert res == 42
assert self.read_barriers == [x1]
+ def test_array_size(self):
+ array_gc = lltype.GcArray(('z', lltype.Signed))
+ array_nongc = lltype.Array(('z', lltype.Signed))
+ Q = lltype.GcStruct('Q',
+ ('gc', lltype.Ptr(array_gc)),
+ ('raw', lltype.Ptr(array_nongc)))
+ q = lltype.malloc(Q, immortal=True)
+ q.gc = lltype.malloc(array_gc, n=3, flavor='gc', immortal=True)
+ q.raw = lltype.malloc(array_nongc, n=5, flavor='raw', immortal=True)
+ def f1(n):
+ if n == 1:
+ return len(q.gc)
+ else:
+ return len(q.raw)
+ self.interpret(f1, [1])
+ assert self.read_barriers == [q]
+ self.interpret(f1, [0])
+ assert self.read_barriers == [q]
+
+ def test_simple_read_2(self):
+ X = lltype.GcStruct('X', ('foo', lltype.Signed))
+ x2 = lltype.malloc(X, immortal=True)
+ x2.foo = 81
+ null = lltype.nullptr(X)
+
+ def f1(n):
+ if n < 1:
+ p = null
+ else:
+ p = x2
+ return p.foo
+
+ res = self.interpret(f1, [4])
+ assert res == 81
+ assert self.read_barriers == [x2]
+
+
+ def test_multiple_reads(self):
+ X = lltype.GcStruct('X', ('foo', lltype.Signed),
+ ('bar', lltype.Signed))
+ x1 = lltype.malloc(X, immortal=True)
+ x1.foo = 6
+ x1.bar = 7
+ x2 = lltype.malloc(X, immortal=True)
+ x2.foo = 81
+ x2.bar = -1
+
+ def f1(n):
+ if n > 1:
+ return x2.foo * x2.bar
+ else:
+ return x1.foo * x1.bar
+
+ res = self.interpret(f1, [4])
+ assert res == -81
+ assert self.read_barriers == [x2]
+
+ def test_malloc(self):
+ X = lltype.GcStruct('X', ('foo', lltype.Signed))
+ def f1(n):
+ p = lltype.malloc(X)
+ p.foo = n
+
+ self.interpret(f1, [4])
+ assert self.read_barriers == []
+
+ def test_repeat_read_barrier_after_malloc(self):
+ X = lltype.GcStruct('X', ('foo', lltype.Signed))
+ x1 = lltype.malloc(X, immortal=True)
+ x1.foo = 6
+ def f1(n):
+ i = x1.foo
+ lltype.malloc(X)
+ i = x1.foo + i
+ return i
+
+ self.interpret(f1, [4])
+ assert self.read_barriers == [x1]
+
+ def test_call_external_release_gil(self):
+ X = lltype.GcStruct('X', ('foo', lltype.Signed))
+ def f1(p):
+ register_invoke_around_extcall()
+ x1 = p.foo
+ external_release_gil()
+ x2 = p.foo
+ return x1 * x2
+
+ x = lltype.malloc(X, immortal=True); x.foo = 6
+ res = self.interpret(f1, [x])
+ assert res == 36
+ assert self.read_barriers == [x, x]
+
+ def test_call_external_any_gcobj(self):
+ X = lltype.GcStruct('X', ('foo', lltype.Signed))
+ def f1(p):
+ register_invoke_around_extcall()
+ x1 = p.foo
+ external_any_gcobj()
+ x2 = p.foo
+ return x1 * x2
+
+ x = lltype.malloc(X, immortal=True); x.foo = 6
+ res = self.interpret(f1, [x])
+ assert res == 36
+ assert self.read_barriers == [x]
+
+ def test_call_external_safest(self):
+ X = lltype.GcStruct('X', ('foo', lltype.Signed))
+ def f1(p):
+ register_invoke_around_extcall()
+ x1 = p.foo
+ external_safest()
+ x2 = p.foo
+ return x1 * x2
+
+ x = lltype.malloc(X, immortal=True); x.foo = 6
+ res = self.interpret(f1, [x])
+ assert res == 36
+ assert self.read_barriers == [x]
+
def test_stm_ignored_read(self):
X = lltype.GcStruct('X', ('foo', lltype.Signed))
x1 = lltype.malloc(X, immortal=True)
@@ -48,3 +170,19 @@
res = self.interpret(f1, [2])
assert res == 42
assert self.read_barriers == [x1]
+
+
+
+
+external_release_gil = rffi.llexternal('external_release_gil', [], lltype.Void,
+ _callable=lambda: None,
+ random_effects_on_gcobjs=True,
+ releasegil=True) # GIL is released
+external_any_gcobj = rffi.llexternal('external_any_gcobj', [], lltype.Void,
+ _callable=lambda: None,
+ random_effects_on_gcobjs=True,
+ releasegil=False) # GIL is not released
+external_safest = rffi.llexternal('external_safest', [], lltype.Void,
+ _callable=lambda: None,
+ random_effects_on_gcobjs=False,
+ releasegil=False) # GIL is not released
diff --git a/rpython/translator/stm/test/transform_support.py b/rpython/translator/stm/test/transform_support.py
--- a/rpython/translator/stm/test/transform_support.py
+++ b/rpython/translator/stm/test/transform_support.py
@@ -103,12 +103,6 @@
def op_setfield(self, obj, fieldname, fieldvalue):
if obj._TYPE.TO._gckind == 'gc':
- T = lltype.typeOf(fieldvalue)
- if isinstance(T, lltype.Ptr) and T.TO._gckind == 'gc':
- self.check_category(obj, 'W')
- else:
- self.check_category(obj, 'V')
- # convert R -> Q all other pointers to the same object we can find
for p in self.all_stm_ptrs():
if p._category == 'R' and p._T == obj._T and p == obj:
_stmptr._category.__set__(p, 'Q')
@@ -138,7 +132,6 @@
_stmptr._category.__set__(p, 'V')
p = LLFrame.op_malloc(self, obj, flags)
ptr2 = _stmptr(p, 'W')
- self.llinterpreter.tester.writemode.add(ptr2._obj)
return ptr2
def transaction_break(self):
@@ -147,6 +140,13 @@
if p._category > 'I':
_stmptr._category.__set__(p, 'I')
+
+ def op_stm_commit_if_not_atomic(self):
+ self.transaction_break()
+
+ def op_stm_start_inevitable_if_not_atomic(self):
+ self.transaction_break()
+
def op_stm_commit_transaction(self):
self.transaction_break()
diff --git a/rpython/translator/stm/transform.py b/rpython/translator/stm/transform.py
--- a/rpython/translator/stm/transform.py
+++ b/rpython/translator/stm/transform.py
@@ -2,6 +2,7 @@
from rpython.translator.stm.readbarrier import insert_stm_read_barrier
from rpython.translator.stm.jitdriver import reorganize_around_jit_driver
from rpython.translator.stm.threadlocalref import transform_tlref
+from rpython.translator.stm.breakfinder import TransactionBreakAnalyzer
from rpython.translator.c.support import log
@@ -29,10 +30,12 @@
self.print_logs(3)
def transform_read_barrier(self):
+ self.break_analyzer = TransactionBreakAnalyzer(self.translator)
self.read_barrier_counts = 0
for graph in self.translator.graphs:
insert_stm_read_barrier(self, graph)
log.info("%d read barriers inserted" % (self.read_barrier_counts,))
+ del self.break_analyzer
def transform_turn_inevitable(self):
for graph in self.translator.graphs:
More information about the pypy-commit
mailing list