[pypy-commit] pypy default: Support writing inline code in the write barrier for large arrays,
arigo
noreply at buildbot.pypy.org
Sat Jul 23 23:24:37 CEST 2011
Author: Armin Rigo <arigo at tunes.org>
Branch:
Changeset: r45920:4c0d2555caa8
Date: 2011-07-23 19:55 +0200
http://bitbucket.org/pypy/pypy/changeset/4c0d2555caa8/
Log: Support writing inline code in the write barrier for large arrays,
for the simple case where calling it would just set a flag just
before the object.
diff --git a/pypy/jit/backend/llsupport/gc.py b/pypy/jit/backend/llsupport/gc.py
--- a/pypy/jit/backend/llsupport/gc.py
+++ b/pypy/jit/backend/llsupport/gc.py
@@ -453,21 +453,33 @@
class WriteBarrierDescr(AbstractDescr):
def __init__(self, gc_ll_descr):
+ GCClass = gc_ll_descr.GCClass
self.llop1 = gc_ll_descr.llop1
self.WB_FUNCPTR = gc_ll_descr.WB_FUNCPTR
self.WB_ARRAY_FUNCPTR = gc_ll_descr.WB_ARRAY_FUNCPTR
- self.fielddescr_tid = get_field_descr(gc_ll_descr,
- gc_ll_descr.GCClass.HDR, 'tid')
- self.jit_wb_if_flag = gc_ll_descr.GCClass.JIT_WB_IF_FLAG
- # if convenient for the backend, we also compute the info about
+ self.fielddescr_tid = get_field_descr(gc_ll_descr, GCClass.HDR, 'tid')
+ #
+ self.jit_wb_if_flag = GCClass.JIT_WB_IF_FLAG
+ self.jit_wb_if_flag_byteofs, self.jit_wb_if_flag_singlebyte = (
+ self.extract_flag_byte(self.jit_wb_if_flag))
+ #
+ if hasattr(GCClass, 'JIT_WB_CARDS_SET'):
+ self.jit_wb_cards_set = GCClass.JIT_WB_CARDS_SET
+ self.jit_wb_card_page_shift = GCClass.JIT_WB_CARD_PAGE_SHIFT
+ self.jit_wb_cards_set_byteofs, self.jit_wb_cards_set_singlebyte = (
+ self.extract_flag_byte(self.jit_wb_cards_set))
+ else:
+ self.jit_wb_cards_set = 0
+
+ def extract_flag_byte(self, flag_word):
+ # if convenient for the backend, we compute the info about
# the flag as (byte-offset, single-byte-flag).
import struct
- value = struct.pack("l", self.jit_wb_if_flag)
+ value = struct.pack("l", flag_word)
assert value.count('\x00') == len(value) - 1 # only one byte is != 0
i = 0
while value[i] == '\x00': i += 1
- self.jit_wb_if_flag_byteofs = i
- self.jit_wb_if_flag_singlebyte = struct.unpack('b', value[i])[0]
+ return (i, struct.unpack('b', value[i])[0])
def get_write_barrier_fn(self, cpu):
llop1 = self.llop1
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -1707,6 +1707,7 @@
jit_wb_if_flag = 4096
jit_wb_if_flag_byteofs = struct.pack("i", 4096).index('\x10')
jit_wb_if_flag_singlebyte = 0x10
+ jit_wb_cards_set = 0
def get_write_barrier_from_array_fn(self, cpu):
return funcbox.getint()
#
@@ -1728,6 +1729,72 @@
else:
assert record == []
+ def test_cond_call_gc_wb_array_card_marking_fast_path(self):
+ def func_void(a, b, c):
+ record.append((a, b, c))
+ record = []
+ #
+ S = lltype.Struct('S', ('tid', lltype.Signed))
+ S_WITH_CARDS = lltype.Struct('S_WITH_CARDS',
+ ('card0', lltype.Char),
+ ('card1', lltype.Char),
+ ('card2', lltype.Char),
+ ('card3', lltype.Char),
+ ('card4', lltype.Char),
+ ('card5', lltype.Char),
+ ('card6', lltype.Char),
+ ('card7', lltype.Char),
+ ('data', S))
+ FUNC = self.FuncType([lltype.Ptr(S), lltype.Signed, lltype.Ptr(S)],
+ lltype.Void)
+ func_ptr = llhelper(lltype.Ptr(FUNC), func_void)
+ funcbox = self.get_funcbox(self.cpu, func_ptr)
+ class WriteBarrierDescr(AbstractDescr):
+ jit_wb_if_flag = 4096
+ jit_wb_if_flag_byteofs = struct.pack("i", 4096).index('\x10')
+ jit_wb_if_flag_singlebyte = 0x10
+ jit_wb_cards_set = 8192
+ jit_wb_cards_set_byteofs = struct.pack("i", 8192).index('\x20')
+ jit_wb_cards_set_singlebyte = 0x20
+ jit_wb_card_page_shift = 7
+ def get_write_barrier_from_array_fn(self, cpu):
+ return funcbox.getint()
+ #
+ for BoxIndexCls in [BoxInt, ConstInt]:
+ for cond in [False, True]:
+ print
+ print '_'*79
+ print 'BoxIndexCls =', BoxIndexCls
+ print 'JIT_WB_CARDS_SET =', cond
+ print
+ value = random.randrange(-sys.maxint, sys.maxint)
+ value |= 4096
+ if cond:
+ value |= 8192
+ else:
+ value &= ~8192
+ s = lltype.malloc(S_WITH_CARDS, immortal=True, zero=True)
+ s.data.tid = value
+ sgcref = rffi.cast(llmemory.GCREF, s.data)
+ del record[:]
+ box_index = BoxIndexCls((9<<7) + 17)
+ self.execute_operation(rop.COND_CALL_GC_WB_ARRAY,
+ [BoxPtr(sgcref), box_index, BoxPtr(sgcref)],
+ 'void', descr=WriteBarrierDescr())
+ if cond:
+ assert record == []
+ assert s.card6 == '\x02'
+ else:
+ assert record == [(s.data, (9<<7) + 17, s.data)]
+ assert s.card6 == '\x00'
+ assert s.card0 == '\x00'
+ assert s.card1 == '\x00'
+ assert s.card2 == '\x00'
+ assert s.card3 == '\x00'
+ assert s.card4 == '\x00'
+ assert s.card5 == '\x00'
+ assert s.card7 == '\x00'
+
def test_force_operations_returning_void(self):
values = []
def maybe_force(token, flag):
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -2246,10 +2246,12 @@
if opnum == rop.COND_CALL_GC_WB:
N = 2
func = descr.get_write_barrier_fn(self.cpu)
+ card_marking = False
elif opnum == rop.COND_CALL_GC_WB_ARRAY:
N = 3
func = descr.get_write_barrier_from_array_fn(self.cpu)
assert func != 0
+ card_marking = descr.jit_wb_cards_set != 0
else:
raise AssertionError(opnum)
#
@@ -2258,6 +2260,18 @@
imm(descr.jit_wb_if_flag_singlebyte))
self.mc.J_il8(rx86.Conditions['Z'], 0) # patched later
jz_location = self.mc.get_relative_pos()
+
+ # for cond_call_gc_wb_array, also add another fast path:
+ # if GCFLAG_CARDS_SET, then we can just set one bit and be done
+ if card_marking:
+ self.mc.TEST8(addr_add_const(loc_base,
+ descr.jit_wb_cards_set_byteofs),
+ imm(descr.jit_wb_cards_set_singlebyte))
+ self.mc.J_il8(rx86.Conditions['NZ'], 0) # patched later
+ jnz_location = self.mc.get_relative_pos()
+ else:
+ jnz_location = 0
+
# the following is supposed to be the slow path, so whenever possible
# we choose the most compact encoding over the most efficient one.
if IS_X86_32:
@@ -2297,6 +2311,43 @@
loc = arglocs[i]
assert isinstance(loc, RegLoc)
self.mc.POP_r(loc.value)
+
+ # if GCFLAG_CARDS_SET, then we can do the whole thing that would
+ # be done in the CALL above with just a few instructions (a single
+ # OR8 when the index is a constant), so here is an inline copy of them
+ if card_marking:
+ self.mc.JMP_l8(0) # jump to the exit, patched later
+ jmp_location = self.mc.get_relative_pos()
+ # patch the JNZ above
+ offset = self.mc.get_relative_pos() - jnz_location
+ assert 0 < offset <= 127
+ self.mc.overwrite(jnz_location-1, chr(offset))
+ #
+ loc_index = arglocs[1]
+ if isinstance(loc_index, RegLoc):
+ # choose a scratch register
+ tmp1 = loc_index
+ self.mc.PUSH_r(tmp1.value)
+ # SHR tmp, card_page_shift
+ self.mc.SHR_ri(tmp1.value, descr.jit_wb_card_page_shift)
+ # XOR tmp, -8
+ self.mc.XOR_ri(tmp1.value, -8)
+ # BTS [loc_base], tmp
+ self.mc.BTS(addr_add_const(loc_base, 0), tmp1)
+ # done
+ self.mc.POP_r(tmp1.value)
+ elif isinstance(loc_index, ImmedLoc):
+ byte_index = loc_index.value >> descr.jit_wb_card_page_shift
+ byte_ofs = ~(byte_index >> 3)
+ byte_val = 1 << (byte_index & 7)
+ self.mc.OR8(addr_add_const(loc_base, byte_ofs), imm(byte_val))
+ else:
+ raise AssertionError("index is neither RegLoc nor ImmedLoc")
+ # patch the JMP above
+ offset = self.mc.get_relative_pos() - jmp_location
+ assert 0 < offset <= 127
+ self.mc.overwrite(jmp_location-1, chr(offset))
+ #
# patch the JZ above
offset = self.mc.get_relative_pos() - jz_location
assert 0 < offset <= 127
diff --git a/pypy/jit/backend/x86/test/test_zrpy_gc.py b/pypy/jit/backend/x86/test/test_zrpy_gc.py
--- a/pypy/jit/backend/x86/test/test_zrpy_gc.py
+++ b/pypy/jit/backend/x86/test/test_zrpy_gc.py
@@ -524,6 +524,76 @@
def test_compile_framework_8(self):
self.run('compile_framework_8')
+ def define_compile_framework_9(cls):
+ # Like compile_framework_8, but with variable indexes and large
+ # arrays, testing the card_marking case
+ def before(n, x):
+ return n, x, None, None, None, None, None, None, None, None, [X(123)], None
+ def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
+ if n < 1900:
+ check(l[0].x == 123)
+ num = 512 + (n & 7)
+ l = [None] * num
+ l[0] = X(123)
+ l[1] = X(n)
+ l[2] = X(n+10)
+ l[3] = X(n+20)
+ l[4] = X(n+30)
+ l[5] = X(n+40)
+ l[6] = X(n+50)
+ l[7] = X(n+60)
+ l[num-8] = X(n+70)
+ l[num-9] = X(n+80)
+ l[num-10] = X(n+90)
+ l[num-11] = X(n+100)
+ l[-12] = X(n+110)
+ l[-13] = X(n+120)
+ l[-14] = X(n+130)
+ l[-15] = X(n+140)
+ if n < 1800:
+ num = 512 + (n & 7)
+ check(len(l) == num)
+ check(l[0].x == 123)
+ check(l[1].x == n)
+ check(l[2].x == n+10)
+ check(l[3].x == n+20)
+ check(l[4].x == n+30)
+ check(l[5].x == n+40)
+ check(l[6].x == n+50)
+ check(l[7].x == n+60)
+ check(l[num-8].x == n+70)
+ check(l[num-9].x == n+80)
+ check(l[num-10].x == n+90)
+ check(l[num-11].x == n+100)
+ check(l[-12].x == n+110)
+ check(l[-13].x == n+120)
+ check(l[-14].x == n+130)
+ check(l[-15].x == n+140)
+ n -= x.foo
+ return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
+ def after(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
+ check(len(l) >= 512)
+ check(l[0].x == 123)
+ check(l[1].x == 2)
+ check(l[2].x == 12)
+ check(l[3].x == 22)
+ check(l[4].x == 32)
+ check(l[5].x == 42)
+ check(l[6].x == 52)
+ check(l[7].x == 62)
+ check(l[-8].x == 72)
+ check(l[-9].x == 82)
+ check(l[-10].x == 92)
+ check(l[-11].x == 102)
+ check(l[-12].x == 112)
+ check(l[-13].x == 122)
+ check(l[-14].x == 132)
+ check(l[-15].x == 142)
+ return before, f, after
+
+ def test_compile_framework_9(self):
+ self.run('compile_framework_9')
+
def define_compile_framework_external_exception_handling(cls):
def before(n, x):
x = X(0)
diff --git a/pypy/rpython/memory/gc/minimark.py b/pypy/rpython/memory/gc/minimark.py
--- a/pypy/rpython/memory/gc/minimark.py
+++ b/pypy/rpython/memory/gc/minimark.py
@@ -920,6 +920,20 @@
# "if addr_struct.int0 & JIT_WB_IF_FLAG: remember_young_pointer()")
JIT_WB_IF_FLAG = GCFLAG_TRACK_YOUNG_PTRS
+ # for the JIT to generate custom code corresponding to the array
+ # write barrier for the simplest case of cards. If JIT_WB_CARDS_SET
+ # is already set on an object, it will execute code like this:
+ # MOV eax, index
+ # SHR eax, JIT_WB_CARD_PAGE_SHIFT
+ # XOR eax, -8
+ # BTS [object], eax
+ if TRANSLATION_PARAMS['card_page_indices'] > 0:
+ JIT_WB_CARDS_SET = GCFLAG_CARDS_SET
+ JIT_WB_CARD_PAGE_SHIFT = 1
+ while ((1 << JIT_WB_CARD_PAGE_SHIFT) !=
+ TRANSLATION_PARAMS['card_page_indices']):
+ JIT_WB_CARD_PAGE_SHIFT += 1
+
@classmethod
def JIT_max_size_of_young_obj(cls):
return cls.TRANSLATION_PARAMS['large_object']
More information about the pypy-commit
mailing list