[pypy-commit] pypy default: Tweak the array write barrier: from the JIT, make it take all 3 arguments
arigo
noreply at buildbot.pypy.org
Mon Jun 13 20:31:53 CEST 2011
Author: Armin Rigo <arigo at tunes.org>
Branch:
Changeset: r44911:db949db5be62
Date: 2011-06-13 19:57 +0200
http://bitbucket.org/pypy/pypy/changeset/db949db5be62/
Log: Tweak the array write barrier: from the JIT, make it take all 3
arguments again. Tentative, trying to fix the "chaos" benchmark
slow-down.
diff --git a/pypy/jit/backend/llsupport/gc.py b/pypy/jit/backend/llsupport/gc.py
--- a/pypy/jit/backend/llsupport/gc.py
+++ b/pypy/jit/backend/llsupport/gc.py
@@ -552,7 +552,7 @@
self.WB_FUNCPTR = lltype.Ptr(lltype.FuncType(
[llmemory.Address, llmemory.Address], lltype.Void))
self.WB_ARRAY_FUNCPTR = lltype.Ptr(lltype.FuncType(
- [llmemory.Address, lltype.Signed], lltype.Void))
+ [llmemory.Address, lltype.Signed, llmemory.Address], lltype.Void))
self.write_barrier_descr = WriteBarrierDescr(self)
#
def malloc_array(itemsize, tid, num_elem):
@@ -763,10 +763,8 @@
newops.append(op)
return newops
- def _gen_write_barrier(self, newops, v_base, v_value_or_index):
- # NB. the 2nd argument of COND_CALL_GC_WB is either a pointer
- # (regular case), or an index (case of write_barrier_from_array)
- args = [v_base, v_value_or_index]
+ def _gen_write_barrier(self, newops, v_base, v_value):
+ args = [v_base, v_value]
newops.append(ResOperation(rop.COND_CALL_GC_WB, args, None,
descr=self.write_barrier_descr))
@@ -780,7 +778,10 @@
length = known_lengths.get(v_base, LARGE)
if length >= LARGE:
# unknown or too big: produce a write_barrier_from_array
- self._gen_write_barrier(newops, v_base, v_index)
+ args = [v_base, v_value, v_index]
+ newops.append(ResOperation(rop.COND_CALL_GC_WB_ARRAY, args,
+ None,
+ descr=self.write_barrier_descr))
return
# fall-back case: produce a write_barrier
self._gen_write_barrier(newops, v_base, v_value)
diff --git a/pypy/jit/backend/llsupport/test/test_gc.py b/pypy/jit/backend/llsupport/test/test_gc.py
--- a/pypy/jit/backend/llsupport/test/test_gc.py
+++ b/pypy/jit/backend/llsupport/test/test_gc.py
@@ -553,12 +553,15 @@
del operations[:2]
assert len(operations) == 2
#
- assert operations[0].getopnum() == rop.COND_CALL_GC_WB
- assert operations[0].getarg(0) == v_base
if isinstance(v_new_length, ConstInt) and v_new_length.value < 130:
+ assert operations[0].getopnum() == rop.COND_CALL_GC_WB
+ assert operations[0].getarg(0) == v_base
assert operations[0].getarg(1) == v_value
else:
- assert operations[0].getarg(1) == v_index
+ assert operations[0].getopnum() == rop.COND_CALL_GC_WB_ARRAY
+ assert operations[0].getarg(0) == v_base
+ assert operations[0].getarg(1) == v_value
+ assert operations[0].getarg(2) == v_index
assert operations[0].result is None
#
assert operations[1].getopnum() == rop.SETARRAYITEM_RAW
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -2223,15 +2223,26 @@
def genop_discard_cond_call_gc_wb(self, op, arglocs):
# Write code equivalent to write_barrier() in the GC: it checks
# a flag in the object at arglocs[0], and if set, it calls the
- # function remember_young_pointer() from the GC. The two arguments
- # to the call are in arglocs[:2]. The rest, arglocs[2:], contains
+ # function remember_young_pointer() from the GC. The arguments
+ # to the call are in arglocs[:N]. The rest, arglocs[N:], contains
# registers that need to be saved and restored across the call.
- # If op.getarg(1) is a int, it is an array index and we must call
- # instead remember_young_pointer_from_array().
+ # N is either 2 (regular write barrier) or 3 (array write barrier).
descr = op.getdescr()
if we_are_translated():
cls = self.cpu.gc_ll_descr.has_write_barrier_class()
assert cls is not None and isinstance(descr, cls)
+ #
+ opnum = op.getopnum()
+ if opnum == rop.COND_CALL_GC_WB:
+ N = 2
+ func = descr.get_write_barrier_fn(self.cpu)
+ elif opnum == rop.COND_CALL_GC_WB_ARRAY:
+ N = 3
+ func = descr.get_write_barrier_from_array_fn(self.cpu)
+ assert func != 0
+ else:
+ raise AssertionError(opnum)
+ #
loc_base = arglocs[0]
self.mc.TEST8(addr_add_const(loc_base, descr.jit_wb_if_flag_byteofs),
imm(descr.jit_wb_if_flag_singlebyte))
@@ -2242,29 +2253,27 @@
if IS_X86_32:
limit = -1 # push all arglocs on the stack
elif IS_X86_64:
- limit = 1 # push only arglocs[2:] on the stack
+ limit = N - 1 # push only arglocs[N:] on the stack
for i in range(len(arglocs)-1, limit, -1):
loc = arglocs[i]
if isinstance(loc, RegLoc):
self.mc.PUSH_r(loc.value)
else:
- assert not IS_X86_64 # there should only be regs in arglocs[2:]
+ assert not IS_X86_64 # there should only be regs in arglocs[N:]
self.mc.PUSH_i32(loc.getint())
if IS_X86_64:
# We clobber these registers to pass the arguments, but that's
# okay, because consider_cond_call_gc_wb makes sure that any
# caller-save registers with values in them are present in
- # arglocs[2:] too, so they are saved on the stack above and
+ # arglocs[N:] too, so they are saved on the stack above and
# restored below.
- remap_frame_layout(self, arglocs[:2], [edi, esi],
+ if N == 2:
+ callargs = [edi, esi]
+ else:
+ callargs = [edi, esi, edx]
+ remap_frame_layout(self, arglocs[:N], callargs,
X86_64_SCRATCH_REG)
-
- if op.getarg(1).type == INT:
- func = descr.get_write_barrier_from_array_fn(self.cpu)
- assert func != 0
- else:
- func = descr.get_write_barrier_fn(self.cpu)
-
+ #
# misaligned stack in the call, but it's ok because the write barrier
# is not going to call anything more. Also, this assumes that the
# write barrier does not touch the xmm registers. (Slightly delicate
@@ -2273,8 +2282,8 @@
# be done properly)
self.mc.CALL(imm(func))
if IS_X86_32:
- self.mc.ADD_ri(esp.value, 2*WORD)
- for i in range(2, len(arglocs)):
+ self.mc.ADD_ri(esp.value, N*WORD)
+ for i in range(N, len(arglocs)):
loc = arglocs[i]
assert isinstance(loc, RegLoc)
self.mc.POP_r(loc.value)
@@ -2283,6 +2292,8 @@
assert 0 < offset <= 127
self.mc.overwrite(jz_location-1, chr(offset))
+ genop_discard_cond_call_gc_wb_array = genop_discard_cond_call_gc_wb
+
def genop_force_token(self, op, arglocs, resloc):
# RegAlloc.consider_force_token ensures this:
assert isinstance(resloc, RegLoc)
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -884,12 +884,18 @@
def consider_cond_call_gc_wb(self, op):
assert op.result is None
args = op.getarglist()
- loc_newvalue_or_index= self.rm.make_sure_var_in_reg(op.getarg(1), args)
- # ^^^ we force loc_newvalue_or_index in a reg (unless it's a Const),
+ loc_newvalue = self.rm.make_sure_var_in_reg(op.getarg(1), args)
+ # ^^^ we force loc_newvalue in a reg (unless it's a Const),
# because it will be needed anyway by the following setfield_gc.
# It avoids loading it twice from the memory.
loc_base = self.rm.make_sure_var_in_reg(op.getarg(0), args)
- arglocs = [loc_base, loc_newvalue_or_index]
+ #
+ if len(args) == 2:
+ arglocs = [loc_base, loc_newvalue] # cond_call_gc_wb
+ else:
+ # cond_call_gc_wb_array
+ loc_arrayindex = self.rm.make_sure_var_in_reg(op.getarg(2), args)
+ arglocs = [loc_base, loc_newvalue, loc_arrayindex]
# add eax, ecx and edx as extra "arguments" to ensure they are
# saved and restored. Fish in self.rm to know which of these
# registers really need to be saved (a bit of a hack). Moreover,
@@ -903,6 +909,8 @@
self.PerformDiscard(op, arglocs)
self.rm.possibly_free_vars_for_op(op)
+ consider_cond_call_gc_wb_array = consider_cond_call_gc_wb
+
def fastpath_malloc_fixedsize(self, op, descr):
assert isinstance(descr, BaseSizeDescr)
self._do_fastpath_malloc(op, descr.size, descr.tid)
diff --git a/pypy/jit/metainterp/executor.py b/pypy/jit/metainterp/executor.py
--- a/pypy/jit/metainterp/executor.py
+++ b/pypy/jit/metainterp/executor.py
@@ -316,6 +316,7 @@
if value in (rop.FORCE_TOKEN,
rop.CALL_ASSEMBLER,
rop.COND_CALL_GC_WB,
+ rop.COND_CALL_GC_WB_ARRAY,
rop.DEBUG_MERGE_POINT,
rop.JIT_DEBUG,
rop.SETARRAYITEM_RAW,
diff --git a/pypy/jit/metainterp/resoperation.py b/pypy/jit/metainterp/resoperation.py
--- a/pypy/jit/metainterp/resoperation.py
+++ b/pypy/jit/metainterp/resoperation.py
@@ -477,8 +477,8 @@
'STRSETITEM/3',
'UNICODESETITEM/3',
#'RUNTIMENEW/1', # ootype operation
- 'COND_CALL_GC_WB/2d', # [objptr, newvalue] or [arrayptr, index]
- # (for the write barrier, latter is in an array)
+ 'COND_CALL_GC_WB/2d', # [objptr, newvalue] (for the write barrier)
+ 'COND_CALL_GC_WB_ARRAY/3d', # [objptr, newvalue, arrayindex] (write barr.)
'DEBUG_MERGE_POINT/*', # debugging only
'JIT_DEBUG/*', # debugging only
'VIRTUAL_REF_FINISH/2', # removed before it's passed to the backend
diff --git a/pypy/rpython/memory/gc/minimark.py b/pypy/rpython/memory/gc/minimark.py
--- a/pypy/rpython/memory/gc/minimark.py
+++ b/pypy/rpython/memory/gc/minimark.py
@@ -927,7 +927,7 @@
def write_barrier_from_array(self, newvalue, addr_array, index):
if self.header(addr_array).tid & GCFLAG_NO_YOUNG_PTRS:
if self.card_page_indices > 0: # <- constant-folded
- self.remember_young_pointer_from_array(addr_array, index)
+ self.remember_young_pointer_from_array2(addr_array, index)
else:
self.remember_young_pointer(addr_array, newvalue)
@@ -976,7 +976,7 @@
def _init_writebarrier_with_card_marker(self):
DEBUG = self.DEBUG
- def remember_young_pointer_from_array(addr_array, index):
+ def remember_young_pointer_from_array2(addr_array, index):
# 'addr_array' is the address of the object in which we write,
# which must have an array part; 'index' is the index of the
# item that is (or contains) the pointer that we write.
@@ -1011,7 +1011,7 @@
#
# We set the flag (even if the newly written address does not
# actually point to the nursery, which seems to be ok -- actually
- # it seems more important that remember_young_pointer_from_array()
+ # it seems more important that remember_young_pointer_from_array2()
# does not take 3 arguments).
addr_byte.char[0] = chr(byte | bitmask)
#
@@ -1019,10 +1019,67 @@
self.old_objects_with_cards_set.append(addr_array)
objhdr.tid |= GCFLAG_CARDS_SET
- remember_young_pointer_from_array._dont_inline_ = True
+ remember_young_pointer_from_array2._dont_inline_ = True
assert self.card_page_indices > 0
- self.remember_young_pointer_from_array = (
- remember_young_pointer_from_array)
+ self.remember_young_pointer_from_array2 = (
+ remember_young_pointer_from_array2)
+
+ # xxx trying it out for the JIT: a 3-arguments version of the above
+ def remember_young_pointer_from_array3(addr_array, index, newvalue):
+ if DEBUG: # note: PYPY_GC_DEBUG=1 does not enable this
+ ll_assert(self.debug_is_old_object(addr_array),
+ "young array with GCFLAG_NO_YOUNG_PTRS")
+ objhdr = self.header(addr_array)
+ #
+ # a single check for the common case of neither GCFLAG_HAS_CARDS
+ # nor GCFLAG_NO_HEAP_PTRS
+ if objhdr.tid & (GCFLAG_HAS_CARDS | GCFLAG_NO_HEAP_PTRS) == 0:
+ # common case: fast path, jump to the end of the function
+ pass
+ elif objhdr.tid & GCFLAG_HAS_CARDS == 0:
+ # no cards, but GCFLAG_NO_HEAP_PTRS is set.
+ objhdr.tid &= ~GCFLAG_NO_HEAP_PTRS
+ self.prebuilt_root_objects.append(addr_array)
+ # jump to the end of the function
+ else:
+ # case with cards.
+ #
+ # If the newly written address does not actually point to the
+ # nursery, leave now.
+ if not self.appears_to_be_young(newvalue):
+ return
+ #
+ # 'addr_array' is a raw_malloc'ed array with card markers
+ # in front. Compute the index of the bit to set:
+ bitindex = index >> self.card_page_shift
+ byteindex = bitindex >> 3
+ bitmask = 1 << (bitindex & 7)
+ #
+ # If the bit is already set, leave now.
+ size_gc_header = self.gcheaderbuilder.size_gc_header
+ addr_byte = addr_array - size_gc_header
+ addr_byte = llarena.getfakearenaaddress(addr_byte) + \
+ (~byteindex)
+ byte = ord(addr_byte.char[0])
+ if byte & bitmask:
+ return
+ addr_byte.char[0] = chr(byte | bitmask)
+ #
+ if objhdr.tid & GCFLAG_CARDS_SET == 0:
+ self.old_objects_with_cards_set.append(addr_array)
+ objhdr.tid |= GCFLAG_CARDS_SET
+ return
+ #
+ # Logic for the no-cards case, put here to minimize the number
+ # of checks done at the start of the function
+ if self.appears_to_be_young(newvalue):
+ self.old_objects_pointing_to_young.append(addr_array)
+ objhdr.tid &= ~GCFLAG_NO_YOUNG_PTRS
+
+ remember_young_pointer_from_array3._dont_inline_ = True
+ assert self.card_page_indices > 0
+ self.remember_young_pointer_from_array3 = (
+ remember_young_pointer_from_array3)
def assume_young_pointers(self, addr_struct):
diff --git a/pypy/rpython/memory/gctransform/framework.py b/pypy/rpython/memory/gctransform/framework.py
--- a/pypy/rpython/memory/gctransform/framework.py
+++ b/pypy/rpython/memory/gctransform/framework.py
@@ -463,7 +463,7 @@
annmodel.SomeInteger()],
annmodel.s_None,
inline=True)
- func = getattr(gcdata.gc, 'remember_young_pointer_from_array',
+ func = getattr(gcdata.gc, 'remember_young_pointer_from_array3',
None)
if func is not None:
# func should not be a bound method, but a real function
@@ -471,7 +471,8 @@
self.write_barrier_from_array_failing_case_ptr = \
getfn(func,
[annmodel.SomeAddress(),
- annmodel.SomeInteger()],
+ annmodel.SomeInteger(),
+ annmodel.SomeAddress()],
annmodel.s_None)
self.statistics_ptr = getfn(GCClass.statistics.im_func,
[s_gc, annmodel.SomeInteger()],
More information about the pypy-commit
mailing list