[pypy-commit] pypy gc_no_cleanup_nursery: hg merge gc_zero_array
arigo
noreply at buildbot.pypy.org
Fri Sep 19 07:55:36 CEST 2014
Author: Armin Rigo <arigo at tunes.org>
Branch: gc_no_cleanup_nursery
Changeset: r73610:526a7075bba7
Date: 2014-09-19 07:55 +0200
http://bitbucket.org/pypy/pypy/changeset/526a7075bba7/
Log: hg merge gc_zero_array
diff --git a/rpython/jit/backend/llsupport/assembler.py b/rpython/jit/backend/llsupport/assembler.py
--- a/rpython/jit/backend/llsupport/assembler.py
+++ b/rpython/jit/backend/llsupport/assembler.py
@@ -1,5 +1,5 @@
from rpython.jit.backend.llsupport import jitframe
-from rpython.jit.backend.llsupport.memcpy import memcpy_fn
+from rpython.jit.backend.llsupport.memcpy import memcpy_fn, memset_fn
from rpython.jit.backend.llsupport.symbolic import WORD
from rpython.jit.metainterp.history import (INT, REF, FLOAT, JitCellToken,
ConstInt, BoxInt, AbstractFailDescr)
@@ -63,6 +63,7 @@
def __init__(self, cpu, translate_support_code=False):
self.cpu = cpu
self.memcpy_addr = 0
+ self.memset_addr = 0
self.rtyper = cpu.rtyper
self._debug = False
@@ -79,6 +80,7 @@
else:
self.gc_size_of_header = WORD # for tests
self.memcpy_addr = self.cpu.cast_ptr_to_int(memcpy_fn)
+ self.memset_addr = self.cpu.cast_ptr_to_int(memset_fn)
self._build_failure_recovery(False, withfloats=False)
self._build_failure_recovery(True, withfloats=False)
self._build_wb_slowpath(False)
diff --git a/rpython/jit/backend/llsupport/gc.py b/rpython/jit/backend/llsupport/gc.py
--- a/rpython/jit/backend/llsupport/gc.py
+++ b/rpython/jit/backend/llsupport/gc.py
@@ -34,15 +34,6 @@
self.fielddescr_vtable = get_field_descr(self, rclass.OBJECT,
'typeptr')
self._generated_functions = []
- self.memset_ptr = rffi.llexternal('memset', [lltype.Signed, rffi.INT,
- rffi.SIZE_T], lltype.Void,
- sandboxsafe=True,
- _nowrapper=True)
- self.memset_ptr_as_int = heaptracker.adr2int(
- llmemory.cast_ptr_to_adr(self.memset_ptr))
- ei = EffectInfo([], [], [], [], [], [], EffectInfo.EF_CANNOT_RAISE)
- self.memset_descr = get_call_descr(self, [lltype.Signed, rffi.INT,
- rffi.SIZE_T], lltype.Void, ei)
def _setup_str(self):
self.str_descr = get_array_descr(self, rstr.STR)
diff --git a/rpython/jit/backend/llsupport/memcpy.py b/rpython/jit/backend/llsupport/memcpy.py
--- a/rpython/jit/backend/llsupport/memcpy.py
+++ b/rpython/jit/backend/llsupport/memcpy.py
@@ -3,3 +3,6 @@
memcpy_fn = rffi.llexternal('memcpy', [llmemory.Address, llmemory.Address,
rffi.SIZE_T], lltype.Void,
sandboxsafe=True, _nowrapper=True)
+memset_fn = rffi.llexternal('memset', [llmemory.Address, rffi.INT,
+ rffi.SIZE_T], lltype.Void,
+ sandboxsafe=True, _nowrapper=True)
diff --git a/rpython/jit/backend/llsupport/rewrite.py b/rpython/jit/backend/llsupport/rewrite.py
--- a/rpython/jit/backend/llsupport/rewrite.py
+++ b/rpython/jit/backend/llsupport/rewrite.py
@@ -180,38 +180,16 @@
if kind == FLAG_ARRAY:
self.clear_varsize_gc_fields(op.getdescr(), op.result, v_length)
- def handle_clear_array_contents(self, arraydescr, v_arr, v_arrsize=None):
- # XXX this maybe should go to optimizer, so we can remove extra ops?
- ofs, size, _ = self.cpu.unpack_arraydescr_size(arraydescr)
- v_arr_plus_ofs = BoxInt()
- v_totalsize = BoxInt()
- gcdescr = self.gc_ll_descr
- if isinstance(v_arrsize, ConstInt) and v_arrsize.getint() < 10:
- # clear it item by item
- ops = []
- for i in range(v_arrsize.getint()):
- ops.append(ResOperation(rop.SETARRAYITEM_GC,
- [v_arr, ConstInt(i), self.c_zero], None,
- descr=arraydescr))
- self.newops += ops
- return
- ops = [
- ResOperation(rop.INT_ADD, [v_arr, ConstInt(ofs)], v_arr_plus_ofs),
- ]
-
- if v_arrsize is None:
- v_arrsize = BoxInt()
- o = ResOperation(rop.ARRAYLEN_GC, [v_arr], v_arrsize,
+ def handle_clear_array_contents(self, arraydescr, v_arr, v_length=None):
+ # XXX more work here to reduce or remove the ZERO_ARRAY in some cases
+ if v_length is None:
+ v_length = BoxInt()
+ o = ResOperation(rop.ARRAYLEN_GC, [v_arr], v_length,
descr=arraydescr)
- ops.append(o)
- ops += [
- ResOperation(rop.INT_MUL, [v_arrsize, ConstInt(size)], v_totalsize),
- ResOperation(rop.CALL, [ConstInt(gcdescr.memset_ptr_as_int),
- v_arr_plus_ofs,
- ConstInt(0), v_totalsize], None,
- descr=gcdescr.memset_descr),
- ]
- self.newops.extend(ops)
+ self.newops.append(o)
+ o = ResOperation(rop.ZERO_ARRAY, [v_arr, ConstInt(0), v_length], None,
+ descr=arraydescr)
+ self.newops.append(o)
def gen_malloc_frame(self, frame_info, frame, size_box):
descrs = self.gc_ll_descr.getframedescrs(self.cpu)
diff --git a/rpython/jit/backend/test/runner_test.py b/rpython/jit/backend/test/runner_test.py
--- a/rpython/jit/backend/test/runner_test.py
+++ b/rpython/jit/backend/test/runner_test.py
@@ -4478,3 +4478,49 @@
ref = self.cpu.get_ref_value(deadframe, 0)
s = lltype.cast_opaque_ptr(lltype.Ptr(S), ref)
assert not s.x
+
+ def test_zero_ptr_field(self):
+ XXX # write me!
+
+ def test_zero_array(self):
+ PAIR = lltype.Struct('PAIR', ('a', lltype.Signed), ('b', lltype.Signed))
+ for OF in [lltype.Signed, rffi.INT, rffi.SHORT, rffi.UCHAR, PAIR]:
+ A = lltype.GcArray(OF)
+ arraydescr = self.cpu.arraydescrof(A)
+ a = lltype.malloc(A, 100)
+ addr = llmemory.cast_ptr_to_adr(a)
+ a_int = heaptracker.adr2int(addr)
+ a_ref = lltype.cast_opaque_ptr(llmemory.GCREF, a)
+ for (start, length) in [(0, 100), (49, 49), (1, 98),
+ (15, 9), (10, 10), (47, 0),
+ (0, 4)]:
+ for cls1 in [ConstInt, BoxInt]:
+ for cls2 in [ConstInt, BoxInt]:
+ print 'a_int:', a_int
+ print 'of:', OF
+ print 'start:', start
+ print 'length:', length
+ print 'cls1:', cls1.__name__
+ print 'cls2:', cls2.__name__
+ for i in range(100):
+ if OF == PAIR:
+ a[i].a = a[i].b = -123456789
+ else:
+ a[i] = rffi.cast(OF, -123456789)
+ startbox = cls1(start)
+ lengthbox = cls2(length)
+ if cls1 == cls2 and start == length:
+ lengthbox = startbox # same box!
+ self.execute_operation(rop.ZERO_ARRAY,
+ [BoxPtr(a_ref),
+ startbox,
+ lengthbox],
+ 'void', descr=arraydescr)
+ assert len(a) == 100
+ for i in range(100):
+ val = (0 if start <= i < start + length
+ else -123456789)
+ if OF == PAIR:
+ assert a[i].a == a[i].b == val
+ else:
+ assert a[i] == rffi.cast(OF, val)
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -2363,6 +2363,42 @@
elif IS_X86_64:
mc.MOVSX32_rj(loc.value, addr) # memory read, sign-extend
+ def genop_discard_zero_array(self, op, arglocs):
+ (base_loc, startindex_loc, bytes_loc,
+ itemsize_loc, baseofs_loc, null_loc) = arglocs
+ assert isinstance(bytes_loc, ImmedLoc)
+ assert isinstance(itemsize_loc, ImmedLoc)
+ assert isinstance(baseofs_loc, ImmedLoc)
+ assert isinstance(null_loc, RegLoc) and null_loc.is_xmm
+ baseofs = baseofs_loc.value
+ nbytes = bytes_loc.value
+ if valid_addressing_size(itemsize_loc.value):
+ scale = get_scale(itemsize_loc.value)
+ else:
+ assert isinstance(startindex_loc, ImmedLoc)
+ assert startindex_loc.value == 0
+ scale = 0
+ null_reg_cleared = False
+ i = 0
+ while i < nbytes:
+ addr = addr_add(base_loc, startindex_loc, baseofs + i, scale)
+ current = nbytes - i
+ if current >= 16:
+ current = 16
+ if not null_reg_cleared:
+ self.mc.XORPS_xx(null_loc.value, null_loc.value)
+ null_reg_cleared = True
+ self.mc.MOVUPS(addr, null_loc)
+ else:
+ if current >= WORD:
+ current = WORD
+ elif current >= 4:
+ current = 4
+ elif current >= 2:
+ current = 2
+ self.save_into_mem(addr, imm0, imm(current))
+ i += current
+
genop_discard_list = [Assembler386.not_implemented_op_discard] * rop._LAST
genop_list = [Assembler386.not_implemented_op] * rop._LAST
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -8,7 +8,8 @@
unpack_arraydescr, unpack_fielddescr, unpack_interiorfielddescr)
from rpython.jit.backend.llsupport.gcmap import allocate_gcmap
from rpython.jit.backend.llsupport.regalloc import (FrameManager, BaseRegalloc,
- RegisterManager, TempBox, compute_vars_longevity, is_comparison_or_ovf_op)
+ RegisterManager, TempBox, compute_vars_longevity, is_comparison_or_ovf_op,
+ valid_addressing_size)
from rpython.jit.backend.x86 import rx86
from rpython.jit.backend.x86.arch import (WORD, JITFRAME_FIXED_SIZE, IS_X86_32,
IS_X86_64)
@@ -1384,6 +1385,71 @@
def consider_keepalive(self, op):
pass
+ def consider_zero_array(self, op):
+ itemsize, baseofs, _ = unpack_arraydescr(op.getdescr())
+ args = op.getarglist()
+ base_loc = self.rm.make_sure_var_in_reg(args[0], args)
+ startindex_loc = self.rm.make_sure_var_in_reg(args[1], args)
+ length_box = op.getarg(2)
+ if isinstance(length_box, ConstInt):
+ constbytes = length_box.getint() * itemsize
+ else:
+ constbytes = -1
+ if 0 <= constbytes <= 16 * 8 and (
+ valid_addressing_size(itemsize) or
+ (isinstance(startindex_loc, ImmedLoc) and
+ startindex_loc.value == 0)):
+ if IS_X86_64:
+ null_loc = X86_64_XMM_SCRATCH_REG
+ else:
+ null_box = TempBox()
+ null_loc = self.xrm.force_allocate_reg(null_box)
+ self.xrm.possibly_free_var(null_box)
+ self.perform_discard(op, [base_loc, startindex_loc,
+ imm(constbytes), imm(itemsize),
+ imm(baseofs), null_loc])
+ else:
+ # base_loc and startindex_loc are in two regs here (or they are
+ # immediates). Compute the dstaddr_loc, which is the raw
+ # address that we will pass as first argument to memset().
+ # It can be in the same register as either one, but not in
+ # args[2], because we're still needing the latter.
+ dstaddr_box = TempBox()
+ dstaddr_loc = self.rm.force_allocate_reg(dstaddr_box, [args[2]])
+ itemsize_loc = imm(itemsize)
+ dst_addr = self.assembler._get_interiorfield_addr(
+ dstaddr_loc, startindex_loc, itemsize_loc,
+ base_loc, imm(baseofs))
+ self.assembler.mc.LEA(dstaddr_loc, dst_addr)
+ #
+ if constbytes >= 0:
+ length_loc = imm(constbytes)
+ else:
+ # load length_loc in a register different than dstaddr_loc
+ length_loc = self.rm.make_sure_var_in_reg(length_box,
+ [dstaddr_box])
+ if itemsize > 1:
+ # we need a register that is different from dstaddr_loc,
+ # but which can be identical to length_loc (as usual,
+ # only if the length_box is not used by future operations)
+ bytes_box = TempBox()
+ bytes_loc = self.rm.force_allocate_reg(bytes_box,
+ [dstaddr_box])
+ b_adr = self.assembler._get_interiorfield_addr(
+ bytes_loc, length_loc, itemsize_loc, imm0, imm0)
+ self.assembler.mc.LEA(bytes_loc, b_adr)
+ length_box = bytes_box
+ length_loc = bytes_loc
+ #
+ # call memset()
+ self.rm.before_call()
+ self.xrm.before_call()
+ self.assembler.simple_call_no_collect(
+ imm(self.assembler.memset_addr),
+ [dstaddr_loc, imm0, length_loc])
+ self.rm.possibly_free_var(length_box)
+ self.rm.possibly_free_var(dstaddr_box)
+
def not_implemented_op(self, op):
not_implemented("not implemented operation: %s" % op.getopname())
diff --git a/rpython/jit/backend/x86/regloc.py b/rpython/jit/backend/x86/regloc.py
--- a/rpython/jit/backend/x86/regloc.py
+++ b/rpython/jit/backend/x86/regloc.py
@@ -664,6 +664,7 @@
MOVDQ = _binaryop('MOVDQ')
MOVD32 = _binaryop('MOVD32')
+ MOVUPS = _binaryop('MOVUPS')
CALL = _relative_unaryop('CALL')
JMP = _relative_unaryop('JMP')
diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py
--- a/rpython/jit/backend/x86/rx86.py
+++ b/rpython/jit/backend/x86/rx86.py
@@ -634,6 +634,9 @@
MOVD32_xs = xmminsn('\x66', rex_nw, '\x0F\x6E', register(1, 8), stack_sp(2))
PSRAD_xi = xmminsn('\x66', rex_nw, '\x0F\x72', register(1), '\xE0', immediate(2, 'b'))
+ MOVUPS_mx = xmminsn(rex_nw, '\x0F\x11', register(2, 8), mem_reg_plus_const(1))
+ MOVUPS_jx = xmminsn(rex_nw, '\x0F\x11', register(2, 8), abs_(1))
+ MOVUPS_ax = xmminsn(rex_nw, '\x0F\x11', register(2, 8), mem_reg_plus_scaled_reg_plus_const(1))
# ------------------------------------------------------------
@@ -764,6 +767,7 @@
define_modrm_modes('DIVSD_x*', ['\xF2', rex_nw, '\x0F\x5E', register(1, 8)], regtype='XMM')
define_modrm_modes('UCOMISD_x*', ['\x66', rex_nw, '\x0F\x2E', register(1, 8)], regtype='XMM')
define_modrm_modes('XORPD_x*', ['\x66', rex_nw, '\x0F\x57', register(1, 8)], regtype='XMM')
+define_modrm_modes('XORPS_x*', [rex_nw, '\x0F\x57', register(1, 8)], regtype='XMM')
define_modrm_modes('ANDPD_x*', ['\x66', rex_nw, '\x0F\x54', register(1, 8)], regtype='XMM')
def define_pxmm_insn(insnname_template, insn_char):
diff --git a/rpython/jit/metainterp/executor.py b/rpython/jit/metainterp/executor.py
--- a/rpython/jit/metainterp/executor.py
+++ b/rpython/jit/metainterp/executor.py
@@ -326,6 +326,7 @@
rop.COND_CALL_GC_WB,
rop.COND_CALL_GC_WB_ARRAY,
rop.ZERO_PTR_FIELD,
+ rop.ZERO_ARRAY,
rop.DEBUG_MERGE_POINT,
rop.JIT_DEBUG,
rop.SETARRAYITEM_RAW,
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -504,8 +504,10 @@
'SETINTERIORFIELD_RAW/3d', # right now, only used by tests
'RAW_STORE/3d',
'SETFIELD_GC/2d',
- 'ZERO_PTR_FIELD/2', # only emitted by the rewrite, sets a pointer field
+ 'ZERO_PTR_FIELD/2', # only emitted by the rewrite, clears a pointer field
# at a given offset, no descr
+ 'ZERO_ARRAY/3d', # only emitted by the rewrite, clears (part of) an array
+ # [arraygcptr, firstindex, length], descr=ArrayDescr
'SETFIELD_RAW/2d',
'STRSETITEM/3',
'UNICODESETITEM/3',
More information about the pypy-commit mailing list