[pypy-commit] pypy gc_no_cleanup_nursery: hg merge gc_zero_array

arigo noreply at buildbot.pypy.org
Fri Sep 19 07:55:36 CEST 2014


Author: Armin Rigo <arigo at tunes.org>
Branch: gc_no_cleanup_nursery
Changeset: r73610:526a7075bba7
Date: 2014-09-19 07:55 +0200
http://bitbucket.org/pypy/pypy/changeset/526a7075bba7/

Log:	hg merge gc_zero_array

diff --git a/rpython/jit/backend/llsupport/assembler.py b/rpython/jit/backend/llsupport/assembler.py
--- a/rpython/jit/backend/llsupport/assembler.py
+++ b/rpython/jit/backend/llsupport/assembler.py
@@ -1,5 +1,5 @@
 from rpython.jit.backend.llsupport import jitframe
-from rpython.jit.backend.llsupport.memcpy import memcpy_fn
+from rpython.jit.backend.llsupport.memcpy import memcpy_fn, memset_fn
 from rpython.jit.backend.llsupport.symbolic import WORD
 from rpython.jit.metainterp.history import (INT, REF, FLOAT, JitCellToken,
     ConstInt, BoxInt, AbstractFailDescr)
@@ -63,6 +63,7 @@
     def __init__(self, cpu, translate_support_code=False):
         self.cpu = cpu
         self.memcpy_addr = 0
+        self.memset_addr = 0
         self.rtyper = cpu.rtyper
         self._debug = False
 
@@ -79,6 +80,7 @@
         else:
             self.gc_size_of_header = WORD # for tests
         self.memcpy_addr = self.cpu.cast_ptr_to_int(memcpy_fn)
+        self.memset_addr = self.cpu.cast_ptr_to_int(memset_fn)
         self._build_failure_recovery(False, withfloats=False)
         self._build_failure_recovery(True, withfloats=False)
         self._build_wb_slowpath(False)
diff --git a/rpython/jit/backend/llsupport/gc.py b/rpython/jit/backend/llsupport/gc.py
--- a/rpython/jit/backend/llsupport/gc.py
+++ b/rpython/jit/backend/llsupport/gc.py
@@ -34,15 +34,6 @@
             self.fielddescr_vtable = get_field_descr(self, rclass.OBJECT,
                                                      'typeptr')
         self._generated_functions = []
-        self.memset_ptr = rffi.llexternal('memset', [lltype.Signed, rffi.INT,
-                                                     rffi.SIZE_T], lltype.Void,
-                                                     sandboxsafe=True,
-                                                     _nowrapper=True)
-        self.memset_ptr_as_int = heaptracker.adr2int(
-            llmemory.cast_ptr_to_adr(self.memset_ptr))
-        ei = EffectInfo([], [], [], [], [], [], EffectInfo.EF_CANNOT_RAISE)
-        self.memset_descr = get_call_descr(self, [lltype.Signed, rffi.INT,
-                                                  rffi.SIZE_T], lltype.Void, ei)
 
     def _setup_str(self):
         self.str_descr     = get_array_descr(self, rstr.STR)
diff --git a/rpython/jit/backend/llsupport/memcpy.py b/rpython/jit/backend/llsupport/memcpy.py
--- a/rpython/jit/backend/llsupport/memcpy.py
+++ b/rpython/jit/backend/llsupport/memcpy.py
@@ -3,3 +3,6 @@
 memcpy_fn = rffi.llexternal('memcpy', [llmemory.Address, llmemory.Address,
                                        rffi.SIZE_T], lltype.Void,
                             sandboxsafe=True, _nowrapper=True)
+memset_fn = rffi.llexternal('memset', [llmemory.Address, rffi.INT,
+                                       rffi.SIZE_T], lltype.Void,
+                            sandboxsafe=True, _nowrapper=True)
diff --git a/rpython/jit/backend/llsupport/rewrite.py b/rpython/jit/backend/llsupport/rewrite.py
--- a/rpython/jit/backend/llsupport/rewrite.py
+++ b/rpython/jit/backend/llsupport/rewrite.py
@@ -180,38 +180,16 @@
         if kind == FLAG_ARRAY:
             self.clear_varsize_gc_fields(op.getdescr(), op.result, v_length)
 
-    def handle_clear_array_contents(self, arraydescr, v_arr, v_arrsize=None):
-        # XXX this maybe should go to optimizer, so we can remove extra ops?
-        ofs, size, _ = self.cpu.unpack_arraydescr_size(arraydescr)
-        v_arr_plus_ofs = BoxInt()
-        v_totalsize = BoxInt()
-        gcdescr = self.gc_ll_descr
-        if isinstance(v_arrsize, ConstInt) and v_arrsize.getint() < 10:
-            # clear it item by item
-            ops = []
-            for i in range(v_arrsize.getint()):
-                ops.append(ResOperation(rop.SETARRAYITEM_GC,
-                                        [v_arr, ConstInt(i), self.c_zero], None,
-                                        descr=arraydescr))
-            self.newops += ops
-            return
-        ops = [
-            ResOperation(rop.INT_ADD, [v_arr, ConstInt(ofs)], v_arr_plus_ofs),
-        ]
-
-        if v_arrsize is None:
-            v_arrsize = BoxInt()
-            o = ResOperation(rop.ARRAYLEN_GC, [v_arr], v_arrsize,
+    def handle_clear_array_contents(self, arraydescr, v_arr, v_length=None):
+        # XXX more work here to reduce or remove the ZERO_ARRAY in some cases
+        if v_length is None:
+            v_length = BoxInt()
+            o = ResOperation(rop.ARRAYLEN_GC, [v_arr], v_length,
                              descr=arraydescr)
-            ops.append(o)
-        ops += [
-            ResOperation(rop.INT_MUL, [v_arrsize, ConstInt(size)], v_totalsize),
-            ResOperation(rop.CALL, [ConstInt(gcdescr.memset_ptr_as_int),
-                                    v_arr_plus_ofs,
-                                    ConstInt(0), v_totalsize], None,
-                                    descr=gcdescr.memset_descr),
-        ]
-        self.newops.extend(ops)
+            self.newops.append(o)
+        o = ResOperation(rop.ZERO_ARRAY, [v_arr, ConstInt(0), v_length], None,
+                         descr=arraydescr)
+        self.newops.append(o)
 
     def gen_malloc_frame(self, frame_info, frame, size_box):
         descrs = self.gc_ll_descr.getframedescrs(self.cpu)
diff --git a/rpython/jit/backend/test/runner_test.py b/rpython/jit/backend/test/runner_test.py
--- a/rpython/jit/backend/test/runner_test.py
+++ b/rpython/jit/backend/test/runner_test.py
@@ -4478,3 +4478,49 @@
         ref = self.cpu.get_ref_value(deadframe, 0)
         s = lltype.cast_opaque_ptr(lltype.Ptr(S), ref)
         assert not s.x
+
+    def test_zero_ptr_field(self):
+        XXX     # write me!
+
+    def test_zero_array(self):
+        PAIR = lltype.Struct('PAIR', ('a', lltype.Signed), ('b', lltype.Signed))
+        for OF in [lltype.Signed, rffi.INT, rffi.SHORT, rffi.UCHAR, PAIR]:
+            A = lltype.GcArray(OF)
+            arraydescr = self.cpu.arraydescrof(A)
+            a = lltype.malloc(A, 100)
+            addr = llmemory.cast_ptr_to_adr(a)
+            a_int = heaptracker.adr2int(addr)
+            a_ref = lltype.cast_opaque_ptr(llmemory.GCREF, a)
+            for (start, length) in [(0, 100), (49, 49), (1, 98),
+                                    (15, 9), (10, 10), (47, 0),
+                                    (0, 4)]:
+                for cls1 in [ConstInt, BoxInt]:
+                    for cls2 in [ConstInt, BoxInt]:
+                        print 'a_int:', a_int
+                        print 'of:', OF
+                        print 'start:', start
+                        print 'length:', length
+                        print 'cls1:', cls1.__name__
+                        print 'cls2:', cls2.__name__
+                        for i in range(100):
+                            if OF == PAIR:
+                                a[i].a = a[i].b = -123456789
+                            else:
+                                a[i] = rffi.cast(OF, -123456789)
+                        startbox = cls1(start)
+                        lengthbox = cls2(length)
+                        if cls1 == cls2 and start == length:
+                            lengthbox = startbox    # same box!
+                        self.execute_operation(rop.ZERO_ARRAY,
+                                               [BoxPtr(a_ref),
+                                                startbox,
+                                                lengthbox],
+                                           'void', descr=arraydescr)
+                        assert len(a) == 100
+                        for i in range(100):
+                            val = (0 if start <= i < start + length
+                                     else -123456789)
+                            if OF == PAIR:
+                                assert a[i].a == a[i].b == val
+                            else:
+                                assert a[i] == rffi.cast(OF, val)
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -2363,6 +2363,42 @@
             elif IS_X86_64:
                 mc.MOVSX32_rj(loc.value, addr)     # memory read, sign-extend
 
+    def genop_discard_zero_array(self, op, arglocs):
+        (base_loc, startindex_loc, bytes_loc,
+         itemsize_loc, baseofs_loc, null_loc) = arglocs
+        assert isinstance(bytes_loc, ImmedLoc)
+        assert isinstance(itemsize_loc, ImmedLoc)
+        assert isinstance(baseofs_loc, ImmedLoc)
+        assert isinstance(null_loc, RegLoc) and null_loc.is_xmm
+        baseofs = baseofs_loc.value
+        nbytes = bytes_loc.value
+        if valid_addressing_size(itemsize_loc.value):
+            scale = get_scale(itemsize_loc.value)
+        else:
+            assert isinstance(startindex_loc, ImmedLoc)
+            assert startindex_loc.value == 0
+            scale = 0
+        null_reg_cleared = False
+        i = 0
+        while i < nbytes:
+            addr = addr_add(base_loc, startindex_loc, baseofs + i, scale)
+            current = nbytes - i
+            if current >= 16:
+                current = 16
+                if not null_reg_cleared:
+                    self.mc.XORPS_xx(null_loc.value, null_loc.value)
+                    null_reg_cleared = True
+                self.mc.MOVUPS(addr, null_loc)
+            else:
+                if current >= WORD:
+                    current = WORD
+                elif current >= 4:
+                    current = 4
+                elif current >= 2:
+                    current = 2
+                self.save_into_mem(addr, imm0, imm(current))
+            i += current
+
 
 genop_discard_list = [Assembler386.not_implemented_op_discard] * rop._LAST
 genop_list = [Assembler386.not_implemented_op] * rop._LAST
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -8,7 +8,8 @@
     unpack_arraydescr, unpack_fielddescr, unpack_interiorfielddescr)
 from rpython.jit.backend.llsupport.gcmap import allocate_gcmap
 from rpython.jit.backend.llsupport.regalloc import (FrameManager, BaseRegalloc,
-     RegisterManager, TempBox, compute_vars_longevity, is_comparison_or_ovf_op)
+     RegisterManager, TempBox, compute_vars_longevity, is_comparison_or_ovf_op,
+     valid_addressing_size)
 from rpython.jit.backend.x86 import rx86
 from rpython.jit.backend.x86.arch import (WORD, JITFRAME_FIXED_SIZE, IS_X86_32,
     IS_X86_64)
@@ -1384,6 +1385,71 @@
     def consider_keepalive(self, op):
         pass
 
+    def consider_zero_array(self, op):
+        itemsize, baseofs, _ = unpack_arraydescr(op.getdescr())
+        args = op.getarglist()
+        base_loc = self.rm.make_sure_var_in_reg(args[0], args)
+        startindex_loc = self.rm.make_sure_var_in_reg(args[1], args)
+        length_box = op.getarg(2)
+        if isinstance(length_box, ConstInt):
+            constbytes = length_box.getint() * itemsize
+        else:
+            constbytes = -1
+        if 0 <= constbytes <= 16 * 8 and (
+                valid_addressing_size(itemsize) or
+                (isinstance(startindex_loc, ImmedLoc) and
+                  startindex_loc.value == 0)):
+            if IS_X86_64:
+                null_loc = X86_64_XMM_SCRATCH_REG
+            else:
+                null_box = TempBox()
+                null_loc = self.xrm.force_allocate_reg(null_box)
+                self.xrm.possibly_free_var(null_box)
+            self.perform_discard(op, [base_loc, startindex_loc,
+                                      imm(constbytes), imm(itemsize),
+                                      imm(baseofs), null_loc])
+        else:
+            # base_loc and startindex_loc are in two regs here (or they are
+            # immediates).  Compute the dstaddr_loc, which is the raw
+            # address that we will pass as first argument to memset().
+            # It can be in the same register as either one, but not in
+            # args[2], because we still need the latter.
+            dstaddr_box = TempBox()
+            dstaddr_loc = self.rm.force_allocate_reg(dstaddr_box, [args[2]])
+            itemsize_loc = imm(itemsize)
+            dst_addr = self.assembler._get_interiorfield_addr(
+                dstaddr_loc, startindex_loc, itemsize_loc,
+                base_loc, imm(baseofs))
+            self.assembler.mc.LEA(dstaddr_loc, dst_addr)
+            #
+            if constbytes >= 0:
+                length_loc = imm(constbytes)
+            else:
+                # load length_loc in a register different than dstaddr_loc
+                length_loc = self.rm.make_sure_var_in_reg(length_box,
+                                                          [dstaddr_box])
+                if itemsize > 1:
+                    # we need a register that is different from dstaddr_loc,
+                    # but which can be identical to length_loc (as usual,
+                    # only if the length_box is not used by future operations)
+                    bytes_box = TempBox()
+                    bytes_loc = self.rm.force_allocate_reg(bytes_box,
+                                                           [dstaddr_box])
+                    b_adr = self.assembler._get_interiorfield_addr(
+                        bytes_loc, length_loc, itemsize_loc, imm0, imm0)
+                    self.assembler.mc.LEA(bytes_loc, b_adr)
+                    length_box = bytes_box
+                    length_loc = bytes_loc
+            #
+            # call memset()
+            self.rm.before_call()
+            self.xrm.before_call()
+            self.assembler.simple_call_no_collect(
+                imm(self.assembler.memset_addr),
+                [dstaddr_loc, imm0, length_loc])
+            self.rm.possibly_free_var(length_box)
+            self.rm.possibly_free_var(dstaddr_box)
+
     def not_implemented_op(self, op):
         not_implemented("not implemented operation: %s" % op.getopname())
 
diff --git a/rpython/jit/backend/x86/regloc.py b/rpython/jit/backend/x86/regloc.py
--- a/rpython/jit/backend/x86/regloc.py
+++ b/rpython/jit/backend/x86/regloc.py
@@ -664,6 +664,7 @@
 
     MOVDQ = _binaryop('MOVDQ')
     MOVD32 = _binaryop('MOVD32')
+    MOVUPS = _binaryop('MOVUPS')
 
     CALL = _relative_unaryop('CALL')
     JMP = _relative_unaryop('JMP')
diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py
--- a/rpython/jit/backend/x86/rx86.py
+++ b/rpython/jit/backend/x86/rx86.py
@@ -634,6 +634,9 @@
     MOVD32_xs = xmminsn('\x66', rex_nw, '\x0F\x6E', register(1, 8), stack_sp(2))
 
     PSRAD_xi = xmminsn('\x66', rex_nw, '\x0F\x72', register(1), '\xE0', immediate(2, 'b'))
+    MOVUPS_mx = xmminsn(rex_nw, '\x0F\x11', register(2, 8), mem_reg_plus_const(1))
+    MOVUPS_jx = xmminsn(rex_nw, '\x0F\x11', register(2, 8), abs_(1))
+    MOVUPS_ax = xmminsn(rex_nw, '\x0F\x11', register(2, 8), mem_reg_plus_scaled_reg_plus_const(1))
 
     # ------------------------------------------------------------
 
@@ -764,6 +767,7 @@
 define_modrm_modes('DIVSD_x*', ['\xF2', rex_nw, '\x0F\x5E', register(1, 8)], regtype='XMM')
 define_modrm_modes('UCOMISD_x*', ['\x66', rex_nw, '\x0F\x2E', register(1, 8)], regtype='XMM')
 define_modrm_modes('XORPD_x*', ['\x66', rex_nw, '\x0F\x57', register(1, 8)], regtype='XMM')
+define_modrm_modes('XORPS_x*', [rex_nw, '\x0F\x57', register(1, 8)], regtype='XMM')
 define_modrm_modes('ANDPD_x*', ['\x66', rex_nw, '\x0F\x54', register(1, 8)], regtype='XMM')
 
 def define_pxmm_insn(insnname_template, insn_char):
diff --git a/rpython/jit/metainterp/executor.py b/rpython/jit/metainterp/executor.py
--- a/rpython/jit/metainterp/executor.py
+++ b/rpython/jit/metainterp/executor.py
@@ -326,6 +326,7 @@
                          rop.COND_CALL_GC_WB,
                          rop.COND_CALL_GC_WB_ARRAY,
                          rop.ZERO_PTR_FIELD,
+                         rop.ZERO_ARRAY,
                          rop.DEBUG_MERGE_POINT,
                          rop.JIT_DEBUG,
                          rop.SETARRAYITEM_RAW,
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -504,8 +504,10 @@
     'SETINTERIORFIELD_RAW/3d',    # right now, only used by tests
     'RAW_STORE/3d',
     'SETFIELD_GC/2d',
-    'ZERO_PTR_FIELD/2', # only emitted by the rewrite, sets a pointer field
+    'ZERO_PTR_FIELD/2', # only emitted by the rewrite, clears a pointer field
                         # at a given offset, no descr
+    'ZERO_ARRAY/3d',    # only emitted by the rewrite, clears (part of) an array
+                        # [arraygcptr, firstindex, length], descr=ArrayDescr
     'SETFIELD_RAW/2d',
     'STRSETITEM/3',
     'UNICODESETITEM/3',


More information about the pypy-commit mailing list