[pypy-commit] pypy default: Optimize ZERO_ARRAY(const) followed by some number of SETARRAYITEM_GCs.

arigo noreply at buildbot.pypy.org
Mon Sep 29 11:17:34 CEST 2014


Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r73744:1083964c3070
Date: 2014-09-29 11:16 +0200
http://bitbucket.org/pypy/pypy/changeset/1083964c3070/

Log:	Optimize ZERO_ARRAY(const) followed by some number of
	SETARRAYITEM_GCs.

diff --git a/rpython/jit/backend/arm/opassembler.py b/rpython/jit/backend/arm/opassembler.py
--- a/rpython/jit/backend/arm/opassembler.py
+++ b/rpython/jit/backend/arm/opassembler.py
@@ -1180,6 +1180,9 @@
     def emit_op_zero_array(self, op, arglocs, regalloc, fcond):
         from rpython.jit.backend.llsupport.descr import unpack_arraydescr
         assert len(arglocs) == 0
+        length_box = op.getarg(2)
+        if isinstance(length_box, ConstInt) and length_box.getint() == 0:
+            return fcond     # nothing to do
         itemsize, baseofs, _ = unpack_arraydescr(op.getdescr())
         args = op.getarglist()
         base_loc = regalloc.rm.make_sure_var_in_reg(args[0], args)
@@ -1191,7 +1194,6 @@
         else:
             startindex_loc = regalloc.rm.make_sure_var_in_reg(sibox, args)
             startindex = -1
-        length_box = op.getarg(2)
 
         # base_loc and startindex_loc are in two regs here (or they are
         # immediates).  Compute the dstaddr_loc, which is the raw
diff --git a/rpython/jit/backend/llsupport/rewrite.py b/rpython/jit/backend/llsupport/rewrite.py
--- a/rpython/jit/backend/llsupport/rewrite.py
+++ b/rpython/jit/backend/llsupport/rewrite.py
@@ -48,7 +48,8 @@
         self.known_lengths = {}
         self.write_barrier_applied = {}
         self.delayed_zero_setfields = {}
-        self.delayed_zero_setarrayitems = {}
+        self.last_zero_arrays = []
+        self.setarrayitems_occurred = {}   # {box: {set-of-indexes}}
 
     def rewrite(self, operations):
         # we can only remember one malloc since the next malloc can possibly
@@ -81,6 +82,7 @@
                     self.handle_write_barrier_setinteriorfield(op)
                     continue
                 if op.getopnum() == rop.SETARRAYITEM_GC:
+                    self.consider_setarrayitem_gc(op)
                     self.handle_write_barrier_setarrayitem(op)
                     continue
             else:
@@ -89,6 +91,8 @@
                 # need to clal it
                 if op.getopnum() == rop.SETFIELD_GC:
                     self.consider_setfield_gc(op)
+                elif op.getopnum() == rop.SETARRAYITEM_GC:
+                    self.consider_setarrayitem_gc(op)
             # ---------- call assembler -----------
             if op.getopnum() == rop.CALL_ASSEMBLER:
                 self.handle_call_assembler(op)
@@ -146,6 +150,16 @@
         except KeyError:
             pass
 
+    def consider_setarrayitem_gc(self, op):
+        array_box = op.getarg(0)
+        index_box = op.getarg(1)
+        if isinstance(array_box, BoxPtr) and isinstance(index_box, ConstInt):
+            try:
+                intset = self.setarrayitems_occurred[array_box]
+            except KeyError:
+                intset = self.setarrayitems_occurred[array_box] = {}
+            intset[index_box.getint()] = None
+
     def clear_varsize_gc_fields(self, kind, descr, result, v_length, opnum):
         if self.gc_ll_descr.malloc_zero_filled:
             return
@@ -216,18 +230,18 @@
         self.clear_varsize_gc_fields(kind, op.getdescr(), op.result, v_length,
                                      op.getopnum())
 
-    def handle_clear_array_contents(self, arraydescr, v_arr, v_length=None):
-        # XXX more work here to reduce or remove the ZERO_ARRAY in some cases
-        if v_length is None:
-            v_length = BoxInt()
-            o = ResOperation(rop.ARRAYLEN_GC, [v_arr], v_length,
-                             descr=arraydescr)
-            self.newops.append(o)
-        elif isinstance(v_length, ConstInt) and v_length.getint() == 0:
+    def handle_clear_array_contents(self, arraydescr, v_arr, v_length):
+        assert v_length is not None
+        if isinstance(v_length, ConstInt) and v_length.getint() == 0:
             return
+        # the ZERO_ARRAY operation will be optimized according to what
+        # SETARRAYITEM_GC we see before the next allocation operation.
+        # See emit_pending_zeros().
         o = ResOperation(rop.ZERO_ARRAY, [v_arr, self.c_zero, v_length], None,
                          descr=arraydescr)
         self.newops.append(o)
+        if isinstance(v_length, ConstInt):
+            self.last_zero_arrays.append(o)
 
     def gen_malloc_frame(self, frame_info, frame, size_box):
         descrs = self.gc_ll_descr.getframedescrs(self.cpu)
@@ -317,6 +331,31 @@
         self.emit_pending_zeros()
 
     def emit_pending_zeros(self):
+        # First, try to rewrite the existing ZERO_ARRAY operations from
+        # the 'last_zero_arrays' list.  Note that these operation objects
+        # are also already in 'newops', which is the point.
+        for op in self.last_zero_arrays:
+            assert op.getopnum() == rop.ZERO_ARRAY
+            box = op.getarg(0)
+            try:
+                intset = self.setarrayitems_occurred[box]
+            except KeyError:
+                continue
+            assert op.getarg(1).getint() == 0   # always 'start=0' initially
+            start = 0
+            while start in intset:
+                start += 1
+            op.setarg(1, ConstInt(start))
+            stop = op.getarg(2).getint()
+            assert start <= stop
+            while stop > start and (stop - 1) in intset:
+                stop -= 1
+            op.setarg(2, ConstInt(stop - start))
+            # ^^ may be ConstInt(0); then the operation becomes a no-op
+        del self.last_zero_arrays[:]
+        self.setarrayitems_occurred.clear()
+        #
+        # Then write the ZERO_PTR_FIELDs that are still pending
         for v, d in self.delayed_zero_setfields.iteritems():
             for ofs in d.iterkeys():
                 op = ResOperation(rop.ZERO_PTR_FIELD, [v, ConstInt(ofs)], None)
diff --git a/rpython/jit/backend/llsupport/test/test_rewrite.py b/rpython/jit/backend/llsupport/test/test_rewrite.py
--- a/rpython/jit/backend/llsupport/test/test_rewrite.py
+++ b/rpython/jit/backend/llsupport/test/test_rewrite.py
@@ -743,6 +743,163 @@
             jump()
         """)
 
+    def test_zero_array_reduced_left(self):
+        self.check_rewrite("""
+            [p1, p2]
+            p0 = new_array_clear(5, descr=cdescr)
+            setarrayitem_gc(p0, 1, p1, descr=cdescr)
+            setarrayitem_gc(p0, 0, p2, descr=cdescr)
+            jump()
+        """, """
+            [p1, p2]
+            p0 = call_malloc_nursery(    \
+                                %(cdescr.basesize + 5 * cdescr.itemsize)d)
+            setfield_gc(p0, 8111, descr=tiddescr)
+            setfield_gc(p0, 5, descr=clendescr)
+            zero_array(p0, 2, 3, descr=cdescr)
+            setarrayitem_gc(p0, 1, p1, descr=cdescr)
+            setarrayitem_gc(p0, 0, p2, descr=cdescr)
+            jump()
+        """)
+
+    def test_zero_array_reduced_right(self):
+        self.check_rewrite("""
+            [p1, p2]
+            p0 = new_array_clear(5, descr=cdescr)
+            setarrayitem_gc(p0, 3, p1, descr=cdescr)
+            setarrayitem_gc(p0, 4, p2, descr=cdescr)
+            jump()
+        """, """
+            [p1, p2]
+            p0 = call_malloc_nursery(    \
+                                %(cdescr.basesize + 5 * cdescr.itemsize)d)
+            setfield_gc(p0, 8111, descr=tiddescr)
+            setfield_gc(p0, 5, descr=clendescr)
+            zero_array(p0, 0, 3, descr=cdescr)
+            setarrayitem_gc(p0, 3, p1, descr=cdescr)
+            setarrayitem_gc(p0, 4, p2, descr=cdescr)
+            jump()
+        """)
+
+    def test_zero_array_not_reduced_at_all(self):
+        self.check_rewrite("""
+            [p1, p2]
+            p0 = new_array_clear(5, descr=cdescr)
+            setarrayitem_gc(p0, 3, p1, descr=cdescr)
+            setarrayitem_gc(p0, 2, p2, descr=cdescr)
+            setarrayitem_gc(p0, 1, p2, descr=cdescr)
+            jump()
+        """, """
+            [p1, p2]
+            p0 = call_malloc_nursery(    \
+                                %(cdescr.basesize + 5 * cdescr.itemsize)d)
+            setfield_gc(p0, 8111, descr=tiddescr)
+            setfield_gc(p0, 5, descr=clendescr)
+            zero_array(p0, 0, 5, descr=cdescr)
+            setarrayitem_gc(p0, 3, p1, descr=cdescr)
+            setarrayitem_gc(p0, 2, p2, descr=cdescr)
+            setarrayitem_gc(p0, 1, p2, descr=cdescr)
+            jump()
+        """)
+
+    def test_zero_array_reduced_completely(self):
+        self.check_rewrite("""
+            [p1, p2]
+            p0 = new_array_clear(5, descr=cdescr)
+            setarrayitem_gc(p0, 3, p1, descr=cdescr)
+            setarrayitem_gc(p0, 4, p2, descr=cdescr)
+            setarrayitem_gc(p0, 0, p1, descr=cdescr)
+            setarrayitem_gc(p0, 2, p2, descr=cdescr)
+            setarrayitem_gc(p0, 1, p2, descr=cdescr)
+            jump()
+        """, """
+            [p1, p2]
+            p0 = call_malloc_nursery(    \
+                                %(cdescr.basesize + 5 * cdescr.itemsize)d)
+            setfield_gc(p0, 8111, descr=tiddescr)
+            setfield_gc(p0, 5, descr=clendescr)
+            zero_array(p0, 5, 0, descr=cdescr)
+            setarrayitem_gc(p0, 3, p1, descr=cdescr)
+            setarrayitem_gc(p0, 4, p2, descr=cdescr)
+            setarrayitem_gc(p0, 0, p1, descr=cdescr)
+            setarrayitem_gc(p0, 2, p2, descr=cdescr)
+            setarrayitem_gc(p0, 1, p2, descr=cdescr)
+            jump()
+        """)
+
+    def test_zero_array_reduced_left_with_call(self):
+        self.check_rewrite("""
+            [p1, p2]
+            p0 = new_array_clear(5, descr=cdescr)
+            setarrayitem_gc(p0, 0, p1, descr=cdescr)
+            call(321321)
+            setarrayitem_gc(p0, 1, p2, descr=cdescr)
+            jump()
+        """, """
+            [p1, p2]
+            p0 = call_malloc_nursery(    \
+                                %(cdescr.basesize + 5 * cdescr.itemsize)d)
+            setfield_gc(p0, 8111, descr=tiddescr)
+            setfield_gc(p0, 5, descr=clendescr)
+            zero_array(p0, 1, 4, descr=cdescr)
+            setarrayitem_gc(p0, 0, p1, descr=cdescr)
+            call(321321)
+            cond_call_gc_wb(p0, descr=wbdescr)
+            setarrayitem_gc(p0, 1, p2, descr=cdescr)
+            jump()
+        """)
+
+    def test_zero_array_reduced_left_with_label(self):
+        self.check_rewrite("""
+            [p1, p2]
+            p0 = new_array_clear(5, descr=cdescr)
+            setarrayitem_gc(p0, 0, p1, descr=cdescr)
+            label(p0, p2)
+            setarrayitem_gc(p0, 1, p2, descr=cdescr)
+            jump()
+        """, """
+            [p1, p2]
+            p0 = call_malloc_nursery(    \
+                                %(cdescr.basesize + 5 * cdescr.itemsize)d)
+            setfield_gc(p0, 8111, descr=tiddescr)
+            setfield_gc(p0, 5, descr=clendescr)
+            zero_array(p0, 1, 4, descr=cdescr)
+            setarrayitem_gc(p0, 0, p1, descr=cdescr)
+            label(p0, p2)
+            cond_call_gc_wb_array(p0, 1, descr=wbdescr)
+            setarrayitem_gc(p0, 1, p2, descr=cdescr)
+            jump()
+        """)
+
+    def test_zero_array_varsize(self):
+        self.check_rewrite("""
+            [p1, p2, i3]
+            p0 = new_array_clear(i3, descr=bdescr)
+            jump()
+        """, """
+            [p1, p2, i3]
+            p0 = call_malloc_nursery_varsize(0, 1, i3, descr=bdescr)
+            setfield_gc(p0, i3, descr=blendescr)
+            zero_array(p0, 0, i3, descr=bdescr)
+            jump()
+        """)
+
+    def test_zero_array_varsize_cannot_reduce(self):
+        self.check_rewrite("""
+            [p1, p2, i3]
+            p0 = new_array_clear(i3, descr=bdescr)
+            setarrayitem_gc(p0, 0, p1, descr=bdescr)
+            jump()
+        """, """
+            [p1, p2, i3]
+            p0 = call_malloc_nursery_varsize(0, 1, i3, descr=bdescr)
+            setfield_gc(p0, i3, descr=blendescr)
+            zero_array(p0, 0, i3, descr=bdescr)
+            cond_call_gc_wb_array(p0, 0, descr=wbdescr)
+            setarrayitem_gc(p0, 0, p1, descr=bdescr)
+            jump()
+        """)
+
     def test_initialization_store_potentially_large_array(self):
         # the write barrier cannot be omitted, because we might get
         # an array with cards and the GC assumes that the write
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -1386,14 +1386,16 @@
 
     def consider_zero_array(self, op):
         itemsize, baseofs, _ = unpack_arraydescr(op.getdescr())
+        length_box = op.getarg(2)
+        if isinstance(length_box, ConstInt):
+            constbytes = length_box.getint() * itemsize
+            if constbytes == 0:
+                return    # nothing to do
+        else:
+            constbytes = -1
         args = op.getarglist()
         base_loc = self.rm.make_sure_var_in_reg(args[0], args)
         startindex_loc = self.rm.make_sure_var_in_reg(args[1], args)
-        length_box = op.getarg(2)
-        if isinstance(length_box, ConstInt):
-            constbytes = length_box.getint() * itemsize
-        else:
-            constbytes = -1
         if 0 <= constbytes <= 16 * 8 and (
                 valid_addressing_size(itemsize) or
 -               isinstance(startindex_loc, ImmedLoc)):


More information about the pypy-commit mailing list