[pypy-commit] pypy default: Optimize ZERO_ARRAY(const) followed by some number of SETARRAYITEM_GCs.
arigo
noreply at buildbot.pypy.org
Mon Sep 29 11:17:34 CEST 2014
Author: Armin Rigo <arigo at tunes.org>
Branch:
Changeset: r73744:1083964c3070
Date: 2014-09-29 11:16 +0200
http://bitbucket.org/pypy/pypy/changeset/1083964c3070/
Log: Optimize ZERO_ARRAY(const) followed by some number of
SETARRAYITEM_GCs.
diff --git a/rpython/jit/backend/arm/opassembler.py b/rpython/jit/backend/arm/opassembler.py
--- a/rpython/jit/backend/arm/opassembler.py
+++ b/rpython/jit/backend/arm/opassembler.py
@@ -1180,6 +1180,9 @@
def emit_op_zero_array(self, op, arglocs, regalloc, fcond):
from rpython.jit.backend.llsupport.descr import unpack_arraydescr
assert len(arglocs) == 0
+ length_box = op.getarg(2)
+ if isinstance(length_box, ConstInt) and length_box.getint() == 0:
+ return fcond # nothing to do
itemsize, baseofs, _ = unpack_arraydescr(op.getdescr())
args = op.getarglist()
base_loc = regalloc.rm.make_sure_var_in_reg(args[0], args)
@@ -1191,7 +1194,6 @@
else:
startindex_loc = regalloc.rm.make_sure_var_in_reg(sibox, args)
startindex = -1
- length_box = op.getarg(2)
# base_loc and startindex_loc are in two regs here (or they are
# immediates). Compute the dstaddr_loc, which is the raw
diff --git a/rpython/jit/backend/llsupport/rewrite.py b/rpython/jit/backend/llsupport/rewrite.py
--- a/rpython/jit/backend/llsupport/rewrite.py
+++ b/rpython/jit/backend/llsupport/rewrite.py
@@ -48,7 +48,8 @@
self.known_lengths = {}
self.write_barrier_applied = {}
self.delayed_zero_setfields = {}
- self.delayed_zero_setarrayitems = {}
+ self.last_zero_arrays = []
+ self.setarrayitems_occurred = {} # {box: {set-of-indexes}}
def rewrite(self, operations):
# we can only remember one malloc since the next malloc can possibly
@@ -81,6 +82,7 @@
self.handle_write_barrier_setinteriorfield(op)
continue
if op.getopnum() == rop.SETARRAYITEM_GC:
+ self.consider_setarrayitem_gc(op)
self.handle_write_barrier_setarrayitem(op)
continue
else:
@@ -89,6 +91,8 @@
# need to call it
if op.getopnum() == rop.SETFIELD_GC:
self.consider_setfield_gc(op)
+ elif op.getopnum() == rop.SETARRAYITEM_GC:
+ self.consider_setarrayitem_gc(op)
# ---------- call assembler -----------
if op.getopnum() == rop.CALL_ASSEMBLER:
self.handle_call_assembler(op)
@@ -146,6 +150,16 @@
except KeyError:
pass
+ def consider_setarrayitem_gc(self, op):
+ array_box = op.getarg(0)
+ index_box = op.getarg(1)
+ if isinstance(array_box, BoxPtr) and isinstance(index_box, ConstInt):
+ try:
+ intset = self.setarrayitems_occurred[array_box]
+ except KeyError:
+ intset = self.setarrayitems_occurred[array_box] = {}
+ intset[index_box.getint()] = None
+
def clear_varsize_gc_fields(self, kind, descr, result, v_length, opnum):
if self.gc_ll_descr.malloc_zero_filled:
return
@@ -216,18 +230,18 @@
self.clear_varsize_gc_fields(kind, op.getdescr(), op.result, v_length,
op.getopnum())
- def handle_clear_array_contents(self, arraydescr, v_arr, v_length=None):
- # XXX more work here to reduce or remove the ZERO_ARRAY in some cases
- if v_length is None:
- v_length = BoxInt()
- o = ResOperation(rop.ARRAYLEN_GC, [v_arr], v_length,
- descr=arraydescr)
- self.newops.append(o)
- elif isinstance(v_length, ConstInt) and v_length.getint() == 0:
+ def handle_clear_array_contents(self, arraydescr, v_arr, v_length):
+ assert v_length is not None
+ if isinstance(v_length, ConstInt) and v_length.getint() == 0:
return
+ # the ZERO_ARRAY operation will be optimized according to what
+ # SETARRAYITEM_GC we see before the next allocation operation.
+ # See emit_pending_zeros().
o = ResOperation(rop.ZERO_ARRAY, [v_arr, self.c_zero, v_length], None,
descr=arraydescr)
self.newops.append(o)
+ if isinstance(v_length, ConstInt):
+ self.last_zero_arrays.append(o)
def gen_malloc_frame(self, frame_info, frame, size_box):
descrs = self.gc_ll_descr.getframedescrs(self.cpu)
@@ -317,6 +331,31 @@
self.emit_pending_zeros()
def emit_pending_zeros(self):
+ # First, try to rewrite the existing ZERO_ARRAY operations from
+ # the 'last_zero_arrays' list. Note that these operation objects
+ # are also already in 'newops', which is the point.
+ for op in self.last_zero_arrays:
+ assert op.getopnum() == rop.ZERO_ARRAY
+ box = op.getarg(0)
+ try:
+ intset = self.setarrayitems_occurred[box]
+ except KeyError:
+ continue
+ assert op.getarg(1).getint() == 0 # always 'start=0' initially
+ start = 0
+ while start in intset:
+ start += 1
+ op.setarg(1, ConstInt(start))
+ stop = op.getarg(2).getint()
+ assert start <= stop
+ while stop > start and (stop - 1) in intset:
+ stop -= 1
+ op.setarg(2, ConstInt(stop - start))
+ # ^^ may be ConstInt(0); then the operation becomes a no-op
+ del self.last_zero_arrays[:]
+ self.setarrayitems_occurred.clear()
+ #
+ # Then write the ZERO_PTR_FIELDs that are still pending
for v, d in self.delayed_zero_setfields.iteritems():
for ofs in d.iterkeys():
op = ResOperation(rop.ZERO_PTR_FIELD, [v, ConstInt(ofs)], None)
diff --git a/rpython/jit/backend/llsupport/test/test_rewrite.py b/rpython/jit/backend/llsupport/test/test_rewrite.py
--- a/rpython/jit/backend/llsupport/test/test_rewrite.py
+++ b/rpython/jit/backend/llsupport/test/test_rewrite.py
@@ -743,6 +743,163 @@
jump()
""")
+ def test_zero_array_reduced_left(self):
+ self.check_rewrite("""
+ [p1, p2]
+ p0 = new_array_clear(5, descr=cdescr)
+ setarrayitem_gc(p0, 1, p1, descr=cdescr)
+ setarrayitem_gc(p0, 0, p2, descr=cdescr)
+ jump()
+ """, """
+ [p1, p2]
+ p0 = call_malloc_nursery( \
+ %(cdescr.basesize + 5 * cdescr.itemsize)d)
+ setfield_gc(p0, 8111, descr=tiddescr)
+ setfield_gc(p0, 5, descr=clendescr)
+ zero_array(p0, 2, 3, descr=cdescr)
+ setarrayitem_gc(p0, 1, p1, descr=cdescr)
+ setarrayitem_gc(p0, 0, p2, descr=cdescr)
+ jump()
+ """)
+
+ def test_zero_array_reduced_right(self):
+ self.check_rewrite("""
+ [p1, p2]
+ p0 = new_array_clear(5, descr=cdescr)
+ setarrayitem_gc(p0, 3, p1, descr=cdescr)
+ setarrayitem_gc(p0, 4, p2, descr=cdescr)
+ jump()
+ """, """
+ [p1, p2]
+ p0 = call_malloc_nursery( \
+ %(cdescr.basesize + 5 * cdescr.itemsize)d)
+ setfield_gc(p0, 8111, descr=tiddescr)
+ setfield_gc(p0, 5, descr=clendescr)
+ zero_array(p0, 0, 3, descr=cdescr)
+ setarrayitem_gc(p0, 3, p1, descr=cdescr)
+ setarrayitem_gc(p0, 4, p2, descr=cdescr)
+ jump()
+ """)
+
+ def test_zero_array_not_reduced_at_all(self):
+ self.check_rewrite("""
+ [p1, p2]
+ p0 = new_array_clear(5, descr=cdescr)
+ setarrayitem_gc(p0, 3, p1, descr=cdescr)
+ setarrayitem_gc(p0, 2, p2, descr=cdescr)
+ setarrayitem_gc(p0, 1, p2, descr=cdescr)
+ jump()
+ """, """
+ [p1, p2]
+ p0 = call_malloc_nursery( \
+ %(cdescr.basesize + 5 * cdescr.itemsize)d)
+ setfield_gc(p0, 8111, descr=tiddescr)
+ setfield_gc(p0, 5, descr=clendescr)
+ zero_array(p0, 0, 5, descr=cdescr)
+ setarrayitem_gc(p0, 3, p1, descr=cdescr)
+ setarrayitem_gc(p0, 2, p2, descr=cdescr)
+ setarrayitem_gc(p0, 1, p2, descr=cdescr)
+ jump()
+ """)
+
+ def test_zero_array_reduced_completely(self):
+ self.check_rewrite("""
+ [p1, p2]
+ p0 = new_array_clear(5, descr=cdescr)
+ setarrayitem_gc(p0, 3, p1, descr=cdescr)
+ setarrayitem_gc(p0, 4, p2, descr=cdescr)
+ setarrayitem_gc(p0, 0, p1, descr=cdescr)
+ setarrayitem_gc(p0, 2, p2, descr=cdescr)
+ setarrayitem_gc(p0, 1, p2, descr=cdescr)
+ jump()
+ """, """
+ [p1, p2]
+ p0 = call_malloc_nursery( \
+ %(cdescr.basesize + 5 * cdescr.itemsize)d)
+ setfield_gc(p0, 8111, descr=tiddescr)
+ setfield_gc(p0, 5, descr=clendescr)
+ zero_array(p0, 5, 0, descr=cdescr)
+ setarrayitem_gc(p0, 3, p1, descr=cdescr)
+ setarrayitem_gc(p0, 4, p2, descr=cdescr)
+ setarrayitem_gc(p0, 0, p1, descr=cdescr)
+ setarrayitem_gc(p0, 2, p2, descr=cdescr)
+ setarrayitem_gc(p0, 1, p2, descr=cdescr)
+ jump()
+ """)
+
+ def test_zero_array_reduced_left_with_call(self):
+ self.check_rewrite("""
+ [p1, p2]
+ p0 = new_array_clear(5, descr=cdescr)
+ setarrayitem_gc(p0, 0, p1, descr=cdescr)
+ call(321321)
+ setarrayitem_gc(p0, 1, p2, descr=cdescr)
+ jump()
+ """, """
+ [p1, p2]
+ p0 = call_malloc_nursery( \
+ %(cdescr.basesize + 5 * cdescr.itemsize)d)
+ setfield_gc(p0, 8111, descr=tiddescr)
+ setfield_gc(p0, 5, descr=clendescr)
+ zero_array(p0, 1, 4, descr=cdescr)
+ setarrayitem_gc(p0, 0, p1, descr=cdescr)
+ call(321321)
+ cond_call_gc_wb(p0, descr=wbdescr)
+ setarrayitem_gc(p0, 1, p2, descr=cdescr)
+ jump()
+ """)
+
+ def test_zero_array_reduced_left_with_label(self):
+ self.check_rewrite("""
+ [p1, p2]
+ p0 = new_array_clear(5, descr=cdescr)
+ setarrayitem_gc(p0, 0, p1, descr=cdescr)
+ label(p0, p2)
+ setarrayitem_gc(p0, 1, p2, descr=cdescr)
+ jump()
+ """, """
+ [p1, p2]
+ p0 = call_malloc_nursery( \
+ %(cdescr.basesize + 5 * cdescr.itemsize)d)
+ setfield_gc(p0, 8111, descr=tiddescr)
+ setfield_gc(p0, 5, descr=clendescr)
+ zero_array(p0, 1, 4, descr=cdescr)
+ setarrayitem_gc(p0, 0, p1, descr=cdescr)
+ label(p0, p2)
+ cond_call_gc_wb_array(p0, 1, descr=wbdescr)
+ setarrayitem_gc(p0, 1, p2, descr=cdescr)
+ jump()
+ """)
+
+ def test_zero_array_varsize(self):
+ self.check_rewrite("""
+ [p1, p2, i3]
+ p0 = new_array_clear(i3, descr=bdescr)
+ jump()
+ """, """
+ [p1, p2, i3]
+ p0 = call_malloc_nursery_varsize(0, 1, i3, descr=bdescr)
+ setfield_gc(p0, i3, descr=blendescr)
+ zero_array(p0, 0, i3, descr=bdescr)
+ jump()
+ """)
+
+ def test_zero_array_varsize_cannot_reduce(self):
+ self.check_rewrite("""
+ [p1, p2, i3]
+ p0 = new_array_clear(i3, descr=bdescr)
+ setarrayitem_gc(p0, 0, p1, descr=bdescr)
+ jump()
+ """, """
+ [p1, p2, i3]
+ p0 = call_malloc_nursery_varsize(0, 1, i3, descr=bdescr)
+ setfield_gc(p0, i3, descr=blendescr)
+ zero_array(p0, 0, i3, descr=bdescr)
+ cond_call_gc_wb_array(p0, 0, descr=wbdescr)
+ setarrayitem_gc(p0, 0, p1, descr=bdescr)
+ jump()
+ """)
+
def test_initialization_store_potentially_large_array(self):
# the write barrier cannot be omitted, because we might get
# an array with cards and the GC assumes that the write
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -1386,14 +1386,16 @@
def consider_zero_array(self, op):
itemsize, baseofs, _ = unpack_arraydescr(op.getdescr())
+ length_box = op.getarg(2)
+ if isinstance(length_box, ConstInt):
+ constbytes = length_box.getint() * itemsize
+ if constbytes == 0:
+ return # nothing to do
+ else:
+ constbytes = -1
args = op.getarglist()
base_loc = self.rm.make_sure_var_in_reg(args[0], args)
startindex_loc = self.rm.make_sure_var_in_reg(args[1], args)
- length_box = op.getarg(2)
- if isinstance(length_box, ConstInt):
- constbytes = length_box.getint() * itemsize
- else:
- constbytes = -1
if 0 <= constbytes <= 16 * 8 and (
valid_addressing_size(itemsize) or
- isinstance(startindex_loc, ImmedLoc)):
More information about the pypy-commit
mailing list