[pypy-commit] pypy default: Merge jit-write-barrier-from-array:

Armin Rigo noreply at buildbot.pypy.org
Sat Jun 4 07:25:08 CEST 2011


Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r44685:5a240a694bb8
Date: 2011-06-04 07:24 +0200
http://bitbucket.org/pypy/pypy/changeset/5a240a694bb8/

Log:	Merge jit-write-barrier-from-array:

	 * in case the JIT generates a SETARRAYITEM_GC on a list which it
	cannot prove is short enough, we should really use
	write_barrier_from_array instead of the default write_barrier.

	 * get rid of GcRefList, one of the last remaining causes of
	leaks, as far as I can tell.

diff --git a/pypy/jit/backend/llsupport/gc.py b/pypy/jit/backend/llsupport/gc.py
--- a/pypy/jit/backend/llsupport/gc.py
+++ b/pypy/jit/backend/llsupport/gc.py
@@ -34,7 +34,7 @@
         pass
     def do_write_barrier(self, gcref_struct, gcref_newptr):
         pass
-    def rewrite_assembler(self, cpu, operations):
+    def rewrite_assembler(self, cpu, operations, gcrefs_output_list):
         return operations
     def can_inline_malloc(self, descr):
         return False
@@ -146,78 +146,6 @@
 # All code below is for the hybrid or minimark GC
 
 
-class GcRefList:
-    """Handles all references from the generated assembler to GC objects.
-    This is implemented as a nonmovable, but GC, list; the assembler contains
-    code that will (for now) always read from this list."""
-
-    GCREF_LIST = lltype.GcArray(llmemory.GCREF)     # followed by the GC
-
-    HASHTABLE = rffi.CArray(llmemory.Address)      # ignored by the GC
-    HASHTABLE_BITS = 10
-    HASHTABLE_SIZE = 1 << HASHTABLE_BITS
-
-    def initialize(self):
-        if we_are_translated(): n = 2000
-        else:                   n = 10    # tests only
-        self.list = self.alloc_gcref_list(n)
-        self.nextindex = 0
-        self.oldlists = []
-        # A pseudo dictionary: it is fixed size, and it may contain
-        # random nonsense after a collection moved the objects.  It is only
-        # used to avoid too many duplications in the GCREF_LISTs.
-        self.hashtable = lltype.malloc(self.HASHTABLE,
-                                       self.HASHTABLE_SIZE+1,
-                                       flavor='raw', track_allocation=False)
-        dummy = lltype.direct_ptradd(lltype.direct_arrayitems(self.hashtable),
-                                     self.HASHTABLE_SIZE)
-        dummy = llmemory.cast_ptr_to_adr(dummy)
-        for i in range(self.HASHTABLE_SIZE+1):
-            self.hashtable[i] = dummy
-
-    def alloc_gcref_list(self, n):
-        # Important: the GRREF_LISTs allocated are *non-movable*.  This
-        # requires support in the gc (hybrid GC or minimark GC so far).
-        if we_are_translated():
-            list = rgc.malloc_nonmovable(self.GCREF_LIST, n)
-            assert list, "malloc_nonmovable failed!"
-        else:
-            list = lltype.malloc(self.GCREF_LIST, n)     # for tests only
-        return list
-
-    def get_address_of_gcref(self, gcref):
-        assert lltype.typeOf(gcref) == llmemory.GCREF
-        # first look in the hashtable, using an inexact hash (fails after
-        # the object moves)
-        addr = llmemory.cast_ptr_to_adr(gcref)
-        hash = llmemory.cast_adr_to_int(addr, "forced")
-        hash -= hash >> self.HASHTABLE_BITS
-        hash &= self.HASHTABLE_SIZE - 1
-        addr_ref = self.hashtable[hash]
-        # the following test is safe anyway, because the addresses found
-        # in the hashtable are always the addresses of nonmovable stuff
-        # ('addr_ref' is an address inside self.list, not directly the
-        # address of a real moving GC object -- that's 'addr_ref.address[0]'.)
-        if addr_ref.address[0] == addr:
-            return addr_ref
-        # if it fails, add an entry to the list
-        if self.nextindex == len(self.list):
-            # reallocate first, increasing a bit the size every time
-            self.oldlists.append(self.list)
-            self.list = self.alloc_gcref_list(len(self.list) // 4 * 5)
-            self.nextindex = 0
-        # add it
-        index = self.nextindex
-        self.list[index] = gcref
-        addr_ref = lltype.direct_ptradd(lltype.direct_arrayitems(self.list),
-                                        index)
-        addr_ref = llmemory.cast_ptr_to_adr(addr_ref)
-        self.nextindex = index + 1
-        # record it in the hashtable
-        self.hashtable[hash] = addr_ref
-        return addr_ref
-
-
 class GcRootMap_asmgcc(object):
     """Handles locating the stack roots in the assembler.
     This is the class supporting --gcrootfinder=asmgcc.
@@ -527,6 +455,7 @@
     def __init__(self, gc_ll_descr):
         self.llop1 = gc_ll_descr.llop1
         self.WB_FUNCPTR = gc_ll_descr.WB_FUNCPTR
+        self.WB_ARRAY_FUNCPTR = gc_ll_descr.WB_ARRAY_FUNCPTR
         self.fielddescr_tid = get_field_descr(gc_ll_descr,
                                               gc_ll_descr.GCClass.HDR, 'tid')
         self.jit_wb_if_flag = gc_ll_descr.GCClass.JIT_WB_IF_FLAG
@@ -546,6 +475,13 @@
         funcaddr = llmemory.cast_ptr_to_adr(funcptr)
         return cpu.cast_adr_to_int(funcaddr)
 
+    def get_write_barrier_from_array_fn(self, cpu):
+        llop1 = self.llop1
+        funcptr = llop1.get_write_barrier_from_array_failing_case(
+            self.WB_ARRAY_FUNCPTR)
+        funcaddr = llmemory.cast_ptr_to_adr(funcptr)
+        return cpu.cast_adr_to_int(funcaddr)    # this may return 0
+
 
 class GcLLDescr_framework(GcLLDescription):
     DEBUG = False    # forced to True by x86/test/test_zrpy_gc.py
@@ -559,7 +495,7 @@
         self.translator = translator
         self.llop1 = llop1
 
-        # we need the hybrid or minimark GC for GcRefList.alloc_gcref_list()
+        # we need the hybrid or minimark GC for rgc._make_sure_does_not_move()
         # to work
         if gcdescr.config.translation.gc not in ('hybrid', 'minimark'):
             raise NotImplementedError("--gc=%s not implemented with the JIT" %
@@ -574,8 +510,6 @@
                                       " with the JIT" % (name,))
         gcrootmap = cls(gcdescr)
         self.gcrootmap = gcrootmap
-        self.gcrefs = GcRefList()
-        self.single_gcref_descr = GcPtrFieldDescr('', 0)
 
         # make a TransformerLayoutBuilder and save it on the translator
         # where it can be fished and reused by the FrameworkGCTransformer
@@ -617,6 +551,8 @@
             [lltype.Signed, lltype.Signed], llmemory.GCREF))
         self.WB_FUNCPTR = lltype.Ptr(lltype.FuncType(
             [llmemory.Address, llmemory.Address], lltype.Void))
+        self.WB_ARRAY_FUNCPTR = lltype.Ptr(lltype.FuncType(
+            [llmemory.Address, lltype.Signed], lltype.Void))
         self.write_barrier_descr = WriteBarrierDescr(self)
         #
         def malloc_array(itemsize, tid, num_elem):
@@ -706,7 +642,6 @@
         return rffi.cast(lltype.Signed, fptr)
 
     def initialize(self):
-        self.gcrefs.initialize()
         self.gcrootmap.initialize()
 
     def init_size_descr(self, S, descr):
@@ -768,54 +703,32 @@
             funcptr(llmemory.cast_ptr_to_adr(gcref_struct),
                     llmemory.cast_ptr_to_adr(gcref_newptr))
 
-    def replace_constptrs_with_getfield_raw(self, cpu, newops, op):
-        # xxx some performance issue here
-        newargs = [None] * op.numargs()
-        needs_copy = False
+    def record_constptrs(self, op, gcrefs_output_list):
         for i in range(op.numargs()):
             v = op.getarg(i)
-            newargs[i] = v
             if isinstance(v, ConstPtr) and bool(v.value):
-                addr = self.gcrefs.get_address_of_gcref(v.value)
-                # ^^^even for non-movable objects, to record their presence
-                if rgc.can_move(v.value):
-                    box = BoxPtr(v.value)
-                    addr = cpu.cast_adr_to_int(addr)
-                    newops.append(ResOperation(rop.GETFIELD_RAW,
-                                               [ConstInt(addr)], box,
-                                               self.single_gcref_descr))
-                    newargs[i] = box
-                    needs_copy = True
-        #
-        if needs_copy:
-            return op.copy_and_change(op.getopnum(), args=newargs)
-        else:
-            return op
+                p = v.value
+                rgc._make_sure_does_not_move(p)
+                gcrefs_output_list.append(p)
 
-
-    def rewrite_assembler(self, cpu, operations):
+    def rewrite_assembler(self, cpu, operations, gcrefs_output_list):
         # Perform two kinds of rewrites in parallel:
         #
         # - Add COND_CALLs to the write barrier before SETFIELD_GC and
         #   SETARRAYITEM_GC operations.
         #
-        # - Remove all uses of ConstPtrs away from the assembler.
-        #   Idea: when running on a moving GC, we can't (easily) encode
-        #   the ConstPtrs in the assembler, because they can move at any
-        #   point in time.  Instead, we store them in 'gcrefs.list', a GC
-        #   but nonmovable list; and here, we modify 'operations' to
-        #   replace direct usage of ConstPtr with a BoxPtr loaded by a
-        #   GETFIELD_RAW from the array 'gcrefs.list'.
+        # - Record the ConstPtrs from the assembler.
         #
         newops = []
+        known_lengths = {}
         # we can only remember one malloc since the next malloc can possibly
         # collect
         last_malloc = None
         for op in operations:
             if op.getopnum() == rop.DEBUG_MERGE_POINT:
                 continue
-            # ---------- replace ConstPtrs with GETFIELD_RAW ----------
-            op = self.replace_constptrs_with_getfield_raw(cpu, newops, op)
+            # ---------- record the ConstPtrs ----------
+            self.record_constptrs(op, gcrefs_output_list)
             if op.is_malloc():
                 last_malloc = op.result
             elif op.can_malloc():
@@ -838,19 +751,40 @@
                     v = op.getarg(2)
                     if isinstance(v, BoxPtr) or (isinstance(v, ConstPtr) and
                                             bool(v.value)): # store a non-NULL
-                        # XXX detect when we should produce a
-                        # write_barrier_from_array
-                        self._gen_write_barrier(newops, op.getarg(0), v)
+                        self._gen_write_barrier_array(newops, op.getarg(0),
+                                                      op.getarg(1), v,
+                                                      cpu, known_lengths)
                         op = op.copy_and_change(rop.SETARRAYITEM_RAW)
+            elif op.getopnum() == rop.NEW_ARRAY:
+                v_length = op.getarg(0)
+                if isinstance(v_length, ConstInt):
+                    known_lengths[op.result] = v_length.getint()
             # ----------
             newops.append(op)
         return newops
 
-    def _gen_write_barrier(self, newops, v_base, v_value):
-        args = [v_base, v_value]
+    def _gen_write_barrier(self, newops, v_base, v_value_or_index):
+        # NB. the 2nd argument of COND_CALL_GC_WB is either a pointer
+        # (regular case), or an index (case of write_barrier_from_array)
+        args = [v_base, v_value_or_index]
         newops.append(ResOperation(rop.COND_CALL_GC_WB, args, None,
                                    descr=self.write_barrier_descr))
 
+    def _gen_write_barrier_array(self, newops, v_base, v_index, v_value,
+                                 cpu, known_lengths):
+        if self.write_barrier_descr.get_write_barrier_from_array_fn(cpu) != 0:
+            # If we know statically the length of 'v_base', and it is not too
+            # big, then produce a regular write_barrier.  If it's unknown or
+            # too big, produce instead a write_barrier_from_array.
+            LARGE = 130
+            length = known_lengths.get(v_base, LARGE)
+            if length >= LARGE:
+                # unknown or too big: produce a write_barrier_from_array
+                self._gen_write_barrier(newops, v_base, v_index)
+                return
+        # fall-back case: produce a write_barrier
+        self._gen_write_barrier(newops, v_base, v_value)
+
     def can_inline_malloc(self, descr):
         assert isinstance(descr, BaseSizeDescr)
         if descr.size < self.max_size_of_young_obj:
diff --git a/pypy/jit/backend/llsupport/test/test_gc.py b/pypy/jit/backend/llsupport/test/test_gc.py
--- a/pypy/jit/backend/llsupport/test/test_gc.py
+++ b/pypy/jit/backend/llsupport/test/test_gc.py
@@ -49,19 +49,6 @@
 
 # ____________________________________________________________
 
-def test_GcRefList():
-    S = lltype.GcStruct('S')
-    order = range(50) * 4
-    random.shuffle(order)
-    allocs = [lltype.cast_opaque_ptr(llmemory.GCREF, lltype.malloc(S))
-              for i in range(50)]
-    allocs = [allocs[i] for i in order]
-    #
-    gcrefs = GcRefList()
-    gcrefs.initialize()
-    addrs = [gcrefs.get_address_of_gcref(ptr) for ptr in allocs]
-    for i in range(len(allocs)):
-        assert addrs[i].address[0] == llmemory.cast_ptr_to_adr(allocs[i])
 
 class TestGcRootMapAsmGcc:
 
@@ -288,6 +275,18 @@
     def get_write_barrier_failing_case(self, FPTRTYPE):
         return llhelper(FPTRTYPE, self._write_barrier_failing_case)
 
+    _have_wb_from_array = False
+
+    def _write_barrier_from_array_failing_case(self, adr_struct, v_index):
+        self.record.append(('barrier_from_array', adr_struct, v_index))
+
+    def get_write_barrier_from_array_failing_case(self, FPTRTYPE):
+        if self._have_wb_from_array:
+            return llhelper(FPTRTYPE,
+                            self._write_barrier_from_array_failing_case)
+        else:
+            return lltype.nullptr(FPTRTYPE.TO)
+
 
 class TestFramework(object):
     gc = 'hybrid'
@@ -303,9 +302,20 @@
             config = config_
         class FakeCPU(object):
             def cast_adr_to_int(self, adr):
-                ptr = llmemory.cast_adr_to_ptr(adr, gc_ll_descr.WB_FUNCPTR)
-                assert ptr._obj._callable == llop1._write_barrier_failing_case
-                return 42
+                if not adr:
+                    return 0
+                try:
+                    ptr = llmemory.cast_adr_to_ptr(adr, gc_ll_descr.WB_FUNCPTR)
+                    assert ptr._obj._callable == \
+                           llop1._write_barrier_failing_case
+                    return 42
+                except lltype.InvalidCast:
+                    ptr = llmemory.cast_adr_to_ptr(
+                        adr, gc_ll_descr.WB_ARRAY_FUNCPTR)
+                    assert ptr._obj._callable == \
+                           llop1._write_barrier_from_array_failing_case
+                    return 43
+
         gcdescr = get_description(config_)
         translator = FakeTranslator()
         llop1 = FakeLLOp()
@@ -414,11 +424,11 @@
             ResOperation(rop.DEBUG_MERGE_POINT, ['dummy', 2], None),
             ]
         gc_ll_descr = self.gc_ll_descr
-        operations = gc_ll_descr.rewrite_assembler(None, operations)
+        operations = gc_ll_descr.rewrite_assembler(None, operations, [])
         assert len(operations) == 0
 
     def test_rewrite_assembler_1(self):
-        # check rewriting of ConstPtrs
+        # check recording of ConstPtrs
         class MyFakeCPU(object):
             def cast_adr_to_int(self, adr):
                 assert adr == "some fake address"
@@ -438,56 +448,12 @@
             ]
         gc_ll_descr = self.gc_ll_descr
         gc_ll_descr.gcrefs = MyFakeGCRefList()
+        gcrefs = []
         operations = get_deep_immutable_oplist(operations)
-        operations = gc_ll_descr.rewrite_assembler(MyFakeCPU(), operations)
-        assert len(operations) == 2
-        assert operations[0].getopnum() == rop.GETFIELD_RAW
-        assert operations[0].getarg(0) == ConstInt(43)
-        assert operations[0].getdescr() == gc_ll_descr.single_gcref_descr
-        v_box = operations[0].result
-        assert isinstance(v_box, BoxPtr)
-        assert operations[1].getopnum() == rop.PTR_EQ
-        assert operations[1].getarg(0) == v_random_box
-        assert operations[1].getarg(1) == v_box
-        assert operations[1].result == v_result
-
-    def test_rewrite_assembler_1_cannot_move(self):
-        # check rewriting of ConstPtrs
-        class MyFakeCPU(object):
-            def cast_adr_to_int(self, adr):
-                xxx    # should not be called
-        class MyFakeGCRefList(object):
-            def get_address_of_gcref(self, s_gcref1):
-                seen.append(s_gcref1)
-                assert s_gcref1 == s_gcref
-                return "some fake address"
-        seen = []
-        S = lltype.GcStruct('S')
-        s = lltype.malloc(S)
-        s_gcref = lltype.cast_opaque_ptr(llmemory.GCREF, s)
-        v_random_box = BoxPtr()
-        v_result = BoxInt()
-        operations = [
-            ResOperation(rop.PTR_EQ, [v_random_box, ConstPtr(s_gcref)],
-                         v_result),
-            ]
-        gc_ll_descr = self.gc_ll_descr
-        gc_ll_descr.gcrefs = MyFakeGCRefList()
-        old_can_move = rgc.can_move
-        operations = get_deep_immutable_oplist(operations)
-        try:
-            rgc.can_move = lambda s: False
-            operations = gc_ll_descr.rewrite_assembler(MyFakeCPU(), operations)
-        finally:
-            rgc.can_move = old_can_move
-        assert len(operations) == 1
-        assert operations[0].getopnum() == rop.PTR_EQ
-        assert operations[0].getarg(0) == v_random_box
-        assert operations[0].getarg(1) == ConstPtr(s_gcref)
-        assert operations[0].result == v_result
-        # check that s_gcref gets added to the list anyway, to make sure
-        # that the GC sees it
-        assert seen == [s_gcref]
+        operations2 = gc_ll_descr.rewrite_assembler(MyFakeCPU(), operations,
+                                                   gcrefs)
+        assert operations2 == operations
+        assert gcrefs == [s_gcref]
 
     def test_rewrite_assembler_2(self):
         # check write barriers before SETFIELD_GC
@@ -500,7 +466,8 @@
             ]
         gc_ll_descr = self.gc_ll_descr
         operations = get_deep_immutable_oplist(operations)
-        operations = gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+        operations = gc_ll_descr.rewrite_assembler(self.fake_cpu, operations,
+                                                   [])
         assert len(operations) == 2
         #
         assert operations[0].getopnum() == rop.COND_CALL_GC_WB
@@ -515,29 +482,90 @@
 
     def test_rewrite_assembler_3(self):
         # check write barriers before SETARRAYITEM_GC
-        v_base = BoxPtr()
-        v_index = BoxInt()
-        v_value = BoxPtr()
-        array_descr = AbstractDescr()
-        operations = [
-            ResOperation(rop.SETARRAYITEM_GC, [v_base, v_index, v_value], None,
-                         descr=array_descr),
-            ]
-        gc_ll_descr = self.gc_ll_descr
-        operations = get_deep_immutable_oplist(operations)
-        operations = gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
-        assert len(operations) == 2
-        #
-        assert operations[0].getopnum() == rop.COND_CALL_GC_WB
-        assert operations[0].getarg(0) == v_base
-        assert operations[0].getarg(1) == v_value
-        assert operations[0].result is None
-        #
-        assert operations[1].getopnum() == rop.SETARRAYITEM_RAW
-        assert operations[1].getarg(0) == v_base
-        assert operations[1].getarg(1) == v_index
-        assert operations[1].getarg(2) == v_value
-        assert operations[1].getdescr() == array_descr
+        for v_new_length in (None, ConstInt(5), ConstInt(5000), BoxInt()):
+            v_base = BoxPtr()
+            v_index = BoxInt()
+            v_value = BoxPtr()
+            array_descr = AbstractDescr()
+            operations = [
+                ResOperation(rop.SETARRAYITEM_GC, [v_base, v_index, v_value],
+                             None, descr=array_descr),
+                ]
+            if v_new_length is not None:
+                operations.insert(0, ResOperation(rop.NEW_ARRAY,
+                                                  [v_new_length], v_base,
+                                                  descr=array_descr))
+                # we need to insert another, unrelated NEW_ARRAY here
+                # to prevent the initialization_store optimization
+                operations.insert(1, ResOperation(rop.NEW_ARRAY,
+                                                  [ConstInt(12)], BoxPtr(),
+                                                  descr=array_descr))
+            gc_ll_descr = self.gc_ll_descr
+            operations = get_deep_immutable_oplist(operations)
+            operations = gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                       operations, [])
+            if v_new_length is not None:
+                assert operations[0].getopnum() == rop.NEW_ARRAY
+                assert operations[1].getopnum() == rop.NEW_ARRAY
+                del operations[:2]
+            assert len(operations) == 2
+            #
+            assert operations[0].getopnum() == rop.COND_CALL_GC_WB
+            assert operations[0].getarg(0) == v_base
+            assert operations[0].getarg(1) == v_value
+            assert operations[0].result is None
+            #
+            assert operations[1].getopnum() == rop.SETARRAYITEM_RAW
+            assert operations[1].getarg(0) == v_base
+            assert operations[1].getarg(1) == v_index
+            assert operations[1].getarg(2) == v_value
+            assert operations[1].getdescr() == array_descr
+
+    def test_rewrite_assembler_4(self):
+        # check write barriers before SETARRAYITEM_GC,
+        # if we actually have a write_barrier_from_array.
+        self.llop1._have_wb_from_array = True
+        for v_new_length in (None, ConstInt(5), ConstInt(5000), BoxInt()):
+            v_base = BoxPtr()
+            v_index = BoxInt()
+            v_value = BoxPtr()
+            array_descr = AbstractDescr()
+            operations = [
+                ResOperation(rop.SETARRAYITEM_GC, [v_base, v_index, v_value],
+                             None, descr=array_descr),
+                ]
+            if v_new_length is not None:
+                operations.insert(0, ResOperation(rop.NEW_ARRAY,
+                                                  [v_new_length], v_base,
+                                                  descr=array_descr))
+                # we need to insert another, unrelated NEW_ARRAY here
+                # to prevent the initialization_store optimization
+                operations.insert(1, ResOperation(rop.NEW_ARRAY,
+                                                  [ConstInt(12)], BoxPtr(),
+                                                  descr=array_descr))
+            gc_ll_descr = self.gc_ll_descr
+            operations = get_deep_immutable_oplist(operations)
+            operations = gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                       operations, [])
+            if v_new_length is not None:
+                assert operations[0].getopnum() == rop.NEW_ARRAY
+                assert operations[1].getopnum() == rop.NEW_ARRAY
+                del operations[:2]
+            assert len(operations) == 2
+            #
+            assert operations[0].getopnum() == rop.COND_CALL_GC_WB
+            assert operations[0].getarg(0) == v_base
+            if isinstance(v_new_length, ConstInt) and v_new_length.value < 130:
+                assert operations[0].getarg(1) == v_value
+            else:
+                assert operations[0].getarg(1) == v_index
+            assert operations[0].result is None
+            #
+            assert operations[1].getopnum() == rop.SETARRAYITEM_RAW
+            assert operations[1].getarg(0) == v_base
+            assert operations[1].getarg(1) == v_index
+            assert operations[1].getarg(2) == v_value
+            assert operations[1].getdescr() == array_descr
 
     def test_rewrite_assembler_initialization_store(self):
         S = lltype.GcStruct('S', ('parent', OBJECT),
@@ -558,7 +586,8 @@
         jump()
         """, namespace=locals())
         operations = get_deep_immutable_oplist(ops.operations)
-        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                        operations, [])
         equaloplists(operations, expected.operations)
 
     def test_rewrite_assembler_initialization_store_2(self):
@@ -583,7 +612,8 @@
         jump()
         """, namespace=locals())
         operations = get_deep_immutable_oplist(ops.operations)
-        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                        operations, [])
         equaloplists(operations, expected.operations)
 
     def test_rewrite_assembler_initialization_store_3(self):
@@ -602,7 +632,8 @@
         jump()
         """, namespace=locals())
         operations = get_deep_immutable_oplist(ops.operations)
-        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu,
+                                                        operations, [])
         equaloplists(operations, expected.operations)
 
 class TestFrameworkMiniMark(TestFramework):
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -1680,7 +1680,7 @@
         record = []
         #
         S = lltype.GcStruct('S', ('tid', lltype.Signed))
-        FUNC = self.FuncType([lltype.Ptr(S), lltype.Signed], lltype.Void)
+        FUNC = self.FuncType([lltype.Ptr(S), lltype.Ptr(S)], lltype.Void)
         func_ptr = llhelper(lltype.Ptr(FUNC), func_void)
         funcbox = self.get_funcbox(self.cpu, func_ptr)
         class WriteBarrierDescr(AbstractDescr):
@@ -1699,12 +1699,48 @@
             s = lltype.malloc(S)
             s.tid = value
             sgcref = lltype.cast_opaque_ptr(llmemory.GCREF, s)
+            t = lltype.malloc(S)
+            tgcref = lltype.cast_opaque_ptr(llmemory.GCREF, t)
             del record[:]
             self.execute_operation(rop.COND_CALL_GC_WB,
-                                   [BoxPtr(sgcref), ConstInt(-2121)],
+                                   [BoxPtr(sgcref), ConstPtr(tgcref)],
                                    'void', descr=WriteBarrierDescr())
             if cond:
-                assert record == [(s, -2121)]
+                assert record == [(s, t)]
+            else:
+                assert record == []
+
+    def test_cond_call_gc_wb_array(self):
+        def func_void(a, b):
+            record.append((a, b))
+        record = []
+        #
+        S = lltype.GcStruct('S', ('tid', lltype.Signed))
+        FUNC = self.FuncType([lltype.Ptr(S), lltype.Signed], lltype.Void)
+        func_ptr = llhelper(lltype.Ptr(FUNC), func_void)
+        funcbox = self.get_funcbox(self.cpu, func_ptr)
+        class WriteBarrierDescr(AbstractDescr):
+            jit_wb_if_flag = 4096
+            jit_wb_if_flag_byteofs = struct.pack("i", 4096).index('\x10')
+            jit_wb_if_flag_singlebyte = 0x10
+            def get_write_barrier_from_array_fn(self, cpu):
+                return funcbox.getint()
+        #
+        for cond in [False, True]:
+            value = random.randrange(-sys.maxint, sys.maxint)
+            if cond:
+                value |= 4096
+            else:
+                value &= ~4096
+            s = lltype.malloc(S)
+            s.tid = value
+            sgcref = lltype.cast_opaque_ptr(llmemory.GCREF, s)
+            del record[:]
+            self.execute_operation(rop.COND_CALL_GC_WB,
+                                   [BoxPtr(sgcref), ConstInt(123)],
+                                   'void', descr=WriteBarrierDescr())
+            if cond:
+                assert record == [(s, 123)]
             else:
                 assert record == []
 
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -322,6 +322,7 @@
         # for the duration of compiling one loop or a one bridge.
 
         clt = CompiledLoopToken(self.cpu, looptoken.number)
+        clt.allgcrefs = []
         looptoken.compiled_loop_token = clt
         if not we_are_translated():
             # Arguments should be unique
@@ -335,7 +336,8 @@
             operations = self._inject_debugging_code(looptoken, operations)
 
         regalloc = RegAlloc(self, self.cpu.translate_support_code)
-        arglocs, operations = regalloc.prepare_loop(inputargs, operations, looptoken)
+        arglocs, operations = regalloc.prepare_loop(inputargs, operations,
+                                                    looptoken, clt.allgcrefs)
         looptoken._x86_arglocs = arglocs
 
         bootstrappos = self.mc.get_relative_pos()
@@ -407,7 +409,8 @@
         regalloc = RegAlloc(self, self.cpu.translate_support_code)
         fail_depths = faildescr._x86_current_depths
         operations = regalloc.prepare_bridge(fail_depths, inputargs, arglocs,
-                                             operations)
+                                             operations,
+                                             self.current_clt.allgcrefs)
 
         stackadjustpos = self._patchable_stackadjust()
         frame_depth, param_depth = self._assemble(regalloc, operations)
@@ -499,9 +502,9 @@
                 funcname = op.getarg(0)._get_str()
                 break
         else:
-            funcname = "<loop %d>" % len(self.loop_run_counters)
-        # invent the counter, so we don't get too confused
-        return funcname
+            funcname = '?'
+        return "%s (loop counter %d)" % (funcname,
+                                         len(self.loop_run_counters))
 
     def _register_counter(self):
         if self._debug:
@@ -2079,6 +2082,8 @@
         # function remember_young_pointer() from the GC.  The two arguments
         # to the call are in arglocs[:2].  The rest, arglocs[2:], contains
         # registers that need to be saved and restored across the call.
+        # If op.getarg(1) is an int, it is an array index and we must call
+        # remember_young_pointer_from_array() instead.
         descr = op.getdescr()
         if we_are_translated():
             cls = self.cpu.gc_ll_descr.has_write_barrier_class()
@@ -2110,13 +2115,19 @@
             remap_frame_layout(self, arglocs[:2], [edi, esi],
                                X86_64_SCRATCH_REG)
 
+        if op.getarg(1).type == INT:
+            func = descr.get_write_barrier_from_array_fn(self.cpu)
+            assert func != 0
+        else:
+            func = descr.get_write_barrier_fn(self.cpu)
+
         # misaligned stack in the call, but it's ok because the write barrier
         # is not going to call anything more.  Also, this assumes that the
         # write barrier does not touch the xmm registers.  (Slightly delicate
         # assumption, given that the write barrier can end up calling the
         # platform's malloc() from AddressStack.append().  XXX may need to
         # be done properly)
-        self.mc.CALL(imm(descr.get_write_barrier_fn(self.cpu)))
+        self.mc.CALL(imm(func))
         if IS_X86_32:
             self.mc.ADD_ri(esp.value, 2*WORD)
         for i in range(2, len(arglocs)):
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -157,11 +157,12 @@
         # to be read/used by the assembler too
         self.jump_target_descr = None
 
-    def _prepare(self, inputargs, operations):
+    def _prepare(self, inputargs, operations, allgcrefs):
         self.fm = X86FrameManager()
         self.param_depth = 0
         cpu = self.assembler.cpu
-        operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations)
+        operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations,
+                                                       allgcrefs)
         # compute longevity of variables
         longevity = self._compute_vars_longevity(inputargs, operations)
         self.longevity = longevity
@@ -172,15 +173,16 @@
                                    assembler = self.assembler)
         return operations
 
-    def prepare_loop(self, inputargs, operations, looptoken):
-        operations = self._prepare(inputargs, operations)
+    def prepare_loop(self, inputargs, operations, looptoken, allgcrefs):
+        operations = self._prepare(inputargs, operations, allgcrefs)
         jump = operations[-1]
         loop_consts = self._compute_loop_consts(inputargs, jump, looptoken)
         self.loop_consts = loop_consts
         return self._process_inputargs(inputargs), operations
 
-    def prepare_bridge(self, prev_depths, inputargs, arglocs, operations):
-        operations = self._prepare(inputargs, operations)
+    def prepare_bridge(self, prev_depths, inputargs, arglocs, operations,
+                       allgcrefs):
+        operations = self._prepare(inputargs, operations, allgcrefs)
         self.loop_consts = {}
         self._update_bindings(arglocs, inputargs)
         self.fm.frame_depth = prev_depths[0]
@@ -864,12 +866,12 @@
     def consider_cond_call_gc_wb(self, op):
         assert op.result is None
         args = op.getarglist()
-        loc_newvalue = self.rm.make_sure_var_in_reg(op.getarg(1), args)
-        # ^^^ we force loc_newvalue in a reg (unless it's a Const),
+        loc_newvalue_or_index= self.rm.make_sure_var_in_reg(op.getarg(1), args)
+        # ^^^ we force loc_newvalue_or_index in a reg (unless it's a Const),
         # because it will be needed anyway by the following setfield_gc.
         # It avoids loading it twice from the memory.
         loc_base = self.rm.make_sure_var_in_reg(op.getarg(0), args)
-        arglocs = [loc_base, loc_newvalue]
+        arglocs = [loc_base, loc_newvalue_or_index]
         # add eax, ecx and edx as extra "arguments" to ensure they are
         # saved and restored.  Fish in self.rm to know which of these
         # registers really need to be saved (a bit of a hack).  Moreover,
diff --git a/pypy/jit/backend/x86/test/test_gc_integration.py b/pypy/jit/backend/x86/test/test_gc_integration.py
--- a/pypy/jit/backend/x86/test/test_gc_integration.py
+++ b/pypy/jit/backend/x86/test/test_gc_integration.py
@@ -16,7 +16,7 @@
 from pypy.rpython.lltypesystem import lltype, llmemory, rffi
 from pypy.rpython.annlowlevel import llhelper
 from pypy.rpython.lltypesystem import rclass, rstr
-from pypy.jit.backend.llsupport.gc import GcLLDescr_framework, GcRefList, GcPtrFieldDescr
+from pypy.jit.backend.llsupport.gc import GcLLDescr_framework, GcPtrFieldDescr
 
 from pypy.jit.backend.x86.test.test_regalloc import MockAssembler
 from pypy.jit.backend.x86.test.test_regalloc import BaseTestRegalloc
@@ -51,11 +51,9 @@
     gcrootmap = MockGcRootMap()
 
     def initialize(self):
-        self.gcrefs = GcRefList()
-        self.gcrefs.initialize()
-        self.single_gcref_descr = GcPtrFieldDescr('', 0)
+        pass
 
-    replace_constptrs_with_getfield_raw = GcLLDescr_framework.replace_constptrs_with_getfield_raw.im_func
+    record_constptrs = GcLLDescr_framework.record_constptrs.im_func
     rewrite_assembler = GcLLDescr_framework.rewrite_assembler.im_func
 
 class TestRegallocDirectGcIntegration(object):
diff --git a/pypy/jit/backend/x86/test/test_runner.py b/pypy/jit/backend/x86/test/test_runner.py
--- a/pypy/jit/backend/x86/test/test_runner.py
+++ b/pypy/jit/backend/x86/test/test_runner.py
@@ -362,7 +362,7 @@
         operations[3].setfailargs([i1])
         self.cpu.compile_loop(inputargs, operations, looptoken)
         name, loopaddress, loopsize = agent.functions[0]
-        assert name == "Loop # 17: hello"
+        assert name == "Loop # 17: hello (loop counter 0)"
         assert loopaddress <= looptoken._x86_loop_code
         assert loopsize >= 40 # randomish number
 
@@ -378,7 +378,7 @@
 
         self.cpu.compile_bridge(faildescr1, [i1b], bridge, looptoken)
         name, address, size = agent.functions[1]
-        assert name == "Bridge # 0: bye"
+        assert name == "Bridge # 0: bye (loop counter 1)"
         # Would be exactly ==, but there are some guard failure recovery
         # stubs in-between
         assert address >= loopaddress + loopsize
diff --git a/pypy/jit/backend/x86/test/test_zrpy_gc.py b/pypy/jit/backend/x86/test/test_zrpy_gc.py
--- a/pypy/jit/backend/x86/test/test_zrpy_gc.py
+++ b/pypy/jit/backend/x86/test/test_zrpy_gc.py
@@ -1,8 +1,7 @@
 """
-This is a test that translates a complete JIT to C and runs it.  It is
-not testing much, expect that it basically works.  What it *is* testing,
-however, is the correct handling of GC, i.e. if objects are freed as
-soon as possible (at least in a simple case).
+This is a test that translates a complete JIT together with a GC and runs it.
+It is testing that the GC-dependent aspects basically work, mostly the mallocs
+and the various cases of write barrier.
 """
 
 import weakref
@@ -14,7 +13,7 @@
 from pypy.rlib.jit import JitDriver, dont_look_inside
 from pypy.rlib.jit import purefunction, unroll_safe
 from pypy.jit.backend.x86.runner import CPU386
-from pypy.jit.backend.llsupport.gc import GcRefList, GcRootMap_asmgcc
+from pypy.jit.backend.llsupport.gc import GcRootMap_asmgcc
 from pypy.jit.backend.llsupport.gc import GcLLDescr_framework
 from pypy.tool.udir import udir
 from pypy.jit.backend.x86.arch import IS_X86_64
@@ -456,6 +455,73 @@
     def test_compile_framework_7(self):
         self.run('compile_framework_7')
 
+    def define_compile_framework_8(cls):
+        # Array of pointers, of unknown length (test write_barrier_from_array)
+        def before(n, x):
+            return n, x, None, None, None, None, None, None, None, None, [X(123)], None
+        def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
+            if n < 1900:
+                check(l[0].x == 123)
+                l = [None] * (16 + (n & 7))
+                l[0] = X(123)
+                l[1] = X(n)
+                l[2] = X(n+10)
+                l[3] = X(n+20)
+                l[4] = X(n+30)
+                l[5] = X(n+40)
+                l[6] = X(n+50)
+                l[7] = X(n+60)
+                l[8] = X(n+70)
+                l[9] = X(n+80)
+                l[10] = X(n+90)
+                l[11] = X(n+100)
+                l[12] = X(n+110)
+                l[13] = X(n+120)
+                l[14] = X(n+130)
+                l[15] = X(n+140)
+            if n < 1800:
+                check(len(l) == 16 + (n & 7))
+                check(l[0].x == 123)
+                check(l[1].x == n)
+                check(l[2].x == n+10)
+                check(l[3].x == n+20)
+                check(l[4].x == n+30)
+                check(l[5].x == n+40)
+                check(l[6].x == n+50)
+                check(l[7].x == n+60)
+                check(l[8].x == n+70)
+                check(l[9].x == n+80)
+                check(l[10].x == n+90)
+                check(l[11].x == n+100)
+                check(l[12].x == n+110)
+                check(l[13].x == n+120)
+                check(l[14].x == n+130)
+                check(l[15].x == n+140)
+            n -= x.foo
+            return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
+        def after(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
+            check(len(l) >= 16)
+            check(l[0].x == 123)
+            check(l[1].x == 2)
+            check(l[2].x == 12)
+            check(l[3].x == 22)
+            check(l[4].x == 32)
+            check(l[5].x == 42)
+            check(l[6].x == 52)
+            check(l[7].x == 62)
+            check(l[8].x == 72)
+            check(l[9].x == 82)
+            check(l[10].x == 92)
+            check(l[11].x == 102)
+            check(l[12].x == 112)
+            check(l[13].x == 122)
+            check(l[14].x == 132)
+            check(l[15].x == 142)
+        return before, f, after
+
+    def test_compile_framework_8(self):
+        self.run('compile_framework_8')
+
     def define_compile_framework_external_exception_handling(cls):
         def before(n, x):
             x = X(0)
diff --git a/pypy/jit/metainterp/resoperation.py b/pypy/jit/metainterp/resoperation.py
--- a/pypy/jit/metainterp/resoperation.py
+++ b/pypy/jit/metainterp/resoperation.py
@@ -471,7 +471,8 @@
     'STRSETITEM/3',
     'UNICODESETITEM/3',
     #'RUNTIMENEW/1',     # ootype operation
-    'COND_CALL_GC_WB/2d', # [objptr, newvalue]   (for the write barrier)
+    'COND_CALL_GC_WB/2d', # [objptr, newvalue] or [arrayptr, index]
+                          # (for the write barrier; the latter form is for arrays)
     'DEBUG_MERGE_POINT/2',      # debugging only
     'JIT_DEBUG/*',              # debugging only
     'VIRTUAL_REF_FINISH/2',   # removed before it's passed to the backend
diff --git a/pypy/rlib/rgc.py b/pypy/rlib/rgc.py
--- a/pypy/rlib/rgc.py
+++ b/pypy/rlib/rgc.py
@@ -191,6 +191,21 @@
         hop.exception_cannot_occur()
         return hop.genop('gc_can_move', hop.args_v, resulttype=hop.r_result)
 
+def _make_sure_does_not_move(p):
+    """'p' is a non-null GC object.  This (tries to) make sure that the
+    object does not move any more, by forcing collections if needed.
+    Warning: should ideally only be used with the minimark GC, and only
+    on objects that are already a bit old, so have a chance to be
+    already non-movable."""
+    if not we_are_translated():
+        return
+    i = 0
+    while can_move(p):
+        if i > 6:
+            raise NotImplementedError("can't make object non-movable!")
+        collect(i)
+        i += 1
+
 def _heap_stats():
     raise NotImplementedError # can't be run directly
 
diff --git a/pypy/rpython/memory/gc/minimark.py b/pypy/rpython/memory/gc/minimark.py
--- a/pypy/rpython/memory/gc/minimark.py
+++ b/pypy/rpython/memory/gc/minimark.py
@@ -1020,6 +1020,7 @@
                 objhdr.tid |= GCFLAG_CARDS_SET
 
         remember_young_pointer_from_array._dont_inline_ = True
+        assert self.card_page_indices > 0
         self.remember_young_pointer_from_array = (
             remember_young_pointer_from_array)
 
diff --git a/pypy/rpython/memory/gctransform/framework.py b/pypy/rpython/memory/gctransform/framework.py
--- a/pypy/rpython/memory/gctransform/framework.py
+++ b/pypy/rpython/memory/gctransform/framework.py
@@ -860,9 +860,9 @@
 
     def gct_get_write_barrier_from_array_failing_case(self, hop):
         op = hop.spaceop
-        hop.genop("same_as",
-                  [self.write_barrier_from_array_failing_case_ptr],
-                  resultvar=op.result)
+        v = getattr(self, 'write_barrier_from_array_failing_case_ptr',
+                    lltype.nullptr(op.result.concretetype.TO))
+        hop.genop("same_as", [v], resultvar=op.result)
 
     def gct_zero_gc_pointers_inside(self, hop):
         if not self.malloc_zero_filled:


More information about the pypy-commit mailing list