[pypy-commit] pypy op_malloc_gc: Finish the integration of the x86 backend, which means mostly killing stuff.

arigo noreply at buildbot.pypy.org
Sun Dec 18 19:54:34 CET 2011


Author: Armin Rigo <arigo at tunes.org>
Branch: op_malloc_gc
Changeset: r50677:091dcde9fb47
Date: 2011-12-18 19:54 +0100
http://bitbucket.org/pypy/pypy/changeset/091dcde9fb47/

Log:	Finish the integration of the x86 backend, which means mostly
	killing stuff.

diff --git a/pypy/jit/backend/llsupport/llmodel.py b/pypy/jit/backend/llsupport/llmodel.py
--- a/pypy/jit/backend/llsupport/llmodel.py
+++ b/pypy/jit/backend/llsupport/llmodel.py
@@ -11,7 +11,7 @@
 from pypy.jit.backend.llsupport.descr import (
     get_size_descr, get_field_descr, get_array_descr,
     get_call_descr, get_interiorfield_descr,
-    FieldDescr, ArrayDescr, CallDescr)
+    FieldDescr, ArrayDescr, CallDescr, InteriorFieldDescr)
 from pypy.jit.backend.llsupport.asmmemmgr import AsmMemoryManager
 
 
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -2448,9 +2448,8 @@
         else:
             self.mc.JMP(imm(target))
 
-    def malloc_cond(self, nursery_free_adr, nursery_top_adr, size, tid):
-        size = max(size, self.cpu.gc_ll_descr.minimal_size_in_nursery)
-        size = (size + WORD-1) & ~(WORD-1)     # round up
+    def malloc_cond(self, nursery_free_adr, nursery_top_adr, size):
+        assert size & (WORD-1) == 0     # must be correctly aligned
         self.mc.MOV(eax, heap(nursery_free_adr))
         self.mc.LEA_rm(edx.value, (eax.value, size))
         self.mc.CMP(edx, heap(nursery_top_adr))
@@ -2486,9 +2485,6 @@
         offset = self.mc.get_relative_pos() - jmp_adr
         assert 0 < offset <= 127
         self.mc.overwrite(jmp_adr-1, chr(offset))
-        # on 64-bits, 'tid' is a value that fits in 31 bits
-        assert rx86.fits_in_32bits(tid)
-        self.mc.MOV_mi((eax.value, 0), tid)
         self.mc.MOV(heap(nursery_free_adr), edx)
 
 genop_discard_list = [Assembler386.not_implemented_op_discard] * rop._LAST
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -960,24 +960,10 @@
 
     consider_cond_call_gc_wb_array = consider_cond_call_gc_wb
 
-    def fastpath_malloc_fixedsize(self, op, descr):
-        KILLME
-        assert isinstance(descr, SizeDescr)
-        self._do_fastpath_malloc(op, descr.size, descr.tid)
-
-    def fastpath_malloc_varsize(self, op, arraydescr, num_elem):
-        KILLME
-        assert isinstance(arraydescr, BaseArrayDescr)
-        ofs_length = arraydescr.get_ofs_length(self.translate_support_code)
-        basesize = arraydescr.get_base_size(self.translate_support_code)
-        itemsize = arraydescr.get_item_size(self.translate_support_code)
-        size = basesize + itemsize * num_elem
-        self._do_fastpath_malloc(op, size, arraydescr.tid)
-        self.assembler.set_new_array_length(eax, ofs_length, imm(num_elem))
-
-    def _do_fastpath_malloc(self, op, size, tid):
-        KILLME
-        gc_ll_descr = self.assembler.cpu.gc_ll_descr
+    def consider_call_malloc_nursery(self, op):
+        size_box = op.getarg(0)
+        assert isinstance(size_box, ConstInt)
+        size = size_box.getint()
         self.rm.force_allocate_reg(op.result, selected_reg=eax)
         #
         # We need edx as a temporary, but otherwise don't save any more
@@ -986,61 +972,11 @@
         self.rm.force_allocate_reg(tmp_box, selected_reg=edx)
         self.rm.possibly_free_var(tmp_box)
         #
+        gc_ll_descr = self.assembler.cpu.gc_ll_descr
         self.assembler.malloc_cond(
             gc_ll_descr.get_nursery_free_addr(),
             gc_ll_descr.get_nursery_top_addr(),
-            size, tid,
-            )
-
-    def consider_new(self, op):
-        KILLME
-        gc_ll_descr = self.assembler.cpu.gc_ll_descr
-        if gc_ll_descr.can_inline_malloc(op.getdescr()):
-            self.fastpath_malloc_fixedsize(op, op.getdescr())
-        else:
-            args = gc_ll_descr.args_for_new(op.getdescr())
-            arglocs = [imm(x) for x in args]
-            return self._call(op, arglocs)
-
-    def consider_new_with_vtable(self, op):
-        KILLME
-        classint = op.getarg(0).getint()
-        descrsize = heaptracker.vtable2descr(self.assembler.cpu, classint)
-        if self.assembler.cpu.gc_ll_descr.can_inline_malloc(descrsize):
-            self.fastpath_malloc_fixedsize(op, descrsize)
-            self.assembler.set_vtable(eax, imm(classint))
-            # result of fastpath malloc is in eax
-        else:
-            args = self.assembler.cpu.gc_ll_descr.args_for_new(descrsize)
-            arglocs = [imm(x) for x in args]
-            arglocs.append(self.loc(op.getarg(0)))
-            return self._call(op, arglocs)
-
-    def consider_newstr(self, op):
-        KILLME
-        loc = self.loc(op.getarg(0))
-        return self._call(op, [loc])
-
-    def consider_newunicode(self, op):
-        KILLME
-        loc = self.loc(op.getarg(0))
-        return self._call(op, [loc])
-
-    def consider_new_array(self, op):
-        KILLME
-        gc_ll_descr = self.assembler.cpu.gc_ll_descr
-        box_num_elem = op.getarg(0)
-        if isinstance(box_num_elem, ConstInt):
-            num_elem = box_num_elem.value
-            if gc_ll_descr.can_inline_malloc_varsize(op.getdescr(),
-                                                     num_elem):
-                self.fastpath_malloc_varsize(op, op.getdescr(), num_elem)
-                return
-        args = self.assembler.cpu.gc_ll_descr.args_for_new_array(
-            op.getdescr())
-        arglocs = [imm(x) for x in args]
-        arglocs.append(self.loc(box_num_elem))
-        self._call(op, arglocs)
+            size)
 
     def _unpack_arraydescr(self, arraydescr):
         assert isinstance(arraydescr, ArrayDescr)
diff --git a/pypy/jit/backend/x86/test/test_gc_integration.py b/pypy/jit/backend/x86/test/test_gc_integration.py
--- a/pypy/jit/backend/x86/test/test_gc_integration.py
+++ b/pypy/jit/backend/x86/test/test_gc_integration.py
@@ -8,7 +8,7 @@
 from pypy.jit.metainterp.resoperation import rop, ResOperation
 from pypy.jit.codewriter import heaptracker
 from pypy.jit.codewriter.effectinfo import EffectInfo
-from pypy.jit.backend.llsupport.descr import GcCache
+from pypy.jit.backend.llsupport.descr import GcCache, FieldDescr, FLAG_SIGNED
 from pypy.jit.backend.llsupport.gc import GcLLDescription
 from pypy.jit.backend.detect_cpu import getcpuclass
 from pypy.jit.backend.x86.regalloc import RegAlloc
@@ -17,7 +17,7 @@
 from pypy.rpython.lltypesystem import lltype, llmemory, rffi
 from pypy.rpython.annlowlevel import llhelper
 from pypy.rpython.lltypesystem import rclass, rstr
-from pypy.jit.backend.llsupport.gc import GcLLDescr_framework, GcPtrFieldDescr
+from pypy.jit.backend.llsupport.gc import GcLLDescr_framework
 
 from pypy.jit.backend.x86.test.test_regalloc import MockAssembler
 from pypy.jit.backend.x86.test.test_regalloc import BaseTestRegalloc
@@ -41,20 +41,15 @@
         return ['compressed'] + shape[1:]
 
 class MockGcDescr(GcCache):
-    def get_funcptr_for_new(self):
-        return 123
-    get_funcptr_for_newarray = get_funcptr_for_new
-    get_funcptr_for_newstr = get_funcptr_for_new
-    get_funcptr_for_newunicode = get_funcptr_for_new
     get_malloc_slowpath_addr = None
-
+    write_barrier_descr = None
     moving_gc = True
     gcrootmap = MockGcRootMap()
 
     def initialize(self):
         pass
 
-    record_constptrs = GcLLDescr_framework.record_constptrs.im_func
+    _record_constptrs = GcLLDescr_framework._record_constptrs.im_func
     rewrite_assembler = GcLLDescr_framework.rewrite_assembler.im_func
 
 class TestRegallocDirectGcIntegration(object):
@@ -170,42 +165,32 @@
         '''
         self.interpret(ops, [0, 0, 0, 0, 0, 0, 0, 0, 0], run=False)
 
+NOT_INITIALIZED = chr(0xdd)
+
 class GCDescrFastpathMalloc(GcLLDescription):
     gcrootmap = None
-    expected_malloc_slowpath_size = WORD*2
+    write_barrier_descr = None
 
     def __init__(self):
-        GcCache.__init__(self, False)
+        GcLLDescription.__init__(self, None)
         # create a nursery
-        NTP = rffi.CArray(lltype.Signed)
-        self.nursery = lltype.malloc(NTP, 16, flavor='raw')
-        self.addrs = lltype.malloc(rffi.CArray(lltype.Signed), 3,
+        NTP = rffi.CArray(lltype.Char)
+        self.nursery = lltype.malloc(NTP, 64, flavor='raw')
+        for i in range(64):
+            self.nursery[i] = NOT_INITIALIZED
+        self.addrs = lltype.malloc(rffi.CArray(lltype.Signed), 2,
                                    flavor='raw')
         self.addrs[0] = rffi.cast(lltype.Signed, self.nursery)
-        self.addrs[1] = self.addrs[0] + 16*WORD
-        self.addrs[2] = 0
-        # 16 WORDs
+        self.addrs[1] = self.addrs[0] + 64
+        self.calls = []
         def malloc_slowpath(size):
-            assert size == self.expected_malloc_slowpath_size
+            self.calls.append(size)
+            # reset the nursery
             nadr = rffi.cast(lltype.Signed, self.nursery)
             self.addrs[0] = nadr + size
-            self.addrs[2] += 1
             return nadr
-        self.malloc_slowpath = malloc_slowpath
-        self.MALLOC_SLOWPATH = lltype.FuncType([lltype.Signed],
-                                               lltype.Signed)
-        self._counter = 123000
-
-    def can_inline_malloc(self, descr):
-        return True
-
-    def get_funcptr_for_new(self):
-        return 42
-#        return llhelper(lltype.Ptr(self.NEW_TP), self.new)
-
-    def init_size_descr(self, S, descr):
-        descr.tid = self._counter
-        self._counter += 1
+        self.generate_function('malloc_nursery', malloc_slowpath,
+                               [lltype.Signed], lltype.Signed)
 
     def get_nursery_free_addr(self):
         return rffi.cast(lltype.Signed, self.addrs)
@@ -214,204 +199,61 @@
         return rffi.cast(lltype.Signed, self.addrs) + WORD
 
     def get_malloc_slowpath_addr(self):
-        fptr = llhelper(lltype.Ptr(self.MALLOC_SLOWPATH), self.malloc_slowpath)
-        return rffi.cast(lltype.Signed, fptr)
+        return self.c_malloc_nursery_fn.value
 
-    get_funcptr_for_newarray = None
-    get_funcptr_for_newstr = None
-    get_funcptr_for_newunicode = None
+    def check_nothing_in_nursery(self):
+        # CALL_MALLOC_NURSERY should not write anything in the nursery
+        for i in range(64):
+            assert self.nursery[i] == NOT_INITIALIZED
 
 class TestMallocFastpath(BaseTestRegalloc):
 
     def setup_method(self, method):
         cpu = CPU(None, None)
-        cpu.vtable_offset = WORD
         cpu.gc_ll_descr = GCDescrFastpathMalloc()
         cpu.setup_once()
+        self.cpu = cpu
 
-        # hack: specify 'tid' explicitly, because this test is not running
-        # with the gc transformer
-        NODE = lltype.GcStruct('node', ('tid', lltype.Signed),
-                                       ('value', lltype.Signed))
-        nodedescr = cpu.sizeof(NODE)
-        valuedescr = cpu.fielddescrof(NODE, 'value')
-
-        self.cpu = cpu
-        self.nodedescr = nodedescr
-        vtable = lltype.malloc(rclass.OBJECT_VTABLE, immortal=True)
-        vtable_int = cpu.cast_adr_to_int(llmemory.cast_ptr_to_adr(vtable))
-        NODE2 = lltype.GcStruct('node2',
-                                  ('parent', rclass.OBJECT),
-                                  ('tid', lltype.Signed),
-                                  ('vtable', lltype.Ptr(rclass.OBJECT_VTABLE)))
-        descrsize = cpu.sizeof(NODE2)
-        heaptracker.register_known_gctype(cpu, vtable, NODE2)
-        self.descrsize = descrsize
-        self.vtable_int = vtable_int
-
-        self.namespace = locals().copy()
-        
     def test_malloc_fastpath(self):
         ops = '''
-        [i0]
-        p0 = new(descr=nodedescr)
-        setfield_gc(p0, i0, descr=valuedescr)
-        finish(p0)
+        []
+        p0 = call_malloc_nursery(16)
+        p1 = call_malloc_nursery(32)
+        p2 = call_malloc_nursery(16)
+        finish(p0, p1, p2)
         '''
-        self.interpret(ops, [42])
-        # check the nursery
+        self.interpret(ops, [])
+        # check the returned pointers
         gc_ll_descr = self.cpu.gc_ll_descr
-        assert gc_ll_descr.nursery[0] == self.nodedescr.tid
-        assert gc_ll_descr.nursery[1] == 42
         nurs_adr = rffi.cast(lltype.Signed, gc_ll_descr.nursery)
-        assert gc_ll_descr.addrs[0] == nurs_adr + (WORD*2)
-        assert gc_ll_descr.addrs[2] == 0   # slowpath never called
+        ref = self.cpu.get_latest_value_ref
+        assert rffi.cast(lltype.Signed, ref(0)) == nurs_adr + 0
+        assert rffi.cast(lltype.Signed, ref(1)) == nurs_adr + 16
+        assert rffi.cast(lltype.Signed, ref(2)) == nurs_adr + 48
+        # check the nursery content and state
+        gc_ll_descr.check_nothing_in_nursery()
+        assert gc_ll_descr.addrs[0] == nurs_adr + 64
+        # slowpath never called
+        assert gc_ll_descr.calls == []
 
     def test_malloc_slowpath(self):
         ops = '''
         []
-        p0 = new(descr=nodedescr)
-        p1 = new(descr=nodedescr)
-        p2 = new(descr=nodedescr)
-        p3 = new(descr=nodedescr)
-        p4 = new(descr=nodedescr)
-        p5 = new(descr=nodedescr)
-        p6 = new(descr=nodedescr)
-        p7 = new(descr=nodedescr)
-        p8 = new(descr=nodedescr)
-        finish(p0, p1, p2, p3, p4, p5, p6, p7, p8)
+        p0 = call_malloc_nursery(16)
+        p1 = call_malloc_nursery(32)
+        p2 = call_malloc_nursery(24)     # overflow
+        finish(p0, p1, p2)
         '''
         self.interpret(ops, [])
+        # check the returned pointers
+        gc_ll_descr = self.cpu.gc_ll_descr
+        nurs_adr = rffi.cast(lltype.Signed, gc_ll_descr.nursery)
+        ref = self.cpu.get_latest_value_ref
+        assert rffi.cast(lltype.Signed, ref(0)) == nurs_adr + 0
+        assert rffi.cast(lltype.Signed, ref(1)) == nurs_adr + 16
+        assert rffi.cast(lltype.Signed, ref(2)) == nurs_adr + 0
+        # check the nursery content and state
+        gc_ll_descr.check_nothing_in_nursery()
+        assert gc_ll_descr.addrs[0] == nurs_adr + 24
         # this should call slow path once
-        gc_ll_descr = self.cpu.gc_ll_descr
-        nadr = rffi.cast(lltype.Signed, gc_ll_descr.nursery)
-        assert gc_ll_descr.addrs[0] == nadr + (WORD*2)
-        assert gc_ll_descr.addrs[2] == 1   # slowpath called once
-
-    def test_new_with_vtable(self):
-        ops = '''
-        [i0, i1]
-        p0 = new_with_vtable(ConstClass(vtable))
-        guard_class(p0, ConstClass(vtable)) [i0]
-        finish(i1)
-        '''
-        self.interpret(ops, [0, 1])
-        assert self.getint(0) == 1
-        gc_ll_descr = self.cpu.gc_ll_descr
-        assert gc_ll_descr.nursery[0] == self.descrsize.tid
-        assert gc_ll_descr.nursery[1] == self.vtable_int
-        nurs_adr = rffi.cast(lltype.Signed, gc_ll_descr.nursery)
-        assert gc_ll_descr.addrs[0] == nurs_adr + (WORD*3)
-        assert gc_ll_descr.addrs[2] == 0   # slowpath never called
-
-
-class Seen(Exception):
-    pass
-
-class GCDescrFastpathMallocVarsize(GCDescrFastpathMalloc):
-    def can_inline_malloc_varsize(self, arraydescr, num_elem):
-        return num_elem < 5
-    def get_funcptr_for_newarray(self):
-        return 52
-    def init_array_descr(self, A, descr):
-        descr.tid = self._counter
-        self._counter += 1
-    def args_for_new_array(self, descr):
-        raise Seen("args_for_new_array")
-
-class TestMallocVarsizeFastpath(BaseTestRegalloc):
-    def setup_method(self, method):
-        cpu = CPU(None, None)
-        cpu.vtable_offset = WORD
-        cpu.gc_ll_descr = GCDescrFastpathMallocVarsize()
-        cpu.setup_once()
-        self.cpu = cpu
-
-        ARRAY = lltype.GcArray(lltype.Signed)
-        arraydescr = cpu.arraydescrof(ARRAY)
-        self.arraydescr = arraydescr
-        ARRAYCHAR = lltype.GcArray(lltype.Char)
-        arraychardescr = cpu.arraydescrof(ARRAYCHAR)
-
-        self.namespace = locals().copy()
-
-    def test_malloc_varsize_fastpath(self):
-        # Hack.  Running the GcLLDescr_framework without really having
-        # a complete GC means that we end up with both the tid and the
-        # length being at offset 0.  In this case, so the length overwrites
-        # the tid.  This is of course only the case in this test class.
-        ops = '''
-        []
-        p0 = new_array(4, descr=arraydescr)
-        setarrayitem_gc(p0, 0, 142, descr=arraydescr)
-        setarrayitem_gc(p0, 3, 143, descr=arraydescr)
-        finish(p0)
-        '''
-        self.interpret(ops, [])
-        # check the nursery
-        gc_ll_descr = self.cpu.gc_ll_descr
-        assert gc_ll_descr.nursery[0] == 4
-        assert gc_ll_descr.nursery[1] == 142
-        assert gc_ll_descr.nursery[4] == 143
-        nurs_adr = rffi.cast(lltype.Signed, gc_ll_descr.nursery)
-        assert gc_ll_descr.addrs[0] == nurs_adr + (WORD*5)
-        assert gc_ll_descr.addrs[2] == 0   # slowpath never called
-
-    def test_malloc_varsize_slowpath(self):
-        ops = '''
-        []
-        p0 = new_array(4, descr=arraydescr)
-        setarrayitem_gc(p0, 0, 420, descr=arraydescr)
-        setarrayitem_gc(p0, 3, 430, descr=arraydescr)
-        p1 = new_array(4, descr=arraydescr)
-        setarrayitem_gc(p1, 0, 421, descr=arraydescr)
-        setarrayitem_gc(p1, 3, 431, descr=arraydescr)
-        p2 = new_array(4, descr=arraydescr)
-        setarrayitem_gc(p2, 0, 422, descr=arraydescr)
-        setarrayitem_gc(p2, 3, 432, descr=arraydescr)
-        p3 = new_array(4, descr=arraydescr)
-        setarrayitem_gc(p3, 0, 423, descr=arraydescr)
-        setarrayitem_gc(p3, 3, 433, descr=arraydescr)
-        finish(p0, p1, p2, p3)
-        '''
-        gc_ll_descr = self.cpu.gc_ll_descr
-        gc_ll_descr.expected_malloc_slowpath_size = 5*WORD
-        self.interpret(ops, [])
-        assert gc_ll_descr.addrs[2] == 1   # slowpath called once
-
-    def test_malloc_varsize_too_big(self):
-        ops = '''
-        []
-        p0 = new_array(5, descr=arraydescr)
-        finish(p0)
-        '''
-        py.test.raises(Seen, self.interpret, ops, [])
-
-    def test_malloc_varsize_variable(self):
-        ops = '''
-        [i0]
-        p0 = new_array(i0, descr=arraydescr)
-        finish(p0)
-        '''
-        py.test.raises(Seen, self.interpret, ops, [])
-
-    def test_malloc_array_of_char(self):
-        # check that fastpath_malloc_varsize() respects the alignment
-        # of the pointer in the nursery
-        ops = '''
-        []
-        p1 = new_array(1, descr=arraychardescr)
-        p2 = new_array(2, descr=arraychardescr)
-        p3 = new_array(3, descr=arraychardescr)
-        p4 = new_array(4, descr=arraychardescr)
-        finish(p1, p2, p3, p4)
-        '''
-        self.interpret(ops, [])
-        p1 = self.getptr(0, llmemory.GCREF)
-        p2 = self.getptr(1, llmemory.GCREF)
-        p3 = self.getptr(2, llmemory.GCREF)
-        p4 = self.getptr(3, llmemory.GCREF)
-        assert p1._obj.intval & (WORD-1) == 0    # aligned
-        assert p2._obj.intval & (WORD-1) == 0    # aligned
-        assert p3._obj.intval & (WORD-1) == 0    # aligned
-        assert p4._obj.intval & (WORD-1) == 0    # aligned
+        assert gc_ll_descr.calls == [24]


More information about the pypy-commit mailing list