[pypy-svn] pypy jit-shadowstack: Reimplement support for the fastpath mallocs.

arigo commits-noreply at bitbucket.org
Thu Mar 31 16:56:00 CEST 2011


Author: Armin Rigo <arigo at tunes.org>
Branch: jit-shadowstack
Changeset: r43053:db1614adf648
Date: 2011-03-31 16:37 +0200
http://bitbucket.org/pypy/pypy/changeset/db1614adf648/

Log:	Reimplement support for the fastpath mallocs.
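
Note: the "fastpath malloc" here is the inline bump-pointer allocation
in the GC nursery that the JIT emits directly into compiled code,
falling back to a slow path only when the nursery is full.  A minimal
conceptual sketch (illustrative names, not PyPy's actual code):

    class Nursery:
        def __init__(self, size):
            self.free = 0    # next free byte (cf. get_nursery_free_addr()
                             # in the regalloc diff below)
            self.top = size  # end of the nursery

        def malloc_fixedsize(self, size, slowpath):
            new_free = self.free + size
            if new_free <= self.top:   # fast path: just bump the pointer
                result = self.free
                self.free = new_free
                return result
            return slowpath(size)      # slow path: collect, then retry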

diff --git a/pypy/jit/backend/x86/arch.py b/pypy/jit/backend/x86/arch.py
--- a/pypy/jit/backend/x86/arch.py
+++ b/pypy/jit/backend/x86/arch.py
@@ -1,17 +1,28 @@
 # Constants that depend on whether we are on 32-bit or 64-bit
 
+# The frame size gives the standard fixed part at the start of
+# every assembler frame: the saved values of some registers,
+# one word for the force_index, and some extra space used only
+# during a malloc that needs to go via its slow path.
+
 import sys
 if sys.maxint == (2**31 - 1):
     WORD = 4
-    # ebp + ebx + esi + edi + force_index = 5 words
-    FRAME_FIXED_SIZE = 5
+    # ebp + ebx + esi + edi + force_index + 4 extra words = 9 words
+    FRAME_FIXED_SIZE = 9
+    FORCE_INDEX_OFS = -4*WORD
     IS_X86_32 = True
     IS_X86_64 = False
 else:
     WORD = 8
-    # rbp + rbx + r12 + r13 + r14 + r15 + force_index = 7 words
-    FRAME_FIXED_SIZE = 7
+    # rbp + rbx + r12 + r13 + r14 + r15 + force_index + 11 extra words = 18
+    FRAME_FIXED_SIZE = 18
+    FORCE_INDEX_OFS = -6*WORD
     IS_X86_32 = False
     IS_X86_64 = True
 
-FORCE_INDEX_OFS = -(FRAME_FIXED_SIZE-1)*WORD
+MY_COPY_OF_REGS = -(FRAME_FIXED_SIZE-1)*WORD
+# The extra space has room for almost all registers, apart from eax and edx,
+# which are used in the malloc itself.  They are:
+#   ecx, ebx, esi, edi               [32 and 64 bits]
+#   r8, r9, r10, r12, r13, r14, r15    [64 bits only]
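
A quick sanity check on the new 64-bit layout (illustration only, not
part of the patch): the 11-word copy area sits directly below the
force_index slot.

    WORD = 8
    FRAME_FIXED_SIZE = 18          # 6 saved regs + force_index + 11 extra
    FORCE_INDEX_OFS = -6 * WORD
    MY_COPY_OF_REGS = -(FRAME_FIXED_SIZE - 1) * WORD   # == -17 * WORD
    # copy area: offsets -17*WORD .. -7*WORD, then force_index at -6*WORD
    assert MY_COPY_OF_REGS + 11 * WORD == FORCE_INDEX_OFS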

diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -8,9 +8,8 @@
 from pypy.rpython.lltypesystem.lloperation import llop
 from pypy.rpython.annlowlevel import llhelper
 from pypy.jit.backend.model import CompiledLoopToken
-from pypy.jit.backend.x86.regalloc import (RegAlloc, X86RegisterManager,
-                                           X86XMMRegisterManager, get_ebp_ofs,
-                                           _get_scale)
+from pypy.jit.backend.x86.regalloc import (RegAlloc, get_ebp_ofs,
+                                           _get_scale, gpr_reg_mgr_cls)
 
 from pypy.jit.backend.x86.arch import (FRAME_FIXED_SIZE, FORCE_INDEX_OFS, WORD,
                                        IS_X86_32, IS_X86_64)
@@ -188,6 +187,8 @@
         #
         if gcrootmap.is_shadow_stack:
             # ---- shadowstack ----
+            for reg, ofs in gpr_reg_mgr_cls.REGLOC_TO_COPY_AREA_OFS.items():
+                mc.MOV_br(ofs, reg.value)
             mc.SUB_ri(esp.value, 16 - WORD)      # stack alignment of 16 bytes
             if IS_X86_32:
                 mc.MOV_sr(0, edx.value)          # push argument
@@ -195,6 +196,8 @@
                 mc.MOV_rr(edi.value, edx.value)
             mc.CALL(imm(addr))
             mc.ADD_ri(esp.value, 16 - WORD)
+            for reg, ofs in gpr_reg_mgr_cls.REGLOC_TO_COPY_AREA_OFS.items():
+                mc.MOV_rb(reg.value, ofs)
         else:
             # ---- asmgcc ----
             if IS_X86_32:
@@ -736,8 +739,8 @@
         nonfloatlocs, floatlocs = arglocs
         self._call_header()
         stackadjustpos = self._patchable_stackadjust()
-        tmp = X86RegisterManager.all_regs[0]
-        xmmtmp = X86XMMRegisterManager.all_regs[0]
+        tmp = eax
+        xmmtmp = xmm0
         self.mc.begin_reuse_scratch_register()
         for i in range(len(nonfloatlocs)):
             loc = nonfloatlocs[i]
@@ -1961,7 +1964,7 @@
             # load the return value from fail_boxes_xxx[0]
             kind = op.result.type
             if kind == FLOAT:
-                xmmtmp = X86XMMRegisterManager.all_regs[0]
+                xmmtmp = xmm0
                 adr = self.fail_boxes_float.get_addr_for_num(0)
                 self.mc.MOVSD(xmmtmp, heap(adr))
                 self.mc.MOVSD(result_loc, xmmtmp)
@@ -2056,12 +2059,12 @@
         not_implemented("not implemented operation (guard): %s" %
                         op.getopname())
 
-    def mark_gc_roots(self, force_index):
+    def mark_gc_roots(self, force_index, use_copy_area=False):
         if force_index < 0:
             return     # not needed
         gcrootmap = self.cpu.gc_ll_descr.gcrootmap
         if gcrootmap:
-            mark = self._regalloc.get_mark_gc_roots(gcrootmap)
+            mark = self._regalloc.get_mark_gc_roots(gcrootmap, use_copy_area)
             if gcrootmap.is_shadow_stack:
                 gcrootmap.write_callshape(mark, force_index)
             else:
@@ -2106,7 +2109,8 @@
             # there are two helpers to call only with asmgcc
             slowpath_addr1 = self.malloc_fixedsize_slowpath1
             self.mc.CALL(imm(slowpath_addr1))
-        self.mark_gc_roots()
+        self.mark_gc_roots(self.write_new_force_index(),
+                           use_copy_area=gcrootmap.is_shadow_stack)
         slowpath_addr2 = self.malloc_fixedsize_slowpath2
         self.mc.CALL(imm(slowpath_addr2))
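
Note on the shadowstack branch above: a shadow-stack GC never looks at
machine registers, only at stack slots it has been told about, so every
register that may hold a GC pointer is written to a fixed ebp-relative
slot before calling into the GC and reloaded afterwards (a moving
collection may have updated the saved pointers).  Conceptually, in
Python rather than emitted x86 code, with hypothetical helper names:

    def call_malloc_slowpath(regs, copy_area, gc_malloc, size):
        for name, value in regs.items():
            copy_area[name] = value       # spill: make pointers GC-visible
        result = gc_malloc(size)          # GC may move objects and rewrite
                                          # the copy_area slots
        for name in regs:
            regs[name] = copy_area[name]  # reload possibly-moved pointers
        return result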
 

diff --git a/pypy/jit/backend/x86/rx86.py b/pypy/jit/backend/x86/rx86.py
--- a/pypy/jit/backend/x86/rx86.py
+++ b/pypy/jit/backend/x86/rx86.py
@@ -543,6 +543,9 @@
     # x87 instructions
     FSTP_b = insn('\xDD', orbyte(3<<3), stack_bp(1))
 
+    # reserved as an illegal instruction
+    UD2 = insn('\x0F\x0B')
+
     # ------------------------------ SSE2 ------------------------------
 
     # Conversion
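
Note: UD2 (0F 0B) is the opcode that the CPU architecturally guarantees
to raise an invalid-opcode fault, so it can serve as a "must never be
executed" marker in generated code.  A toy version of the fixed-encoding
pattern (simplified, not the real insn() machinery):

    class ToyCodeBuilder:
        def __init__(self):
            self.buf = bytearray()
        def UD2(self):
            self.buf += b'\x0F\x0B'   # fixed two-byte encoding

    mc = ToyCodeBuilder()
    mc.UD2()
    assert bytes(mc.buf) == b'\x0f\x0b'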

diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -19,7 +19,8 @@
 from pypy.jit.backend.llsupport.descr import BaseCallDescr, BaseSizeDescr
 from pypy.jit.backend.llsupport.regalloc import FrameManager, RegisterManager,\
      TempBox
-from pypy.jit.backend.x86.arch import WORD, FRAME_FIXED_SIZE, IS_X86_32, IS_X86_64
+from pypy.jit.backend.x86.arch import WORD, FRAME_FIXED_SIZE
+from pypy.jit.backend.x86.arch import IS_X86_32, IS_X86_64, MY_COPY_OF_REGS
 from pypy.rlib.rarithmetic import r_longlong, r_uint
 
 class X86RegisterManager(RegisterManager):
@@ -34,6 +35,12 @@
         esi: 2,
         edi: 3,
     }
+    REGLOC_TO_COPY_AREA_OFS = {
+        ecx: MY_COPY_OF_REGS + 0 * WORD,
+        ebx: MY_COPY_OF_REGS + 1 * WORD,
+        esi: MY_COPY_OF_REGS + 2 * WORD,
+        edi: MY_COPY_OF_REGS + 3 * WORD,
+    }
 
     def call_result_location(self, v):
         return eax
@@ -61,6 +68,19 @@
         r14: 4,
         r15: 5,
     }
+    REGLOC_TO_COPY_AREA_OFS = {
+        ecx: MY_COPY_OF_REGS + 0 * WORD,
+        ebx: MY_COPY_OF_REGS + 1 * WORD,
+        esi: MY_COPY_OF_REGS + 2 * WORD,
+        edi: MY_COPY_OF_REGS + 3 * WORD,
+        r8:  MY_COPY_OF_REGS + 4 * WORD,
+        r9:  MY_COPY_OF_REGS + 5 * WORD,
+        r10: MY_COPY_OF_REGS + 6 * WORD,
+        r12: MY_COPY_OF_REGS + 7 * WORD,
+        r13: MY_COPY_OF_REGS + 8 * WORD,
+        r14: MY_COPY_OF_REGS + 9 * WORD,
+        r15: MY_COPY_OF_REGS + 10 * WORD,
+    }
 
 class X86XMMRegisterManager(RegisterManager):
 
@@ -117,6 +137,16 @@
         else:
             return 1
 
+if WORD == 4:
+    gpr_reg_mgr_cls = X86RegisterManager
+    xmm_reg_mgr_cls = X86XMMRegisterManager
+elif WORD == 8:
+    gpr_reg_mgr_cls = X86_64_RegisterManager
+    xmm_reg_mgr_cls = X86_64_XMMRegisterManager
+else:
+    raise AssertionError("Word size should be 4 or 8")
+
+
 class RegAlloc(object):
 
     def __init__(self, assembler, translate_support_code=False):
@@ -135,16 +165,6 @@
         # compute longevity of variables
         longevity = self._compute_vars_longevity(inputargs, operations)
         self.longevity = longevity
-        # XXX
-        if cpu.WORD == 4:
-            gpr_reg_mgr_cls = X86RegisterManager
-            xmm_reg_mgr_cls = X86XMMRegisterManager
-        elif cpu.WORD == 8:
-            gpr_reg_mgr_cls = X86_64_RegisterManager
-            xmm_reg_mgr_cls = X86_64_XMMRegisterManager
-        else:
-            raise AssertionError("Word size should be 4 or 8")
-            
         self.rm = gpr_reg_mgr_cls(longevity,
                                   frame_manager = self.fm,
                                   assembler = self.assembler)
@@ -841,20 +861,29 @@
         self.rm.possibly_free_vars_for_op(op)
 
     def _fastpath_malloc(self, op, descr):
-        XXX
         assert isinstance(descr, BaseSizeDescr)
         gc_ll_descr = self.assembler.cpu.gc_ll_descr
         self.rm.force_allocate_reg(op.result, selected_reg=eax)
-        # We need to force-allocate each of save_around_call_regs now.
-        # The alternative would be to save and restore them around the
-        # actual call to malloc(), in the rare case where we need to do
-        # it; however, mark_gc_roots() would need to be adapted to know
-        # where the variables end up being saved.  Messy.
-        for reg in self.rm.save_around_call_regs:
-            if reg is not eax:
-                tmp_box = TempBox()
-                self.rm.force_allocate_reg(tmp_box, selected_reg=reg)
-                self.rm.possibly_free_var(tmp_box)
+
+        if gc_ll_descr.gcrootmap.is_shadow_stack:
+            # ---- shadowstack ----
+            # We need edx as a temporary, but otherwise don't save any more
+            # registers.  See comments in _build_malloc_fixedsize_slowpath().
+            tmp_box = TempBox()
+            self.rm.force_allocate_reg(tmp_box, selected_reg=edx)
+            self.rm.possibly_free_var(tmp_box)
+        else:
+            # ---- asmgcc ----
+            # We need to force-allocate each of save_around_call_regs now.
+            # The alternative would be to save and restore them around the
+            # actual call to malloc(), in the rare case where we need to do
+            # it; however, mark_gc_roots() would need to be adapted to know
+            # where the variables end up being saved.  Messy.
+            for reg in self.rm.save_around_call_regs:
+                if reg is not eax:
+                    tmp_box = TempBox()
+                    self.rm.force_allocate_reg(tmp_box, selected_reg=reg)
+                    self.rm.possibly_free_var(tmp_box)
 
         self.assembler.malloc_cond_fixedsize(
             gc_ll_descr.get_nursery_free_addr(),
@@ -864,8 +893,7 @@
 
     def consider_new(self, op):
         gc_ll_descr = self.assembler.cpu.gc_ll_descr
-        os.write(2, "fixme: consider_new\n")
-        if 0 and gc_ll_descr.can_inline_malloc(op.getdescr()):  # XXX
+        if gc_ll_descr.can_inline_malloc(op.getdescr()):
             self._fastpath_malloc(op, op.getdescr())
         else:
             args = gc_ll_descr.args_for_new(op.getdescr())
@@ -875,8 +903,7 @@
     def consider_new_with_vtable(self, op):
         classint = op.getarg(0).getint()
         descrsize = heaptracker.vtable2descr(self.assembler.cpu, classint)
-        os.write(2, "fixme: consider_new_with_vtable\n")
-        if 0 and self.assembler.cpu.gc_ll_descr.can_inline_malloc(descrsize): # XXX
+        if self.assembler.cpu.gc_ll_descr.can_inline_malloc(descrsize):
             self._fastpath_malloc(op, descrsize)
             self.assembler.set_vtable(eax, imm(classint))
             # result of fastpath malloc is in eax
@@ -1207,7 +1234,7 @@
     def consider_jit_debug(self, op):
         pass
 
-    def get_mark_gc_roots(self, gcrootmap):
+    def get_mark_gc_roots(self, gcrootmap, use_copy_area=False):
         shape = gcrootmap.get_basic_shape(IS_X86_64)
         for v, val in self.fm.frame_bindings.items():
             if (isinstance(v, BoxPtr) and self.rm.stays_alive(v)):
@@ -1217,8 +1244,14 @@
             if reg is eax:
                 continue      # ok to ignore this one
             if (isinstance(v, BoxPtr) and self.rm.stays_alive(v)):
-                assert reg in self.rm.REGLOC_TO_GCROOTMAP_REG_INDEX
-                gcrootmap.add_callee_save_reg(shape, self.rm.REGLOC_TO_GCROOTMAP_REG_INDEX[reg])
+                if use_copy_area:
+                    assert reg in self.rm.REGLOC_TO_COPY_AREA_OFS
+                    area_offset = self.rm.REGLOC_TO_COPY_AREA_OFS[reg]
+                    gcrootmap.add_frame_offset(shape, area_offset)
+                else:
+                    assert reg in self.rm.REGLOC_TO_GCROOTMAP_REG_INDEX
+                    gcrootmap.add_callee_save_reg(
+                        shape, self.rm.REGLOC_TO_GCROOTMAP_REG_INDEX[reg])
         return gcrootmap.compress_callshape(shape,
                                             self.assembler.datablockwrapper)
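
The net effect on the GC map: under shadowstack, a live pointer that
sits in, say, ebx across the fast path is now recorded by its frame
offset into the copy area instead of by a callee-saved-register index.
A schematic sketch (simplified shapes, not the real GcRootMap API):

    WORD = 8
    MY_COPY_OF_REGS = -17 * WORD              # 64-bit value from arch.py
    REGLOC_TO_COPY_AREA_OFS = {'ebx': MY_COPY_OF_REGS + 1 * WORD}

    def record_live_pointer(shape, reg, use_copy_area):
        if use_copy_area:
            # shadowstack: the register was spilled to a known stack slot
            shape.append(('frame_offset', REGLOC_TO_COPY_AREA_OFS[reg]))
        else:
            # asmgcc: the GC locates callee-saved registers itself
            shape.append(('callee_save_reg', reg))

    shape = []
    record_live_pointer(shape, 'ebx', use_copy_area=True)
    assert shape == [('frame_offset', -16 * WORD)]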
 

diff --git a/pypy/jit/backend/x86/test/test_zrpy_gc.py b/pypy/jit/backend/x86/test/test_zrpy_gc.py
--- a/pypy/jit/backend/x86/test/test_zrpy_gc.py
+++ b/pypy/jit/backend/x86/test/test_zrpy_gc.py
@@ -77,8 +77,11 @@
     #
     can_inline_malloc1 = gc.GcLLDescr_framework.can_inline_malloc
     def can_inline_malloc2(*args):
-        if os.getenv('PYPY_NO_INLINE_MALLOC'):
-            return False
+        try:
+            if os.environ['PYPY_NO_INLINE_MALLOC']:
+                return False
+        except KeyError:
+            pass
         return can_inline_malloc1(*args)
     #
     return {(gc.GcLLDescr_framework, 'can_inline_malloc'): can_inline_malloc2}
@@ -215,7 +218,7 @@
         env = {'PYPYLOG': ':%s' % pypylog,
                'PYPY_NO_INLINE_MALLOC': '1'}
         self._run(name, n, env)
-        del env['PYPY_NO_INLINE_MALLOC']
+        env['PYPY_NO_INLINE_MALLOC'] = ''
         self._run(name, n, env)
 
     def run_orig(self, name, n, x):
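
A small behavioural note on the test change: the second run now passes
PYPY_NO_INLINE_MALLOC='' instead of deleting the variable, and the
rewritten check treats both "unset" and "empty" as "inlining allowed":

    def inline_malloc_disabled(environ):
        try:
            if environ['PYPY_NO_INLINE_MALLOC']:
                return True
        except KeyError:
            pass
        return False

    assert inline_malloc_disabled({'PYPY_NO_INLINE_MALLOC': '1'})
    assert not inline_malloc_disabled({'PYPY_NO_INLINE_MALLOC': ''})
    assert not inline_malloc_disabled({})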

