[pypy-svn] pypy jit-shadowstack: Reimplement support for the fastpath mallocs.
arigo
commits-noreply at bitbucket.org
Thu Mar 31 16:56:00 CEST 2011
Author: Armin Rigo <arigo at tunes.org>
Branch: jit-shadowstack
Changeset: r43053:db1614adf648
Date: 2011-03-31 16:37 +0200
http://bitbucket.org/pypy/pypy/changeset/db1614adf648/
Log: Reimplement support for the fastpath mallocs.
diff --git a/pypy/jit/backend/x86/arch.py b/pypy/jit/backend/x86/arch.py
--- a/pypy/jit/backend/x86/arch.py
+++ b/pypy/jit/backend/x86/arch.py
@@ -1,17 +1,28 @@
# Constants that depend on whether we are on 32-bit or 64-bit
+# The frame size gives the standard fixed part at the start of
+# every assembler frame: the saved value of some registers,
+# one word for the force_index, and some extra space used only
+# during a malloc that needs to go via its slow path.
+
import sys
if sys.maxint == (2**31 - 1):
WORD = 4
- # ebp + ebx + esi + edi + force_index = 5 words
- FRAME_FIXED_SIZE = 5
+ # ebp + ebx + esi + edi + force_index + 4 extra words = 9 words
+ FRAME_FIXED_SIZE = 9
+ FORCE_INDEX_OFS = -4*WORD
IS_X86_32 = True
IS_X86_64 = False
else:
WORD = 8
- # rbp + rbx + r12 + r13 + r14 + r15 + force_index = 7 words
- FRAME_FIXED_SIZE = 7
+ # rbp + rbx + r12 + r13 + r14 + r15 + force_index + 11 extra words = 18
+ FRAME_FIXED_SIZE = 18
+ FORCE_INDEX_OFS = -6*WORD
IS_X86_32 = False
IS_X86_64 = True
-FORCE_INDEX_OFS = -(FRAME_FIXED_SIZE-1)*WORD
+MY_COPY_OF_REGS = -(FRAME_FIXED_SIZE-1)*WORD
+# The extra space has room for almost all registers, apart from eax and edx
+# which are used in the malloc itself. They are:
+# ecx, ebx, esi, edi [32 and 64 bits]
+# r8, r9, r10, r12, r13, r14, r15 [64 bits only]
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -8,9 +8,8 @@
from pypy.rpython.lltypesystem.lloperation import llop
from pypy.rpython.annlowlevel import llhelper
from pypy.jit.backend.model import CompiledLoopToken
-from pypy.jit.backend.x86.regalloc import (RegAlloc, X86RegisterManager,
- X86XMMRegisterManager, get_ebp_ofs,
- _get_scale)
+from pypy.jit.backend.x86.regalloc import (RegAlloc, get_ebp_ofs,
+ _get_scale, gpr_reg_mgr_cls)
from pypy.jit.backend.x86.arch import (FRAME_FIXED_SIZE, FORCE_INDEX_OFS, WORD,
IS_X86_32, IS_X86_64)
@@ -188,6 +187,8 @@
#
if gcrootmap.is_shadow_stack:
# ---- shadowstack ----
+ for reg, ofs in gpr_reg_mgr_cls.REGLOC_TO_COPY_AREA_OFS.items():
+ mc.MOV_br(ofs, reg.value)
mc.SUB_ri(esp.value, 16 - WORD) # stack alignment of 16 bytes
if IS_X86_32:
mc.MOV_sr(0, edx.value) # push argument
@@ -195,6 +196,8 @@
mc.MOV_rr(edi.value, edx.value)
mc.CALL(imm(addr))
mc.ADD_ri(esp.value, 16 - WORD)
+ for reg, ofs in gpr_reg_mgr_cls.REGLOC_TO_COPY_AREA_OFS.items():
+ mc.MOV_rb(reg.value, ofs)
else:
# ---- asmgcc ----
if IS_X86_32:
@@ -736,8 +739,8 @@
nonfloatlocs, floatlocs = arglocs
self._call_header()
stackadjustpos = self._patchable_stackadjust()
- tmp = X86RegisterManager.all_regs[0]
- xmmtmp = X86XMMRegisterManager.all_regs[0]
+ tmp = eax
+ xmmtmp = xmm0
self.mc.begin_reuse_scratch_register()
for i in range(len(nonfloatlocs)):
loc = nonfloatlocs[i]
@@ -1961,7 +1964,7 @@
# load the return value from fail_boxes_xxx[0]
kind = op.result.type
if kind == FLOAT:
- xmmtmp = X86XMMRegisterManager.all_regs[0]
+ xmmtmp = xmm0
adr = self.fail_boxes_float.get_addr_for_num(0)
self.mc.MOVSD(xmmtmp, heap(adr))
self.mc.MOVSD(result_loc, xmmtmp)
@@ -2056,12 +2059,12 @@
not_implemented("not implemented operation (guard): %s" %
op.getopname())
- def mark_gc_roots(self, force_index):
+ def mark_gc_roots(self, force_index, use_copy_area=False):
if force_index < 0:
return # not needed
gcrootmap = self.cpu.gc_ll_descr.gcrootmap
if gcrootmap:
- mark = self._regalloc.get_mark_gc_roots(gcrootmap)
+ mark = self._regalloc.get_mark_gc_roots(gcrootmap, use_copy_area)
if gcrootmap.is_shadow_stack:
gcrootmap.write_callshape(mark, force_index)
else:
@@ -2106,7 +2109,8 @@
# there are two helpers to call only with asmgcc
slowpath_addr1 = self.malloc_fixedsize_slowpath1
self.mc.CALL(imm(slowpath_addr1))
- self.mark_gc_roots()
+ self.mark_gc_roots(self.write_new_force_index(),
+ use_copy_area=gcrootmap.is_shadow_stack)
slowpath_addr2 = self.malloc_fixedsize_slowpath2
self.mc.CALL(imm(slowpath_addr2))
diff --git a/pypy/jit/backend/x86/rx86.py b/pypy/jit/backend/x86/rx86.py
--- a/pypy/jit/backend/x86/rx86.py
+++ b/pypy/jit/backend/x86/rx86.py
@@ -543,6 +543,9 @@
# x87 instructions
FSTP_b = insn('\xDD', orbyte(3<<3), stack_bp(1))
+ # reserved as an illegal instruction
+ UD2 = insn('\x0F\x0B')
+
# ------------------------------ SSE2 ------------------------------
# Conversion
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -19,7 +19,8 @@
from pypy.jit.backend.llsupport.descr import BaseCallDescr, BaseSizeDescr
from pypy.jit.backend.llsupport.regalloc import FrameManager, RegisterManager,\
TempBox
-from pypy.jit.backend.x86.arch import WORD, FRAME_FIXED_SIZE, IS_X86_32, IS_X86_64
+from pypy.jit.backend.x86.arch import WORD, FRAME_FIXED_SIZE
+from pypy.jit.backend.x86.arch import IS_X86_32, IS_X86_64, MY_COPY_OF_REGS
from pypy.rlib.rarithmetic import r_longlong, r_uint
class X86RegisterManager(RegisterManager):
@@ -34,6 +35,12 @@
esi: 2,
edi: 3,
}
+ REGLOC_TO_COPY_AREA_OFS = {
+ ecx: MY_COPY_OF_REGS + 0 * WORD,
+ ebx: MY_COPY_OF_REGS + 1 * WORD,
+ esi: MY_COPY_OF_REGS + 2 * WORD,
+ edi: MY_COPY_OF_REGS + 3 * WORD,
+ }
def call_result_location(self, v):
return eax
@@ -61,6 +68,19 @@
r14: 4,
r15: 5,
}
+ REGLOC_TO_COPY_AREA_OFS = {
+ ecx: MY_COPY_OF_REGS + 0 * WORD,
+ ebx: MY_COPY_OF_REGS + 1 * WORD,
+ esi: MY_COPY_OF_REGS + 2 * WORD,
+ edi: MY_COPY_OF_REGS + 3 * WORD,
+ r8: MY_COPY_OF_REGS + 4 * WORD,
+ r9: MY_COPY_OF_REGS + 5 * WORD,
+ r10: MY_COPY_OF_REGS + 6 * WORD,
+ r12: MY_COPY_OF_REGS + 7 * WORD,
+ r13: MY_COPY_OF_REGS + 8 * WORD,
+ r14: MY_COPY_OF_REGS + 9 * WORD,
+ r15: MY_COPY_OF_REGS + 10 * WORD,
+ }
class X86XMMRegisterManager(RegisterManager):
@@ -117,6 +137,16 @@
else:
return 1
+if WORD == 4:
+ gpr_reg_mgr_cls = X86RegisterManager
+ xmm_reg_mgr_cls = X86XMMRegisterManager
+elif WORD == 8:
+ gpr_reg_mgr_cls = X86_64_RegisterManager
+ xmm_reg_mgr_cls = X86_64_XMMRegisterManager
+else:
+ raise AssertionError("Word size should be 4 or 8")
+
+
class RegAlloc(object):
def __init__(self, assembler, translate_support_code=False):
@@ -135,16 +165,6 @@
# compute longevity of variables
longevity = self._compute_vars_longevity(inputargs, operations)
self.longevity = longevity
- # XXX
- if cpu.WORD == 4:
- gpr_reg_mgr_cls = X86RegisterManager
- xmm_reg_mgr_cls = X86XMMRegisterManager
- elif cpu.WORD == 8:
- gpr_reg_mgr_cls = X86_64_RegisterManager
- xmm_reg_mgr_cls = X86_64_XMMRegisterManager
- else:
- raise AssertionError("Word size should be 4 or 8")
-
self.rm = gpr_reg_mgr_cls(longevity,
frame_manager = self.fm,
assembler = self.assembler)
@@ -841,20 +861,29 @@
self.rm.possibly_free_vars_for_op(op)
def _fastpath_malloc(self, op, descr):
- XXX
assert isinstance(descr, BaseSizeDescr)
gc_ll_descr = self.assembler.cpu.gc_ll_descr
self.rm.force_allocate_reg(op.result, selected_reg=eax)
- # We need to force-allocate each of save_around_call_regs now.
- # The alternative would be to save and restore them around the
- # actual call to malloc(), in the rare case where we need to do
- # it; however, mark_gc_roots() would need to be adapted to know
- # where the variables end up being saved. Messy.
- for reg in self.rm.save_around_call_regs:
- if reg is not eax:
- tmp_box = TempBox()
- self.rm.force_allocate_reg(tmp_box, selected_reg=reg)
- self.rm.possibly_free_var(tmp_box)
+
+ if gc_ll_descr.gcrootmap.is_shadow_stack:
+ # ---- shadowstack ----
+ # We need edx as a temporary, but otherwise don't save any more
+ # registers. See comments in _build_malloc_fixedsize_slowpath().
+ tmp_box = TempBox()
+ self.rm.force_allocate_reg(tmp_box, selected_reg=edx)
+ self.rm.possibly_free_var(tmp_box)
+ else:
+ # ---- asmgcc ----
+ # We need to force-allocate each of save_around_call_regs now.
+ # The alternative would be to save and restore them around the
+ # actual call to malloc(), in the rare case where we need to do
+ # it; however, mark_gc_roots() would need to be adapted to know
+ # where the variables end up being saved. Messy.
+ for reg in self.rm.save_around_call_regs:
+ if reg is not eax:
+ tmp_box = TempBox()
+ self.rm.force_allocate_reg(tmp_box, selected_reg=reg)
+ self.rm.possibly_free_var(tmp_box)
self.assembler.malloc_cond_fixedsize(
gc_ll_descr.get_nursery_free_addr(),
@@ -864,8 +893,7 @@
def consider_new(self, op):
gc_ll_descr = self.assembler.cpu.gc_ll_descr
- os.write(2, "fixme: consider_new\n")
- if 0 and gc_ll_descr.can_inline_malloc(op.getdescr()): # XXX
+ if gc_ll_descr.can_inline_malloc(op.getdescr()):
self._fastpath_malloc(op, op.getdescr())
else:
args = gc_ll_descr.args_for_new(op.getdescr())
@@ -875,8 +903,7 @@
def consider_new_with_vtable(self, op):
classint = op.getarg(0).getint()
descrsize = heaptracker.vtable2descr(self.assembler.cpu, classint)
- os.write(2, "fixme: consider_new_with_vtable\n")
- if 0 and self.assembler.cpu.gc_ll_descr.can_inline_malloc(descrsize): # XXX
+ if self.assembler.cpu.gc_ll_descr.can_inline_malloc(descrsize):
self._fastpath_malloc(op, descrsize)
self.assembler.set_vtable(eax, imm(classint))
# result of fastpath malloc is in eax
@@ -1207,7 +1234,7 @@
def consider_jit_debug(self, op):
pass
- def get_mark_gc_roots(self, gcrootmap):
+ def get_mark_gc_roots(self, gcrootmap, use_copy_area=False):
shape = gcrootmap.get_basic_shape(IS_X86_64)
for v, val in self.fm.frame_bindings.items():
if (isinstance(v, BoxPtr) and self.rm.stays_alive(v)):
@@ -1217,8 +1244,14 @@
if reg is eax:
continue # ok to ignore this one
if (isinstance(v, BoxPtr) and self.rm.stays_alive(v)):
- assert reg in self.rm.REGLOC_TO_GCROOTMAP_REG_INDEX
- gcrootmap.add_callee_save_reg(shape, self.rm.REGLOC_TO_GCROOTMAP_REG_INDEX[reg])
+ if use_copy_area:
+ assert reg in self.rm.REGLOC_TO_COPY_AREA_OFS
+ area_offset = self.rm.REGLOC_TO_COPY_AREA_OFS[reg]
+ gcrootmap.add_frame_offset(shape, area_offset)
+ else:
+ assert reg in self.rm.REGLOC_TO_GCROOTMAP_REG_INDEX
+ gcrootmap.add_callee_save_reg(
+ shape, self.rm.REGLOC_TO_GCROOTMAP_REG_INDEX[reg])
return gcrootmap.compress_callshape(shape,
self.assembler.datablockwrapper)
diff --git a/pypy/jit/backend/x86/test/test_zrpy_gc.py b/pypy/jit/backend/x86/test/test_zrpy_gc.py
--- a/pypy/jit/backend/x86/test/test_zrpy_gc.py
+++ b/pypy/jit/backend/x86/test/test_zrpy_gc.py
@@ -77,8 +77,11 @@
#
can_inline_malloc1 = gc.GcLLDescr_framework.can_inline_malloc
def can_inline_malloc2(*args):
- if os.getenv('PYPY_NO_INLINE_MALLOC'):
- return False
+ try:
+ if os.environ['PYPY_NO_INLINE_MALLOC']:
+ return False
+ except KeyError:
+ pass
return can_inline_malloc1(*args)
#
return {(gc.GcLLDescr_framework, 'can_inline_malloc'): can_inline_malloc2}
@@ -215,7 +218,7 @@
env = {'PYPYLOG': ':%s' % pypylog,
'PYPY_NO_INLINE_MALLOC': '1'}
self._run(name, n, env)
- del env['PYPY_NO_INLINE_MALLOC']
+ env['PYPY_NO_INLINE_MALLOC'] = ''
self._run(name, n, env)
def run_orig(self, name, n, x):
More information about the Pypy-commit
mailing list