[pypy-commit] pypy continulet-jit-2: Starting to hack at the x86 backend. Right now I'm just hacking and
arigo
noreply at buildbot.pypy.org
Thu Mar 1 17:11:45 CET 2012
Author: Armin Rigo <arigo at tunes.org>
Branch: continulet-jit-2
Changeset: r53054:b2c0c89fe245
Date: 2012-03-01 16:01 +0100
http://bitbucket.org/pypy/pypy/changeset/b2c0c89fe245/
Log: Starting to hack at the x86 backend. Right now I'm just hacking and
will rely on "hg diff" later to merge it more cleanly, keeping both
versions.
diff --git a/pypy/jit/backend/x86/arch.py b/pypy/jit/backend/x86/arch.py
--- a/pypy/jit/backend/x86/arch.py
+++ b/pypy/jit/backend/x86/arch.py
@@ -30,3 +30,16 @@
#
# Note that with asmgcc, the locations corresponding to callee-save registers
# are never used.
+
+# In the offstack version (i.e. when using stacklets): the off-stack allocated
+# area starts with the FRAME_FIXED_SIZE words in the same order as they would
+# be on the real stack (which is top-to-bottom, so it's actually the opposite
+# order from the one in the comments above); but whereas the real stack would
+# have the spilled values stored in (ebp-20), (ebp-24), etc., the off-stack
+# area has them stored in (ebp+8), (ebp+12), etc.
+#
+# In stacklet mode, the real frame always contains just OFFSTACK_REAL_FRAME
+# words reserved for temporary usage like call arguments. To maintain
+# alignment on 32-bit, OFFSTACK_REAL_FRAME % 4 must be 3, and the value must
+# be at least 17 to handle all other cases.
+OFFSTACK_REAL_FRAME = 19
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -13,7 +13,8 @@
gpr_reg_mgr_cls, _valid_addressing_size)
from pypy.jit.backend.x86.arch import (FRAME_FIXED_SIZE, FORCE_INDEX_OFS, WORD,
- IS_X86_32, IS_X86_64)
+ IS_X86_32, IS_X86_64,
+ OFFSTACK_REAL_FRAME)
from pypy.jit.backend.x86.regloc import (eax, ecx, edx, ebx,
esp, ebp, esi, edi,
@@ -84,6 +85,9 @@
self.malloc_slowpath1 = 0
self.malloc_slowpath2 = 0
self.memcpy_addr = 0
+ self.offstack_malloc = 0
+ self.offstack_realloc = 0
+ self.offstack_free = 0
self.setup_failure_recovery()
self._debug = False
self.debug_counter_descr = cpu.fielddescrof(DEBUG_COUNTER, 'i')
@@ -107,7 +111,11 @@
# the address of the function called by 'new'
gc_ll_descr = self.cpu.gc_ll_descr
gc_ll_descr.initialize()
- self.memcpy_addr = self.cpu.cast_ptr_to_int(support.memcpy_fn)
+ cpi = self.cpu.cast_ptr_to_int
+ self.memcpy_addr = cpi(support.memcpy_fn)
+ self.offstack_malloc_addr = cpi(support.offstack_malloc_fn)
+ self.offstack_realloc_addr = cpi(support.offstack_realloc_fn)
+ self.offstack_free_addr = cpi(support.offstack_free_fn)
self._build_failure_recovery(False)
self._build_failure_recovery(True)
if self.cpu.supports_floats:
@@ -435,17 +443,17 @@
regalloc = RegAlloc(self, self.cpu.translate_support_code)
#
self._call_header_with_stack_check()
- stackadjustpos = self._patchable_stackadjust()
clt._debug_nbargs = len(inputargs)
operations = regalloc.prepare_loop(inputargs, operations,
looptoken, clt.allgcrefs)
looppos = self.mc.get_relative_pos()
looptoken._x86_loop_code = looppos
clt.frame_depth = -1 # temporarily
- clt.param_depth = -1 # temporarily
- frame_depth, param_depth = self._assemble(regalloc, operations)
+ #clt.param_depth = -1 # temporarily
+ (frame_depth#, param_depth
+ ) = self._assemble(regalloc, operations)
clt.frame_depth = frame_depth
- clt.param_depth = param_depth
+ #clt.param_depth = param_depth
#
size_excluding_failure_stuff = self.mc.get_relative_pos()
self.write_pending_failure_recoveries()
@@ -459,8 +467,8 @@
rawstart + size_excluding_failure_stuff,
rawstart))
debug_stop("jit-backend-addr")
- self._patch_stackadjust(rawstart + stackadjustpos,
- frame_depth + param_depth)
+ #self._patch_stackadjust(rawstart + stackadjustpos,
+ # frame_depth )#+ param_depth)
self.patch_pending_failure_recoveries(rawstart)
#
ops_offset = self.mc.ops_offset
@@ -529,7 +537,7 @@
ops_offset = self.mc.ops_offset
self.fixup_target_tokens(rawstart)
self.current_clt.frame_depth = max(self.current_clt.frame_depth, frame_depth)
- self.current_clt.param_depth = max(self.current_clt.param_depth, param_depth)
+ #self.current_clt.param_depth = max(self.current_clt.param_depth, param_depth)
self.teardown()
# oprofile support
if self.cpu.profile_agent is not None:
@@ -701,14 +709,14 @@
if we_are_translated() or self.cpu.dont_keepalive_stuff:
self._regalloc = None # else keep it around for debugging
frame_depth = regalloc.fm.get_frame_depth()
- param_depth = regalloc.param_depth
+ #param_depth = regalloc.param_depth
jump_target_descr = regalloc.jump_target_descr
if jump_target_descr is not None:
target_frame_depth = jump_target_descr._x86_clt.frame_depth
- target_param_depth = jump_target_descr._x86_clt.param_depth
+ #target_param_depth = jump_target_descr._x86_clt.param_depth
frame_depth = max(frame_depth, target_frame_depth)
- param_depth = max(param_depth, target_param_depth)
- return frame_depth, param_depth
+ #param_depth = max(param_depth, target_param_depth)
+ return frame_depth#, param_depth
def _patchable_stackadjust(self):
# stack adjustment LEA
@@ -733,10 +741,28 @@
def _call_header(self):
# NB. the shape of the frame is hard-coded in get_basic_shape() too.
# Also, make sure this is consistent with FRAME_FIXED_SIZE.
- self.mc.PUSH_r(ebp.value)
- self.mc.MOV_rr(ebp.value, esp.value)
- for loc in self.cpu.CALLEE_SAVE_REGISTERS:
- self.mc.PUSH_r(loc.value)
+ if IS_X86_32:
+ self.mc.SUB_ri(esp.value, WORD * (OFFSTACK_REAL_FRAME-1))
+ self.mc.PUSH_i32(4096) # XXX XXX!
+ elif IS_X86_64:
+ save_regs = [r9, r8, ecx, edx, esi, edi]
+ assert OFFSTACK_REAL_FRAME >= len(save_regs)
+ self.mc.SUB_ri(esp.value, WORD * (OFFSTACK_REAL_FRAME
+ - len(save_regs)))
+ for reg in save_regs:
+ self.mc.PUSH_r(reg.value)
+ self.mc.MOV_ri(edi.value, 4096) # XXX XXX!
+ self.mc.CALL(imm(self.offstack_malloc_addr))
+ if IS_X86_64:
+ for i in range(len(save_regs)): # XXX looks heavy
+ reg = save_regs[len(save_regs) - 1 - i]
+ self.mc.MOV_rs(reg.value, WORD * i)
+ self.mc.MOV_mr((eax.value, WORD * (FRAME_FIXED_SIZE-1)),
+ ebp.value) # (new ebp) <- ebp
+ self.mc.LEA_rm(ebp.value, (eax.value, WORD * (FRAME_FIXED_SIZE-1)))
+ for i in range(len(self.cpu.CALLEE_SAVE_REGISTERS)):
+ loc = self.cpu.CALLEE_SAVE_REGISTERS[i]
+ self.mc.MOV_br(WORD*(-1-i), loc.value) # (ebp-4-4*i) <- reg
gcrootmap = self.cpu.gc_ll_descr.gcrootmap
if gcrootmap and gcrootmap.is_shadow_stack:
@@ -761,16 +787,17 @@
self._call_header()
def _call_footer(self):
- self.mc.LEA_rb(esp.value, -len(self.cpu.CALLEE_SAVE_REGISTERS) * WORD)
-
gcrootmap = self.cpu.gc_ll_descr.gcrootmap
if gcrootmap and gcrootmap.is_shadow_stack:
self._call_footer_shadowstack(gcrootmap)
+ self.mc.ADD_ri(esp.value, WORD * OFFSTACK_REAL_FRAME)
for i in range(len(self.cpu.CALLEE_SAVE_REGISTERS)-1, -1, -1):
- self.mc.POP_r(self.cpu.CALLEE_SAVE_REGISTERS[i].value)
+ loc = self.cpu.CALLEE_SAVE_REGISTERS[i]
+ self.mc.MOV_rb(loc.value, WORD*(-1-i)) # (ebp-4-4*i) -> reg
+ self.mc.MOV_rb(ebp.value, 0) # (ebp) -> ebp
+ # XXX free!
- self.mc.POP_r(ebp.value)
self.mc.RET()
def _call_header_shadowstack(self, gcrootmap):
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -23,6 +23,7 @@
TempBox
from pypy.jit.backend.x86.arch import WORD, FRAME_FIXED_SIZE
from pypy.jit.backend.x86.arch import IS_X86_32, IS_X86_64, MY_COPY_OF_REGS
+from pypy.jit.backend.x86.arch import OFFSTACK_REAL_FRAME
from pypy.rlib.rarithmetic import r_longlong
class X86RegisterManager(RegisterManager):
@@ -129,9 +130,9 @@
class X86FrameManager(FrameManager):
@staticmethod
def frame_pos(i, box_type):
- if IS_X86_32 and box_type == FLOAT:
- return StackLoc(i, get_ebp_ofs(i+1), box_type)
- else:
+ #if IS_X86_32 and box_type == FLOAT:
+ # return StackLoc(i, get_ebp_ofs(i+1), box_type)
+ #else:
return StackLoc(i, get_ebp_ofs(i), box_type)
@staticmethod
def frame_size(box_type):
@@ -168,7 +169,7 @@
def _prepare(self, inputargs, operations, allgcrefs):
self.fm = X86FrameManager()
- self.param_depth = 0
+ #self.param_depth = 0
cpu = self.assembler.cpu
operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations,
allgcrefs)
@@ -197,7 +198,7 @@
allgcrefs):
operations = self._prepare(inputargs, operations, allgcrefs)
self._update_bindings(arglocs, inputargs)
- self.param_depth = prev_depths[1]
+ #self.param_depth = prev_depths[1]
self.min_bytes_before_label = 0
return operations
@@ -206,11 +207,24 @@
at_least_position)
def reserve_param(self, n):
+ xxx
self.param_depth = max(self.param_depth, n)
def _set_initial_bindings(self, inputargs):
if IS_X86_64:
inputargs = self._set_initial_bindings_regs_64(inputargs)
+
+ cur_frame_ofs = WORD * (OFFSTACK_REAL_FRAME + 1)
+ mc = self.assembler.mc
+ for box in inputargs:
+ assert isinstance(box, Box)
+ if IS_X86_32 and box.type == FLOAT:
+ xxx
+ loc = self.fm.loc(box)
+ mc.MOV_rs(eax.value, cur_frame_ofs)
+ mc.MOV_br(loc.value, eax.value)
+ return
+
# ...
# stack layout: arg2
# arg1
@@ -1518,11 +1532,15 @@
else:
oplist[num] = value
+##def get_ebp_ofs(position):
+## # Argument is a frame position (0, 1, 2...).
+## # Returns (ebp-20), (ebp-24), (ebp-28)...
+## # i.e. the n'th word beyond the fixed frame size.
+## return -WORD * (FRAME_FIXED_SIZE + position)
def get_ebp_ofs(position):
# Argument is a frame position (0, 1, 2...).
- # Returns (ebp-20), (ebp-24), (ebp-28)...
- # i.e. the n'th word beyond the fixed frame size.
- return -WORD * (FRAME_FIXED_SIZE + position)
+ # Returns (ebp+8), (ebp+12), (ebp+16)...
+ return WORD * (2 + position)
def _valid_addressing_size(size):
return size == 1 or size == 2 or size == 4 or size == 8
diff --git a/pypy/jit/backend/x86/support.py b/pypy/jit/backend/x86/support.py
--- a/pypy/jit/backend/x86/support.py
+++ b/pypy/jit/backend/x86/support.py
@@ -33,6 +33,14 @@
memcpy_fn = rffi.llexternal('memcpy', [llmemory.Address, llmemory.Address,
rffi.SIZE_T], lltype.Void,
sandboxsafe=True, _nowrapper=True)
+offstack_malloc_fn = rffi.llexternal('malloc', [rffi.SIZE_T],
+ llmemory.Address,
+ sandboxsafe=True, _nowrapper=True)
+offstack_realloc_fn = rffi.llexternal('realloc', [llmemory.Address,
+ rffi.SIZE_T], llmemory.Address,
+ sandboxsafe=True, _nowrapper=True)
+offstack_free_fn = rffi.llexternal('free', [llmemory.Address], lltype.Void,
+ sandboxsafe=True, _nowrapper=True)
# ____________________________________________________________
More information about the pypy-commit
mailing list