[pypy-commit] pypy stmgc-c7-rewindjmp: in-progress
arigo
noreply at buildbot.pypy.org
Mon Aug 18 10:42:19 CEST 2014
Author: Armin Rigo <arigo at tunes.org>
Branch: stmgc-c7-rewindjmp
Changeset: r72868:8ff5b23d8b84
Date: 2014-08-18 10:41 +0200
http://bitbucket.org/pypy/pypy/changeset/8ff5b23d8b84/
Log: in-progress
diff --git a/rpython/jit/backend/x86/arch.py b/rpython/jit/backend/x86/arch.py
--- a/rpython/jit/backend/x86/arch.py
+++ b/rpython/jit/backend/x86/arch.py
@@ -16,7 +16,7 @@
# +--------------------+ <== aligned to 16 bytes
# | return address |
# +--------------------+ ------------------------.
-# | resume buf (if STM)| STM_FRAME_FIXED_SIZE |
+# | rewind_jmp_buf(STM)| STM_FRAME_FIXED_SIZE |
# +--------------------+ ----------------------. |
# | saved regs | FRAME_FIXED_SIZE | |
# +--------------------+ --------------------. | |
@@ -46,18 +46,9 @@
assert PASS_ON_MY_FRAME >= 12 # asmgcc needs at least JIT_USE_WORDS + 3
-# The STM resume buffer (on x86-64) is four words wide. Actually, clang
-# uses three words (see test_stm.py): rbp, rip, rsp. But the value of
-# rbp is not interesting for the JIT-generated machine code. So the
-# STM_JMPBUF_OFS is the offset from the stack top to the start of the
-# buffer, with only words at offset +1 and +2 in this buffer being
-# meaningful. We use ebp, i.e. the word at offset +0, to store the
-# resume counter.
-
-STM_RESUME_BUF_WORDS = 4
-STM_FRAME_FIXED_SIZE = FRAME_FIXED_SIZE + STM_RESUME_BUF_WORDS
-STM_JMPBUF_OFS = WORD * FRAME_FIXED_SIZE
-STM_JMPBUF_OFS_RBP = STM_JMPBUF_OFS + 0 * WORD
-STM_JMPBUF_OFS_RIP = STM_JMPBUF_OFS + 1 * WORD
-STM_JMPBUF_OFS_RSP = STM_JMPBUF_OFS + 2 * WORD
-STM_OLD_SHADOWSTACK = STM_JMPBUF_OFS + 3 * WORD
+# The STM rewind_jmp_buf (on x86-64) is two words wide:
+STM_REWIND_JMP_BUF_WORDS = 2
+STM_FRAME_FIXED_SIZE = FRAME_FIXED_SIZE + STM_REWIND_JMP_BUF_WORDS
+STM_JMPBUF_OFS = WORD * FRAME_FIXED_SIZE
+STM_SHADOWSTACK_BASE_OFS = STM_JMPBUF_OFS + 0 * WORD
+STM_PREV_OFS = STM_JMPBUF_OFS + 1 * WORD
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -19,8 +19,7 @@
from rpython.jit.backend.x86.arch import (
FRAME_FIXED_SIZE, WORD, IS_X86_64, JITFRAME_FIXED_SIZE, IS_X86_32,
PASS_ON_MY_FRAME, STM_FRAME_FIXED_SIZE, STM_JMPBUF_OFS,
- STM_JMPBUF_OFS_RIP, STM_JMPBUF_OFS_RSP, STM_JMPBUF_OFS_RBP,
- STM_OLD_SHADOWSTACK)
+ STM_SHADOWSTACK_BASE_OFS, STM_PREV_OFS)
from rpython.jit.backend.x86.regloc import (eax, ecx, edx, ebx, esp, ebp, esi,
xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, r8, r9, r10, r11, edi,
r12, r13, r14, r15, X86_64_SCRATCH_REG, X86_64_XMM_SCRATCH_REG,
@@ -886,45 +885,83 @@
gcrootmap = self.cpu.gc_ll_descr.gcrootmap
return self.heap_tl(gcrootmap.get_root_stack_top_addr())
+ def heap_rjthread(self):
+ """STM: Return an AddressLoc for '&stm_thread_local.rjthread'."""
+ return self.heap_tl(rstm.adr_rjthread)
+
+ def heap_rjthread_head(self):
+ """STM: Return an AddressLoc for '&stm_thread_local.rjthread.head'."""
+ return self.heap_tl(rstm.adr_rjthread_head)
+
+ def heap_rjthread_moved_off_base(self):
+ """STM: AddressLoc for '&stm_thread_local.rjthread.moved_off_base'."""
+ return self.heap_tl(rstm.adr_rjthread_moved_off_base)
+
def _call_header_shadowstack(self):
# put the frame in ebp on the shadowstack for the GC to find
# (ebp is a writeable object and does not need a write-barrier
# again (ensured by the code calling the loop))
- self.mc.MOV(ebx, self.heap_shadowstack_top())
+ mc = self.mc
+ mc.MOV(ebx, self.heap_shadowstack_top())
+ mc.MOV_mr((self.SEGMENT_NO, ebx.value, 0), ebp.value)
+ # MOV [ebx], ebp
if self.cpu.gc_ll_descr.stm:
- self.mc.MOV_mi((self.SEGMENT_NO, ebx.value, 0),
- rstm.stm_stack_marker_new) # MOV [ebx], MARKER_NEW
- self.mc.MOV_mr((self.SEGMENT_NO, ebx.value, WORD),
- ebp.value) # MOV [ebx+WORD], ebp
- self.mc.MOV_sr(STM_OLD_SHADOWSTACK, ebx.value)
- # MOV [esp+xx], ebx
- self.mc.ADD_ri(ebx.value, 2 * WORD)
+ # inlining stm_rewind_jmp_enterframe()
+ r11v = X86_64_SCRATCH_REG.value
+ rjh = self.heap_rjthread_head()
+ mc.ADD_ri8(ebx.value, 1) # ADD ebx, 1
+ mc.MOV_rm(r11v, rjh) # MOV r11, [rjthread.head]
+ mc.MOV_sr(STM_SHADOWSTACK_BASE_OFS, ebx.value)
+ # MOV [esp+ssbase], ebx
+ mc.ADD_ri8(ebx.value, WORD-1) # ADD ebx, 7
+ mc.MOV_sr(STM_PREV_OFS, r11v) # MOV [esp+prev], r11
+ mc.MOV(self.heap_shadowstack_top(), ebx) # MOV [rootstacktop], ebx
+ mc.LEA_rs(r11v, STM_JMPBUF_OFS) # LEA r11, [esp+bufofs]
+ mc.MOV_mr(rjh, r11v) # MOV [rjthread.head], r11
+ #
else:
- self.mc.MOV_mr((self.SEGMENT_NO, ebx.value, 0),
- ebp.value) # MOV [ebx], ebp
- self.mc.ADD_ri(ebx.value, WORD)
- self.mc.MOV(self.heap_shadowstack_top(), ebx) # MOV [rootstacktop], ebx
+ mc.ADD_ri(ebx.value, WORD) # ADD ebx, WORD
+ mc.MOV(self.heap_shadowstack_top(), ebx) # MOV [rootstacktop], ebx
def _call_footer_shadowstack(self):
+ mc = self.mc
if self.cpu.gc_ll_descr.stm:
# STM: in the rare case where we need realloc_frame, the new
# frame is pushed on top of the old one. It's even possible
# that this occurs more than once. So we have to restore
# the old shadowstack by looking up its original saved value.
- self.mc.MOV_rs(ecx.value, STM_OLD_SHADOWSTACK)
- self.mc.MOV(self.heap_shadowstack_top(), ecx)
+ # The rest of this is inlining stm_rewind_jmp_leaveframe().
+ r11v = X86_64_SCRATCH_REG.value
+ rjh = self.heap_rjthread_head()
+ rjmovd_o_b = self.heap_rjthread_moved_off_base()
+ # rjmovd_o_b: loc of 'stm_thread_local.rjthread.moved_off_base'
+ mc.MOV_rs(r11v, STM_SHADOWSTACK_BASE_OFS) # MOV r11, [esp+ssbase]
+ mc.MOV_rs(ebx.value, STM_PREV_OFS) # MOV ebx, [esp+prev]
+ mc.MOV(self.heap_shadowstack_top(), X86_64_SCRATCH_REG) # MOV [rootstacktop], r11
+ mc.LEA_rs(r11v, STM_JMPBUF_OFS) # LEA r11, [esp+bufofs]
+ mc.MOV_mr(rjh, ebx.value) # MOV [rjthread.head], ebx
+ mc.CMP_rm(r11v, rjmovd_o_b) # CMP r11, [rjth.movd_o_b]
+ mc.J_il8(rx86.Conditions['NE'], 0) # JNE label_below
+ jne_location = mc.get_relative_pos()
+ #
+ mc.CALL(imm(rstm.adr_pypy__rewind_jmp_copy_stack_slice))
+ #
+ # patch the JNE above
+ offset = mc.get_relative_pos() - jne_location
+ assert 0 < offset <= 127
+ mc.overwrite(jne_location-1, chr(offset))
else:
# SUB [rootstacktop], WORD
gcrootmap = self.cpu.gc_ll_descr.gcrootmap
rst = gcrootmap.get_root_stack_top_addr()
if rx86.fits_in_32bits(rst):
# SUB [rootstacktop], WORD
- self.mc.SUB_ji8((self.SEGMENT_NO, rst), WORD)
+ mc.SUB_ji8((self.SEGMENT_NO, rst), WORD)
else:
# MOV ebx, rootstacktop
# SUB [ebx], WORD
- self.mc.MOV_ri(ebx.value, rst)
- self.mc.SUB_mi8((self.SEGMENT_NO, ebx.value, 0), WORD)
+ mc.MOV_ri(ebx.value, rst)
+ mc.SUB_mi8((self.SEGMENT_NO, ebx.value, 0), WORD)
def redirect_call_assembler(self, oldlooptoken, newlooptoken):
# some minimal sanity checking
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -1292,9 +1292,6 @@
need_lower_byte=True)
self.perform(op, [], resloc)
- def XXXconsider_stm_transaction_break(self, op, guard_op):
- self.perform_with_guard(op, guard_op, [], None)
-
def consider_jump(self, op):
assembler = self.assembler
assert self.jump_target_descr is None
diff --git a/rpython/rlib/rstm.py b/rpython/rlib/rstm.py
--- a/rpython/rlib/rstm.py
+++ b/rpython/rlib/rstm.py
@@ -13,12 +13,16 @@
TID = rffi.UINT
tid_offset = CFlexSymbolic('offsetof(struct rpyobj_s, tid)')
stm_nb_segments = CFlexSymbolic('STM_NB_SEGMENTS')
-stm_stack_marker_new = CFlexSymbolic('STM_STACK_MARKER_NEW')
-stm_stack_marker_old = CFlexSymbolic('STM_STACK_MARKER_OLD')
adr_nursery_free = CFlexSymbolic('((long)&STM_SEGMENT->nursery_current)')
adr_nursery_top = CFlexSymbolic('((long)&STM_SEGMENT->nursery_end)')
adr_pypy_stm_nursery_low_fill_mark = (
CFlexSymbolic('((long)&pypy_stm_nursery_low_fill_mark)'))
+adr_rjthread = ( # C address expressions for stm_thread_local.rjthread
+ CFlexSymbolic('((long)&stm_thread_local.rjthread)'))
+adr_rjthread_head = (
+ CFlexSymbolic('((long)&stm_thread_local.rjthread.head)'))
+adr_rjthread_moved_off_base = (
+ CFlexSymbolic('((long)&stm_thread_local.rjthread.moved_off_base)'))
adr_transaction_read_version = (
CFlexSymbolic('((long)&STM_SEGMENT->transaction_read_version)'))
adr_jmpbuf_ptr = (
@@ -39,6 +43,8 @@
CFlexSymbolic('((long)&stm_commit_transaction)'))
adr_pypy_stm_start_transaction = (
CFlexSymbolic('((long)&pypy_stm_start_transaction)'))
+adr_pypy__rewind_jmp_copy_stack_slice = (
+ CFlexSymbolic('((long)&pypy__rewind_jmp_copy_stack_slice)'))
def rewind_jmp_frame():
diff --git a/rpython/translator/stm/src_stm/stmgcintf.h b/rpython/translator/stm/src_stm/stmgcintf.h
--- a/rpython/translator/stm/src_stm/stmgcintf.h
+++ b/rpython/translator/stm/src_stm/stmgcintf.h
@@ -116,5 +116,10 @@
/* NB. this logic is hard-coded in jit/backend/x86/assembler.py too */
}
+static void pypy__rewind_jmp_copy_stack_slice(void)
+{
+ _rewind_jmp_copy_stack_slice(&stm_thread_local.rjthread);
+}
+
#endif /* _RPY_STMGCINTF_H */
More information about the pypy-commit
mailing list