[pypy-commit] pypy stmgc-c7-rewindjmp: in-progress

arigo noreply at buildbot.pypy.org
Mon Aug 18 10:42:19 CEST 2014


Author: Armin Rigo <arigo at tunes.org>
Branch: stmgc-c7-rewindjmp
Changeset: r72868:8ff5b23d8b84
Date: 2014-08-18 10:41 +0200
http://bitbucket.org/pypy/pypy/changeset/8ff5b23d8b84/

Log:	in-progress

diff --git a/rpython/jit/backend/x86/arch.py b/rpython/jit/backend/x86/arch.py
--- a/rpython/jit/backend/x86/arch.py
+++ b/rpython/jit/backend/x86/arch.py
@@ -16,7 +16,7 @@
 #        +--------------------+    <== aligned to 16 bytes
 #        |   return address   |
 #        +--------------------+               ------------------------.
-#        | resume buf (if STM)|                  STM_FRAME_FIXED_SIZE |
+#        | rewind_jmp_buf(STM)|                  STM_FRAME_FIXED_SIZE |
 #        +--------------------+           ----------------------.     |
 #        |    saved regs      |                FRAME_FIXED_SIZE |     |
 #        +--------------------+       --------------------.     |     |
@@ -46,18 +46,9 @@
 assert PASS_ON_MY_FRAME >= 12       # asmgcc needs at least JIT_USE_WORDS + 3
 
 
-# The STM resume buffer (on x86-64) is four words wide.  Actually, clang
-# uses three words (see test_stm.py): rbp, rip, rsp.  But the value of
-# rbp is not interesting for the JIT-generated machine code.  So the
-# STM_JMPBUF_OFS is the offset from the stack top to the start of the
-# buffer, with only words at offset +1 and +2 in this buffer being
-# meaningful.  We use ebp, i.e. the word at offset +0, to store the
-# resume counter.
-
-STM_RESUME_BUF_WORDS  = 4
-STM_FRAME_FIXED_SIZE  = FRAME_FIXED_SIZE + STM_RESUME_BUF_WORDS
-STM_JMPBUF_OFS        = WORD * FRAME_FIXED_SIZE
-STM_JMPBUF_OFS_RBP    = STM_JMPBUF_OFS + 0 * WORD
-STM_JMPBUF_OFS_RIP    = STM_JMPBUF_OFS + 1 * WORD
-STM_JMPBUF_OFS_RSP    = STM_JMPBUF_OFS + 2 * WORD
-STM_OLD_SHADOWSTACK   = STM_JMPBUF_OFS + 3 * WORD
+# The STM rewind_jmp_buf (on x86-64) is two words wide:
+STM_REWIND_JMP_BUF_WORDS  = 2
+STM_FRAME_FIXED_SIZE      = FRAME_FIXED_SIZE + STM_REWIND_JMP_BUF_WORDS
+STM_JMPBUF_OFS            = WORD * FRAME_FIXED_SIZE
+STM_SHADOWSTACK_BASE_OFS  = STM_JMPBUF_OFS + 0 * WORD
+STM_PREV_OFS              = STM_JMPBUF_OFS + 1 * WORD
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -19,8 +19,7 @@
 from rpython.jit.backend.x86.arch import (
     FRAME_FIXED_SIZE, WORD, IS_X86_64, JITFRAME_FIXED_SIZE, IS_X86_32,
     PASS_ON_MY_FRAME, STM_FRAME_FIXED_SIZE, STM_JMPBUF_OFS,
-    STM_JMPBUF_OFS_RIP, STM_JMPBUF_OFS_RSP, STM_JMPBUF_OFS_RBP,
-    STM_OLD_SHADOWSTACK)
+    STM_SHADOWSTACK_BASE_OFS, STM_PREV_OFS)
 from rpython.jit.backend.x86.regloc import (eax, ecx, edx, ebx, esp, ebp, esi,
     xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, r8, r9, r10, r11, edi,
     r12, r13, r14, r15, X86_64_SCRATCH_REG, X86_64_XMM_SCRATCH_REG,
@@ -886,45 +885,83 @@
         gcrootmap = self.cpu.gc_ll_descr.gcrootmap
         return self.heap_tl(gcrootmap.get_root_stack_top_addr())
 
+    def heap_rjthread(self):
+        """STM: Return an AddressLoc for '&stm_thread_local.rjthread'."""
+        return self.heap_tl(rstm.adr_rjthread)
+
+    def heap_rjthread_head(self):
+        """STM: Return an AddressLoc for '&stm_thread_local.rjthread.head'."""
+        return self.heap_tl(rstm.adr_rjthread_head)
+
+    def heap_rjthread_moved_off_base(self):
+        """STM: AddressLoc for '&stm_thread_local.rjthread.moved_off_base'."""
+        return self.heap_tl(rstm.adr_rjthread_moved_off_base)
+
     def _call_header_shadowstack(self):
         # put the frame in ebp on the shadowstack for the GC to find
         # (ebp is a writeable object and does not need a write-barrier
         # again (ensured by the code calling the loop))
-        self.mc.MOV(ebx, self.heap_shadowstack_top())
+        mc = self.mc
+        mc.MOV(ebx, self.heap_shadowstack_top())
+        mc.MOV_mr((self.SEGMENT_NO, ebx.value, 0), ebp.value)
+                                                      # MOV [ebx], ebp
         if self.cpu.gc_ll_descr.stm:
-            self.mc.MOV_mi((self.SEGMENT_NO, ebx.value, 0),
-                           rstm.stm_stack_marker_new) # MOV [ebx], MARKER_NEW
-            self.mc.MOV_mr((self.SEGMENT_NO, ebx.value, WORD),
-                           ebp.value)                 # MOV [ebx+WORD], ebp
-            self.mc.MOV_sr(STM_OLD_SHADOWSTACK, ebx.value)
-                                                      # MOV [esp+xx], ebx
-            self.mc.ADD_ri(ebx.value, 2 * WORD)
+            # inlining stm_rewind_jmp_enterframe()
+            r11v = X86_64_SCRATCH_REG.value
+            rjh = self.heap_rjthread_head()
+            mc.ADD_ri8(ebx.value, 1)                 # ADD ebx, 1
+            mc.MOV_rm(r11v, rjh)                     # MOV r11, [rjthread.head]
+            mc.MOV_sr(STM_SHADOWSTACK_BASE_OFS, ebx.value)
+                                                     # MOV [esp+ssbase], ebx
+            mc.ADD_ri8(ebx.value, WORD-1)            # ADD ebx, 7
+            mc.MOV_sr(STM_PREV_OFS, r11v)            # MOV [esp+prev], r11
+            mc.MOV(self.heap_shadowstack_top(), ebx) # MOV [rootstacktop], ebx
+            mc.LEA_rs(r11v, STM_JMPBUF_OFS)          # LEA r11, [esp+bufofs]
+            mc.MOV_mr(rjh, r11v)                     # MOV [rjthread.head], r11
+        #
         else:
-            self.mc.MOV_mr((self.SEGMENT_NO, ebx.value, 0),
-                           ebp.value)                 # MOV [ebx], ebp
-            self.mc.ADD_ri(ebx.value, WORD)
-        self.mc.MOV(self.heap_shadowstack_top(), ebx) # MOV [rootstacktop], ebx
+            mc.ADD_ri(ebx.value, WORD)               # ADD ebx, WORD
+            mc.MOV(self.heap_shadowstack_top(), ebx) # MOV [rootstacktop], ebx
 
     def _call_footer_shadowstack(self):
+        mc = self.mc
         if self.cpu.gc_ll_descr.stm:
             # STM: in the rare case where we need realloc_frame, the new
             # frame is pushed on top of the old one.  It's even possible
             # that this occurs more than once.  So we have to restore
             # the old shadowstack by looking up its original saved value.
-            self.mc.MOV_rs(ecx.value, STM_OLD_SHADOWSTACK)
-            self.mc.MOV(self.heap_shadowstack_top(), ecx)
+            # The rest of this is inlining stm_rewind_jmp_leaveframe().
+            r11v = X86_64_SCRATCH_REG.value
+            rjh = self.heap_rjthread_head()
+            rjmovd_o_b = self.heap_rjthread_moved_off_base()
+            # NOTE(review): confirm MOV_mr/CMP_rm accept AddressLoc operands
+            mc.MOV_rs(r11v, STM_SHADOWSTACK_BASE_OFS) # MOV r11, [esp+ssbase]
+            mc.MOV_rs(ebx.value, STM_PREV_OFS)        # MOV ebx, [esp+prev]
+            mc.MOV(self.heap_shadowstack_top(), r11v) # MOV [rootstacktop], r11
+            mc.LEA_rs(r11v, STM_JMPBUF_OFS)           # LEA r11, [esp+bufofs]
+            mc.MOV_mr(rjh, ebx.value)                 # MOV [rjthread.head], ebx
+            mc.CMP_rm(r11v, rjmovd_o_b)               # CMP r11, [rjth.movd_o_b]
+            mc.J_il8(rx86.Conditions['NE'], 0)        # JNE label_below
+            jne_location = mc.get_relative_pos()
+            #
+            mc.CALL(imm(rstm.adr_pypy__rewind_jmp_copy_stack_slice))
+            #
+            # patch the JNE above
+            offset = mc.get_relative_pos() - jne_location
+            assert 0 < offset <= 127
+            mc.overwrite(jne_location-1, chr(offset))
         else:
             # SUB [rootstacktop], WORD
             gcrootmap = self.cpu.gc_ll_descr.gcrootmap
             rst = gcrootmap.get_root_stack_top_addr()
             if rx86.fits_in_32bits(rst):
                 # SUB [rootstacktop], WORD
-                self.mc.SUB_ji8((self.SEGMENT_NO, rst), WORD)
+                mc.SUB_ji8((self.SEGMENT_NO, rst), WORD)
             else:
                 # MOV ebx, rootstacktop
                 # SUB [ebx], WORD
-                self.mc.MOV_ri(ebx.value, rst)
-                self.mc.SUB_mi8((self.SEGMENT_NO, ebx.value, 0), WORD)
+                mc.MOV_ri(ebx.value, rst)
+                mc.SUB_mi8((self.SEGMENT_NO, ebx.value, 0), WORD)
 
     def redirect_call_assembler(self, oldlooptoken, newlooptoken):
         # some minimal sanity checking
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -1292,9 +1292,6 @@
                                                 need_lower_byte=True)
             self.perform(op, [], resloc)
 
-    def XXXconsider_stm_transaction_break(self, op, guard_op):
-        self.perform_with_guard(op, guard_op, [], None)
-
     def consider_jump(self, op):
         assembler = self.assembler
         assert self.jump_target_descr is None
diff --git a/rpython/rlib/rstm.py b/rpython/rlib/rstm.py
--- a/rpython/rlib/rstm.py
+++ b/rpython/rlib/rstm.py
@@ -13,12 +13,16 @@
 TID = rffi.UINT
 tid_offset = CFlexSymbolic('offsetof(struct rpyobj_s, tid)')
 stm_nb_segments = CFlexSymbolic('STM_NB_SEGMENTS')
-stm_stack_marker_new = CFlexSymbolic('STM_STACK_MARKER_NEW')
-stm_stack_marker_old = CFlexSymbolic('STM_STACK_MARKER_OLD')
 adr_nursery_free = CFlexSymbolic('((long)&STM_SEGMENT->nursery_current)')
 adr_nursery_top  = CFlexSymbolic('((long)&STM_SEGMENT->nursery_end)')
 adr_pypy_stm_nursery_low_fill_mark = (
     CFlexSymbolic('((long)&pypy_stm_nursery_low_fill_mark)'))
+adr_rjthread = (
+    CFlexSymbolic('((long)&stm_thread_local.rjthread)'))
+adr_rjthread_head = (
+    CFlexSymbolic('((long)&stm_thread_local.rjthread.head)'))
+adr_rjthread_moved_off_base = (
+    CFlexSymbolic('((long)&stm_thread_local.rjthread.moved_off_base)'))
 adr_transaction_read_version = (
     CFlexSymbolic('((long)&STM_SEGMENT->transaction_read_version)'))
 adr_jmpbuf_ptr = (
@@ -39,6 +43,8 @@
     CFlexSymbolic('((long)&stm_commit_transaction)'))
 adr_pypy_stm_start_transaction = (
     CFlexSymbolic('((long)&pypy_stm_start_transaction)'))
+adr_pypy__rewind_jmp_copy_stack_slice = (
+    CFlexSymbolic('((long)&pypy__rewind_jmp_copy_stack_slice)'))
 
 
 def rewind_jmp_frame():
diff --git a/rpython/translator/stm/src_stm/stmgcintf.h b/rpython/translator/stm/src_stm/stmgcintf.h
--- a/rpython/translator/stm/src_stm/stmgcintf.h
+++ b/rpython/translator/stm/src_stm/stmgcintf.h
@@ -116,5 +116,10 @@
     /* NB. this logic is hard-coded in jit/backend/x86/assembler.py too */
 }
 
+static void pypy__rewind_jmp_copy_stack_slice(void)
+{
+    _rewind_jmp_copy_stack_slice(&stm_thread_local.rjthread);
+}
+
 
 #endif  /* _RPY_STMGCINTF_H */


More information about the pypy-commit mailing list