[pypy-commit] pypy stmgc-c7: in-progress

arigo noreply at buildbot.pypy.org
Mon Apr 21 20:48:43 CEST 2014


Author: Armin Rigo <arigo at tunes.org>
Branch: stmgc-c7
Changeset: r70826:389b28ee75ee
Date: 2014-04-21 19:45 +0200
http://bitbucket.org/pypy/pypy/changeset/389b28ee75ee/

Log:	in-progress

diff --git a/rpython/jit/backend/llsupport/gcmap.py b/rpython/jit/backend/llsupport/gcmap.py
--- a/rpython/jit/backend/llsupport/gcmap.py
+++ b/rpython/jit/backend/llsupport/gcmap.py
@@ -4,15 +4,29 @@
 from rpython.rlib.rarithmetic import r_uint
 from rpython.jit.backend.llsupport.symbolic import WORD
 
-def allocate_gcmap(assembler, frame_depth, fixed_size):
+GCMAP_STM_LOCATION = 2     # xxx add this only if stm
+
+def allocate_gcmap(assembler, frame_depth, fixed_size, stm_location=None):
     size = frame_depth + fixed_size
-    malloc_size = (size // WORD // 8 + 1) + 1
+    malloc_size = (size // WORD // 8 + 1) + GCMAP_STM_LOCATION + 1
     rawgcmap = assembler.datablockwrapper.malloc_aligned(WORD * malloc_size,
                                                     WORD)
     # set the length field
     rffi.cast(rffi.CArrayPtr(lltype.Signed), rawgcmap)[0] = malloc_size - 1
     gcmap = rffi.cast(lltype.Ptr(jitframe.GCMAP), rawgcmap)
     # zero the area
-    for i in range(malloc_size - 1):
+    for i in range(malloc_size - 3):
         gcmap[i] = r_uint(0)
+    # write the stm_location in the last two words
+    raw_stm_location = extract_raw_stm_location(stm_location)
+    gcmap[malloc_size - 3], gcmap[malloc_size - 2] = raw_stm_location
     return gcmap
+
+def extract_raw_stm_location(stm_location):
+    if stm_location is not None:
+        num = rffi.cast(lltype.Unsigned, stm_location.num)
+        ref = rffi.cast(lltype.Unsigned, stm_location.ref)
+    else:
+        num = r_uint(0)
+        ref = r_uint(0)
+    return (num, ref)
diff --git a/rpython/jit/backend/llsupport/jitframe.py b/rpython/jit/backend/llsupport/jitframe.py
--- a/rpython/jit/backend/llsupport/jitframe.py
+++ b/rpython/jit/backend/llsupport/jitframe.py
@@ -160,6 +160,10 @@
         MAX = 64
     gcmap = (obj_addr + getofs('jf_gcmap')).address[0]
     gcmap_lgt = (gcmap + GCMAPLENGTHOFS).signed[0]
+    #
+    from rpython.jit.backend.llsupport.gcmap import GCMAP_STM_LOCATION
+    gcmap_lgt -= GCMAP_STM_LOCATION
+    #
     while no < gcmap_lgt:
         cur = (gcmap + GCMAPBASEOFS + UNSIGN_SIZE * no).unsigned[0]
         while not (cur & (1 << state)):
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -5,7 +5,7 @@
 from rpython.jit.backend.llsupport.assembler import (GuardToken, BaseAssembler,
                                                 DEBUG_COUNTER, debug_bridge)
 from rpython.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
-from rpython.jit.backend.llsupport.gcmap import allocate_gcmap
+from rpython.jit.backend.llsupport.gcmap import extract_raw_stm_location
 from rpython.jit.metainterp.history import Const, Box, VOID
 from rpython.jit.metainterp.history import AbstractFailDescr, INT, REF, FLOAT
 from rpython.rtyper.lltypesystem import lltype, rffi, rstr, llmemory
@@ -396,6 +396,29 @@
         #
         if not for_frame:
             self._push_all_regs_to_frame(mc, [], withfloats, callee_only=True)
+            #
+            if self.cpu.gc_ll_descr.stm:
+                # We are in the slow-path of write barriers, which is
+                # supposed to be called rarely.  We have to save the
+                # current 'stm_location' so that it is found.  The easiest
+                # is to simply push it on the shadowstack, from its source
+                # location as two extra arguments on the machine stack.
+                # 'r14' is kept around as the original value of
+                # shadowstack_top, ready to be stored back below.
+                # XXX this should also be done if 'for_frame' is true...
+                assert IS_X86_64
+                mc.MOV(r14, self.heap_shadowstack_top())
+                mc.MOV_rs(edi.value, 3 * WORD)
+                # do here the 'num = (num<<1) + 1' rather than at the caller
+                # site, to increase the chances that it can use PUSH_i8
+                mc.LEA_ra(edi.value, (self.SEGMENT_NO, rx86.NO_BASE_REGISTER,
+                                      edi.value, 1, +1))
+                mc.MOV_mr((self.SEGMENT_NO, r14.value, 0), edi.value)
+                mc.MOV_rs(edi.value, 2 * WORD)
+                mc.MOV_mr((self.SEGMENT_NO, r14.value, WORD), edi.value)
+                mc.LEA_rm(edi.value, (self.SEGMENT_NO, r14.value, 2 * WORD))
+                mc.MOV(self.heap_shadowstack_top(), edi)
+            #
             if IS_X86_32:
                 # we have 2 extra words on stack for retval and we pass 1 extra
                 # arg, so we need to substract 2 words
@@ -440,11 +463,16 @@
         #
 
         if not for_frame:
+            if self.cpu.gc_ll_descr.stm:
+                mc.MOV(self.heap_shadowstack_top(), r14)
             if IS_X86_32:
                 # ADD touches CPU flags
                 mc.LEA_rs(esp.value, 2 * WORD)
             self._pop_all_regs_from_frame(mc, [], withfloats, callee_only=True)
-            mc.RET16_i(WORD)
+            if self.cpu.gc_ll_descr.stm:
+                mc.RET16_i(3 * WORD)
+            else:
+                mc.RET16_i(WORD)
         else:
             if IS_X86_32:
                 mc.MOV_rs(edx.value, 4 * WORD)
@@ -1863,7 +1891,7 @@
                guard_opnum == rop.GUARD_NOT_FORCED)
         is_guard_not_invalidated = guard_opnum == rop.GUARD_NOT_INVALIDATED
         is_guard_not_forced = guard_opnum == rop.GUARD_NOT_FORCED
-        gcmap = allocate_gcmap(self, frame_depth, JITFRAME_FIXED_SIZE)
+        gcmap = self._regalloc.get_empty_gcmap(frame_depth)
         return GuardToken(self.cpu, gcmap, faildescr, failargs,
                           fail_locs, exc, frame_depth,
                           is_guard_not_invalidated, is_guard_not_forced)
@@ -2229,6 +2257,16 @@
             assert self.wb_slowpath[helper_num] != 0
         #
         if not is_frame:
+            if self.cpu.gc_ll_descr.stm:
+                # get the num and ref components of the stm_location, and
+                # push them to the stack.  It's 16 bytes, so alignment is
+                # still ok.  The one or three words pushed here are removed
+                # by the callee.
+                assert IS_X86_64
+                num, ref = extract_raw_stm_location(
+                    self._regalloc.stm_location)
+                mc.PUSH_i(num)
+                mc.PUSH_i(ref)
             mc.PUSH(loc_base)
         if is_frame and align_stack:
             mc.SUB_ri(esp.value, 16 - WORD) # erase the return address
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -7,6 +7,7 @@
 from rpython.jit.backend.llsupport.descr import (ArrayDescr, CallDescr,
     unpack_arraydescr, unpack_fielddescr, unpack_interiorfielddescr)
 from rpython.jit.backend.llsupport.gcmap import allocate_gcmap
+from rpython.jit.backend.llsupport.jitframe import GCMAP
 from rpython.jit.backend.llsupport.regalloc import (FrameManager, BaseRegalloc,
      RegisterManager, TempBox, compute_vars_longevity, is_comparison_or_ovf_op)
 from rpython.jit.backend.x86 import rx86
@@ -22,7 +23,7 @@
 from rpython.jit.codewriter.effectinfo import EffectInfo
 from rpython.jit.metainterp.history import (Box, Const, ConstInt, ConstPtr,
     ConstFloat, BoxInt, BoxFloat, INT, REF, FLOAT, TargetToken)
-from rpython.jit.metainterp.resoperation import rop, ResOperation
+from rpython.jit.metainterp.resoperation import rop, ResOperation, StmLocation
 from rpython.rlib import rgc
 from rpython.rlib.objectmodel import we_are_translated
 from rpython.rlib.rarithmetic import r_longlong, r_uint
@@ -150,6 +151,9 @@
                                   assembler = self.assembler)
         self.xrm = xmm_reg_mgr_cls(self.longevity, frame_manager = self.fm,
                                    assembler = self.assembler)
+        # 'self.stm_location' is the StmLocation of the current operation
+        # (or the last one that actually had a StmLocation)
+        self.stm_location = None
         return operations
 
     def prepare_loop(self, inputargs, operations, looptoken, allgcrefs):
@@ -318,6 +322,12 @@
             self.assembler.mc.mark_op(op)
             self.rm.position = i
             self.xrm.position = i
+            #
+            if op.stm_location is not None:
+                if (self.stm_location.num != op.stm_location.num or
+                    self.stm_location.ref != op.stm_location.ref):
+                    self.stm_location = op.stm_location
+            #
             if op.has_no_side_effect() and op.result not in self.longevity:
                 i += 1
                 self.possibly_free_vars_for_op(op)
@@ -899,9 +909,13 @@
             gc_ll_descr.get_nursery_top_addr(),
             lengthloc, itemsize, maxlength, gcmap, arraydescr)
 
+    def get_empty_gcmap(self, frame_depth):
+        return allocate_gcmap(self.assembler, frame_depth,
+                              JITFRAME_FIXED_SIZE, self.stm_location)
+
     def get_gcmap(self, forbidden_regs=[], noregs=False):
         frame_depth = self.fm.get_frame_depth()
-        gcmap = allocate_gcmap(self.assembler, frame_depth, JITFRAME_FIXED_SIZE)
+        gcmap = self.get_empty_gcmap(frame_depth)
         for box, loc in self.rm.reg_bindings.iteritems():
             if loc in forbidden_regs:
                 continue
diff --git a/rpython/jit/backend/x86/stmtlocal.py b/rpython/jit/backend/x86/stmtlocal.py
--- a/rpython/jit/backend/x86/stmtlocal.py
+++ b/rpython/jit/backend/x86/stmtlocal.py
@@ -9,6 +9,7 @@
     _instruction = "movq %%fs:0, %0"
 
 eci = ExternalCompilationInfo(post_include_bits=['''
+#define RPY_STM_JIT  1
 static long pypy__threadlocal_base(void)
 {
     /* XXX ONLY LINUX WITH GCC/CLANG FOR NOW XXX */
diff --git a/rpython/translator/stm/src_stm/extracode.h b/rpython/translator/stm/src_stm/extracode.h
--- a/rpython/translator/stm/src_stm/extracode.h
+++ b/rpython/translator/stm/src_stm/extracode.h
@@ -56,7 +56,7 @@
     return *(long *)(src + ofs);
 }
 
-static RPyStringSpace0 *_fetch_rpyspace0(char *seg_base, object_t *base,
+static RPyStringSpace0 *_fetch_rpsspace0(char *seg_base, object_t *base,
                                          long ofs)
 {
     char *src = seg_base + (uintptr_t)base;
@@ -74,14 +74,24 @@
     RPyStringSpace0 *co_name;
     RPyStringSpace0 *co_lnotab;
     char *ntrunc = "", *fntrunc = "";
-    long fnlen, nlen, line;
-    char *fn, *name;
+    long fnlen = 1, nlen = 1, line = 0;
+    char *fn = "?", *name = "?";
+
+#ifdef RPY_STM_JIT
+    if (odd_number == STM_STACK_MARKER_NEW ||
+        odd_number == STM_STACK_MARKER_OLD) {
+        assert(o);
+        /* XXX ji_jf_forward */
+        /* XXX */
+        o = NULL;
+    }
+#endif
 
     if (o) {
-        co_filename   =_fetch_rpyspace0(segment_base, o, g_co_filename_ofs);
-        co_name       =_fetch_rpyspace0(segment_base, o, g_co_name_ofs);
+        co_filename   =_fetch_rpsspace0(segment_base, o, g_co_filename_ofs);
+        co_name       =_fetch_rpsspace0(segment_base, o, g_co_name_ofs);
         co_firstlineno=_fetch_lngspace0(segment_base, o, g_co_firstlineno_ofs);
-        co_lnotab     =_fetch_rpyspace0(segment_base, o, g_co_lnotab_ofs);
+        co_lnotab     =_fetch_rpsspace0(segment_base, o, g_co_lnotab_ofs);
 
         long remaining = outputbufsize - 32;
         nlen = RPyString_Size(co_name);
@@ -112,13 +122,6 @@
             line += ((unsigned char *)lnotab)[i + 1];
         }
     }
-    else {
-        fnlen = 1;
-        fn = "?";
-        nlen = 1;
-        name = "?";
-        line = 0;
-    }
 
     snprintf(outputbuf, outputbufsize, "File \"%s%.*s\", line %ld, in %.*s%s",
              fntrunc, (int)fnlen, fn, line, (int)nlen, name, ntrunc);


More information about the pypy-commit mailing list