[pypy-svn] r79060 - in pypy/branch/arm-backend/pypy/jit/backend/arm: . helper

david at codespeak.net david at codespeak.net
Sat Nov 13 17:21:21 CET 2010


Author: david
Date: Sat Nov 13 17:21:19 2010
New Revision: 79060

Modified:
   pypy/branch/arm-backend/pypy/jit/backend/arm/assembler.py
   pypy/branch/arm-backend/pypy/jit/backend/arm/helper/assembler.py
   pypy/branch/arm-backend/pypy/jit/backend/arm/opassembler.py
   pypy/branch/arm-backend/pypy/jit/backend/arm/runner.py
Log:
Implement forcing with force_token, call_may_force and guard_no_force operations.
Add some logic to possibly merge operations with following guards.
Additionally, implement some fixes for procedure calling related to the
manipulation of frame and stack pointers.


Modified: pypy/branch/arm-backend/pypy/jit/backend/arm/assembler.py
==============================================================================
--- pypy/branch/arm-backend/pypy/jit/backend/arm/assembler.py	(original)
+++ pypy/branch/arm-backend/pypy/jit/backend/arm/assembler.py	Sat Nov 13 17:21:19 2010
@@ -35,23 +35,25 @@
     def setup_failure_recovery(self):
 
         @rgc.no_collect
-        def failure_recovery_func(mem_loc, stackloc):
+        def failure_recovery_func(mem_loc, frame_loc):
             """mem_loc is a structure in memory describing where the values for
-            the failargs are stored. stacklock is the address of the stack
-            section where the registers were saved."""
-            enc = rffi.cast(rffi.CCHARP, mem_loc)
-            stack = rffi.cast(rffi.CCHARP, stackloc)
-            return self.decode_registers_and_descr(enc, stack)
+            the failargs are stored.
+            frame loc is the address of the frame pointer for the frame to be
+            decoded frame """
+            return self.decode_registers_and_descr(mem_loc, frame_loc)
 
         self.failure_recovery_func = failure_recovery_func
 
     @rgc.no_collect
-    def decode_registers_and_descr(self, enc, stack):
-        """Decode locations encoded in memory at enc and write the values to
+    def decode_registers_and_descr(self, mem_loc, frame_loc):
+        """Decode locations encoded in memory at mem_loc and write the values to
         the failboxes.
-        Registers are saved on the stack
-        XXX Rest to follow"""
+        Values for spilled vars and registers are stored on stack at frame_loc
+        """
+        enc = rffi.cast(rffi.CCHARP, mem_loc)
         frame_depth = self.decode32(enc, 0)
+        stack = rffi.cast(rffi.CCHARP, frame_loc - (frame_depth)*WORD)
+        regs = rffi.cast(rffi.CCHARP, frame_loc - (frame_depth + len(r.all_regs))*WORD)
         i = 3
         fail_index = -1
         while(True):
@@ -73,13 +75,11 @@
                 i += 4
             elif res == '\xFC': # stack location
                 stack_loc = self.decode32(enc, i+1)
-                #XXX ffuu use propper calculation here
-                value = self.decode32(stack,
-                                    (len(r.all_regs)+frame_depth-stack_loc)*WORD)
+                value = self.decode32(stack, (frame_depth - stack_loc)*WORD)
                 i += 4
             else: # an int for now
                 reg = ord(enc[i])
-                value = self.decode32(stack, reg*WORD)
+                value = self.decode32(regs, reg*WORD)
 
             if group == '\xEF': # INT
                 self.fail_boxes_int.setitem(fail_index, value)
@@ -113,15 +113,17 @@
         self.setup_failure_recovery()
         functype = lltype.Ptr(lltype.FuncType([lltype.Signed, lltype.Signed], lltype.Signed))
         decode_registers_addr = llhelper(functype, self.failure_recovery_func)
+
         self.mc.PUSH([reg.value for reg in r.all_regs])     # registers r0 .. r10
         self.mc.MOV_rr(r.r0.value, r.lr.value)  # move mem block address, to r0 to pass as
                                     # parameter to next procedure call
-        self.mc.MOV_rr(r.r1.value, r.sp.value)  # pass the current stack pointer as second param
+        self.mc.MOV_rr(r.r1.value, r.fp.value)  # pass the current frame pointer as second param
 
         self.mc.BL(rffi.cast(lltype.Signed, decode_registers_addr))
         self.mc.MOV_rr(r.ip.value, r.r0.value)
         self.mc.POP([reg.value for reg in r.all_regs])
         self.mc.MOV_rr(r.r0.value, r.ip.value)
+        self.mc.ensure_can_fit(self.epilog_size)
         self.gen_func_epilog()
 
     def _gen_path_to_exit_path(self, op, args, regalloc, fcond=c.AL):
@@ -135,12 +137,15 @@
         \xFE = Empty arg
         """
 
+        descr = op.getdescr()
         box = TempBox()
         reg = regalloc.force_allocate_reg(box)
         # XXX free this memory
         # XXX allocate correct amount of memory
         mem = lltype.malloc(rffi.CArray(lltype.Char), (len(args)+5)*4, flavor='raw')
-        self.encode32(mem, 0, regalloc.frame_manager.frame_depth)
+        # Note, the actual frame depth is one less than the value stored in
+        # regalloc.frame_manager.frame_depth
+        self.encode32(mem, 0, regalloc.frame_manager.frame_depth - 1)
         i = 0
         j = 4
         while(i < len(args)):
@@ -175,8 +180,7 @@
         mem[j] = chr(0xFF)
         memaddr = rffi.cast(lltype.Signed, mem)
 
-
-        n = self.cpu.get_fail_descr_number(op.getdescr())
+        n = self.cpu.get_fail_descr_number(descr)
         self.encode32(mem, j+1, n)
         self.mc.gen_load_int(r.lr.value, memaddr, cond=fcond) # use lr to pass an argument
         self.mc.B(self._exit_code_addr, fcond, reg)
@@ -184,7 +188,7 @@
         # This register is used for patching when assembling a bridge
         # guards going to be patched are allways conditional
         if fcond != c.AL:
-            op.getdescr()._arm_guard_reg = reg
+            descr._arm_guard_reg = reg
         regalloc.possibly_free_var(box)
         return memaddr
 
@@ -192,13 +196,16 @@
         while(self.mc.curraddr() % FUNC_ALIGN != 0):
             self.mc.writechar(chr(0))
 
-    epilog_size = 2*WORD
+    epilog_size = 3*WORD
     def gen_func_epilog(self,cond=c.AL):
         self.mc.MOV_rr(r.sp.value, r.fp.value)
+        self.mc.POP([r.r4.value], cond=cond) # Pop value used as forcething
         self.mc.POP([reg.value for reg in r.callee_restored_registers], cond=cond)
 
     def gen_func_prolog(self):
         self.mc.PUSH([reg.value for reg in r.callee_saved_registers])
+        self.mc.MOV_ri(r.r4.value, 0xCC)
+        self.mc.PUSH([r.r4.value]) # Push some reg to use as force thing which is restored when popping from stack
         self.mc.MOV_rr(r.fp.value, r.sp.value)
 
     def gen_bootstrap_code(self, inputargs, regalloc, looptoken):
@@ -232,12 +239,8 @@
         loop_head=self.mc.curraddr()
         looptoken._arm_bootstrap_code = loop_start
         looptoken._arm_loop_code = loop_head
-        fcond=c.AL
         print inputargs, operations
-        for op in operations:
-            # XXX consider merging ops with next one if it is an adecuate guard
-            opnum = op.getopnum()
-            fcond = self.operations[opnum](self, op, regalloc, fcond)
+        self._walk_operations(operations, regalloc)
 
         self._patch_sp_offset(sp_patch_location, regalloc)
 
@@ -255,23 +258,59 @@
 
     def _patch_sp_offset(self, addr, regalloc):
         cb = ARMv7InMemoryBuilder(addr, ARMv7InMemoryBuilder.size_of_gen_load_int)
+        # Note: the frame_depth is one less than the value stored in the frame
+        # manager
         if regalloc.frame_manager.frame_depth == 1:
             return
-        n = (regalloc.frame_manager.frame_depth)*WORD
+        n = (regalloc.frame_manager.frame_depth-1)*WORD
         self._adjust_sp(n, regalloc, cb)
 
     def _adjust_sp(self, n, regalloc, cb=None, fcond=c.AL):
         if cb is None:
             cb = self.mc
+        if n < 0:
+            n = -n
+            rev = True
+        else:
+            rev = False
         if n <= 0xFF and fcond == c.AL:
-            cb.SUB_ri(r.sp.value, r.sp.value, n)
+            if rev:
+                op = cb.ADD_ri
+            else:
+                op = cb.SUB_ri
+            op(r.sp.value, r.sp.value, n)
         else:
             b = TempBox()
             reg = regalloc.force_allocate_reg(b)
             cb.gen_load_int(reg.value, n, cond=fcond)
-            cb.SUB_rr(r.sp.value, r.sp.value, reg.value, cond=fcond)
+            if rev:
+                op = cb.ADD_rr
+            else:
+                op = cb.SUB_rr
+            op(r.sp.value, r.sp.value, reg.value, cond=fcond)
             regalloc.possibly_free_var(b)
 
+    def _walk_operations(self, operations, regalloc):
+        fcond=c.AL
+        i = 0
+        while i < len(operations):
+            op = operations[i]
+            # XXX consider merging ops with next one if it is an adecuate guard
+            opnum = op.getopnum()
+            if self.can_merge_with_next_guard(op, i, operations):
+                fcond = self.operations_with_guard[opnum](self, op,
+                                            operations[i+1], regalloc, fcond)
+                i += 1
+            else:
+                fcond = self.operations[opnum](self, op, regalloc, fcond)
+            i += 1
+
+    def can_merge_with_next_guard(self, op, i, operations):
+        if op.getopnum() == rop.CALL_MAY_FORCE or op.getopnum() == rop.CALL_ASSEMBLER:
+            assert operations[i + 1].getopnum() == rop.GUARD_NOT_FORCED
+            return True
+        return False
+
     def assemble_bridge(self, faildescr, inputargs, operations):
         enc = rffi.cast(rffi.CCHARP, faildescr._failure_recovery_code)
         longevity = compute_vars_longevity(inputargs, operations)
@@ -280,10 +319,7 @@
         regalloc.update_bindings(enc, inputargs)
         bridge_head = self.mc.curraddr()
 
-        fcond = c.AL
-        for op in operations:
-            opnum = op.getopnum()
-            fcond = self.operations[opnum](self, op, regalloc, fcond)
+        self._walk_operations(operations, regalloc)
         self.gen_func_epilog()
         print 'Done building bridges'
         self.patch_trace(faildescr, bridge_head)
@@ -322,6 +358,9 @@
             self.mc.MOV_rr(loc.value, prev_loc.value)
     mov_loc_loc = regalloc_mov
 
+    def leave_jitted_hook(self):
+        pass
+
 def make_operation_list():
     def notimplemented(self, op, regalloc, fcond):
         raise NotImplementedError, op
@@ -339,4 +378,19 @@
         operations[value] = func
     return operations
 
+def make_guard_operation_list():
+    def notimplemented(self, op, guard_op, regalloc, fcond):
+        raise NotImplementedError, op
+    guard_operations = [notimplemented] * rop._LAST
+    for key, value in rop.__dict__.items():
+        key = key.lower()
+        if key.startswith('_'):
+            continue
+        methname = 'emit_guard_%s' % key
+        if hasattr(AssemblerARM, methname):
+            func = getattr(AssemblerARM, methname).im_func
+            guard_operations[value] = func
+    return guard_operations
+
 AssemblerARM.operations = make_operation_list()
+AssemblerARM.operations_with_guard = make_guard_operation_list()

Modified: pypy/branch/arm-backend/pypy/jit/backend/arm/helper/assembler.py
==============================================================================
--- pypy/branch/arm-backend/pypy/jit/backend/arm/helper/assembler.py	(original)
+++ pypy/branch/arm-backend/pypy/jit/backend/arm/helper/assembler.py	Sat Nov 13 17:21:19 2010
@@ -50,8 +50,10 @@
         arg2 = regalloc.make_sure_var_in_reg(a1, [a0], selected_reg=r.r1, imm_fine=False)
         assert arg1 == r.r0
         assert arg2 == r.r1
+        regalloc.before_call()
         res = regalloc.force_allocate_reg(op.result, selected_reg=r.r0)
         getattr(self.mc, opname)(fcond)
+        regalloc.after_call(op.result)
         regalloc.possibly_free_vars_for_op(op)
         return fcond
     return f

Modified: pypy/branch/arm-backend/pypy/jit/backend/arm/opassembler.py
==============================================================================
--- pypy/branch/arm-backend/pypy/jit/backend/arm/opassembler.py	(original)
+++ pypy/branch/arm-backend/pypy/jit/backend/arm/opassembler.py	Sat Nov 13 17:21:19 2010
@@ -230,7 +230,7 @@
         self._gen_path_to_exit_path(op, op.getarglist(), regalloc, c.AL)
         return fcond
 
-    def emit_op_call(self, op, regalloc, fcond):
+    def emit_op_call(self, op, regalloc, fcond, save_all_regs=False):
         locs = []
         # all arguments past the 4th go on the stack
         # XXX support types other than int (one word types)
@@ -244,17 +244,23 @@
                 regalloc.possibly_free_var(reg)
 
         adr = self.cpu.cast_adr_to_int(op.getarg(0).getint())
-        # XXX use PUSH here instead of spilling every reg for itself
-        regalloc.before_call()
 
         reg_args = min(op.numargs()-1, 4)
         for i in range(1, reg_args+1):
             l = regalloc.make_sure_var_in_reg(op.getarg(i),
                                             selected_reg=r.all_regs[i-1])
             locs.append(l)
-        self.mc.BL(adr)
+        # XXX use PUSH here instead of spilling every reg for itself
+        if save_all_regs:
+            regalloc.before_call(r.all_regs, save_all_regs)
+        else:
+            regalloc.before_call()
         regalloc.force_allocate_reg(op.result, selected_reg=r.r0)
+        self.mc.BL(adr)
         regalloc.after_call(op.result)
+        # readjust the sp in case we passed some args on the stack
+        if op.numargs() > 5:
+            self._adjust_sp(-n, regalloc, fcond=fcond)
         regalloc.possibly_free_vars(locs)
 
 class FieldOpAssembler(object):
@@ -508,9 +514,35 @@
         f(value_loc.value, temp.value, basesize, cond=fcond)
         return fcond
 
+class ForceOpAssembler(object):
+    def emit_op_force_token(self, op, regalloc, fcond):
+        res_loc = regalloc.force_allocate_reg(op.result)
+        self.mc.MOV_rr(res_loc.value, r.fp.value)
+        return fcond
+
+    def emit_guard_call_may_force(self, op, guard_op, regalloc, fcond):
+        faildescr = guard_op.getdescr()
+        fail_index = self.cpu.get_fail_descr_number(faildescr)
+        t = TempBox()
+        l0 = regalloc.force_allocate_reg(t)
+        self.mc.gen_load_int(l0.value, fail_index)
+        self.mc.STR_ri(l0.value, r.fp.value)
+
+        # force all reg values to be spilled when calling
+        fcond = self.emit_op_call(op, regalloc, fcond, save_all_regs=True)
+
+        self.mc.LDR_ri(l0.value, r.fp.value)
+        self.mc.CMP_ri(l0.value, 0)
+
+        regalloc.possibly_free_var(t)
+
+        self._emit_guard(guard_op, regalloc, c.LT)
+        return fcond
+
 class ResOpAssembler(GuardOpAssembler, IntOpAsslember,
                     OpAssembler, UnaryIntOpAssembler,
                     FieldOpAssembler, ArrayOpAssember,
-                    StrOpAssembler, UnicodeOpAssembler):
+                    StrOpAssembler, UnicodeOpAssembler,
+                    ForceOpAssembler):
     pass
 

Modified: pypy/branch/arm-backend/pypy/jit/backend/arm/runner.py
==============================================================================
--- pypy/branch/arm-backend/pypy/jit/backend/arm/runner.py	(original)
+++ pypy/branch/arm-backend/pypy/jit/backend/arm/runner.py	Sat Nov 13 17:21:19 2010
@@ -61,3 +61,18 @@
     def cast_ptr_to_int(x):
         adr = llmemory.cast_ptr_to_adr(x)
         return self.cast_adr_to_int(adr)
+
+    def force(self, addr_of_force_index):
+        TP = rffi.CArrayPtr(lltype.Signed)
+        fail_index = rffi.cast(TP, addr_of_force_index)[0]
+        assert fail_index >= 0, "already forced!"
+        faildescr = self.get_fail_descr_from_number(fail_index)
+        rffi.cast(TP, addr_of_force_index)[0] = -1
+        # start of "no gc operation!" block
+        fail_index_2 = self.assembler.failure_recovery_func(
+            faildescr._failure_recovery_code,
+            addr_of_force_index)
+        self.assembler.leave_jitted_hook()
+        # end of "no gc operation!" block
+        #assert fail_index == fail_index_2
+        return faildescr



More information about the Pypy-commit mailing list