[pypy-commit] pypy ppc-jit-backend: (bivab, hager): use more space efficient guard state encoding like X86 and ARM backends

hager noreply at buildbot.pypy.org
Tue Feb 28 14:08:05 CET 2012


Author: hager <sven.hager at uni-duesseldorf.de>
Branch: ppc-jit-backend
Changeset: r52972:3e4381b13941
Date: 2012-02-27 17:40 +0100
http://bitbucket.org/pypy/pypy/changeset/3e4381b13941/

Log:	(bivab, hager): use more space efficient guard state encoding like
	X86 and ARM backends

diff --git a/pypy/jit/backend/ppc/ppc_assembler.py b/pypy/jit/backend/ppc/ppc_assembler.py
--- a/pypy/jit/backend/ppc/ppc_assembler.py
+++ b/pypy/jit/backend/ppc/ppc_assembler.py
@@ -162,12 +162,13 @@
         @rgc.no_collect
         def failure_recovery_func(mem_loc, spilling_pointer):
             """
-                mem_loc is a structure in memory describing where the values for
-                the failargs are stored.
+                mem_loc is a pointer to the beginning of the encoding.
 
                 spilling_pointer is the address of the FORCE_INDEX.
             """
-            return self.decode_registers_and_descr(mem_loc, spilling_pointer)
+            regs = rffi.cast(rffi.LONGP, spilling_pointer)
+            return self.decode_registers_and_descr(mem_loc, 
+                                                   spilling_pointer, regs)
 
         self.failure_recovery_func = failure_recovery_func
 
@@ -175,110 +176,125 @@
             lltype.Signed], lltype.Signed))
 
     @rgc.no_collect
-    def decode_registers_and_descr(self, mem_loc, spp_loc):
-        ''' 
-            mem_loc     : pointer to encoded state
-            spp_loc     : pointer to begin of the spilling area
-            '''
-        enc = rffi.cast(rffi.CCHARP, mem_loc)
-        managed_size = WORD * len(r.MANAGED_REGS)
-        regs = rffi.cast(rffi.CCHARP, spp_loc)
-        i = -1
-        fail_index = -1
-        while(True):
-            i += 1
-            fail_index += 1
-            res = enc[i]
-            if res == self.END_OF_LOCS:
-                break
-            if res == self.EMPTY_LOC:
-                continue
-
-            group = res
-            i += 1
-            res = enc[i]
-            if res == self.IMM_LOC:
-               # imm value
-                if group == self.INT_TYPE or group == self.REF_TYPE:
-                    if IS_PPC_32:
-                        value = decode32(enc, i+1)
-                        i += 4
-                    else:
-                        value = decode64(enc, i+1)
-                        i += 8
-                else:
-                    assert 0, "not implemented yet"
-            elif res == self.STACK_LOC:
-                stack_location = decode32(enc, i+1)
-                i += 4
-                if group == self.FLOAT_TYPE:
+    def decode_registers_and_descr(self, mem_loc, spp, registers):
+        """Decode locations encoded in memory at mem_loc and write the values
+        to the failboxes.  Values for spilled vars and registers are stored on
+        stack at frame_loc """
+        assert spp & 1 == 0
+        self.fail_force_index = spp
+        bytecode = rffi.cast(rffi.UCHARP, mem_loc)
+        num = 0
+        value = 0
+        fvalue = 0
+        code_inputarg = False
+        while True:
+            code = rffi.cast(lltype.Signed, bytecode[0])
+            bytecode = rffi.ptradd(bytecode, 1)
+            if code >= self.CODE_FROMSTACK:
+                if code > 0x7F:
+                    shift = 7
+                    code &= 0x7F
+                    while True:
+                        nextcode = rffi.cast(lltype.Signed, bytecode[0])
+                        bytecode = rffi.ptradd(bytecode, 1)
+                        code |= (nextcode & 0x7F) << shift
+                        shift += 7
+                        if nextcode <= 0x7F:
+                            break
+                # load the value from the stack
+                kind = code & 3
+                code = int((code - self.CODE_FROMSTACK) >> 2)
+                if code_inputarg:
+                    code = ~code
+                    code_inputarg = False
+                if kind == self.DESCR_FLOAT:
                     assert 0, "not implemented yet"
                 else:
-                    start = spp_loc + get_spp_offset(stack_location)
+                    start = spp + get_spp_offset(int(code))
                     value = rffi.cast(rffi.LONGP, start)[0]
-            else: # REG_LOC
-                reg = ord(enc[i])
-                if group == self.FLOAT_TYPE:
+            else:
+                # 'code' identifies a register: load its value
+                kind = code & 3
+                if kind == self.DESCR_SPECIAL:
+                    if code == self.CODE_HOLE:
+                        num += 1
+                        continue
+                    if code == self.CODE_INPUTARG:
+                        code_inputarg = True
+                        continue
+                    assert code == self.CODE_STOP
+                    break
+                code >>= 2
+                if kind == self.DESCR_FLOAT:
                     assert 0, "not implemented yet"
                 else:
-                    regindex = r.get_managed_reg_index(reg)
-                    if IS_PPC_32:
-                        value = decode32(regs, regindex * WORD)
-                    else:
-                        value = decode64(regs, regindex * WORD)
-    
-            if group == self.INT_TYPE:
-                self.fail_boxes_int.setitem(fail_index, value)
-            elif group == self.REF_TYPE:
-                tgt = self.fail_boxes_ptr.get_addr_for_num(fail_index)
+                    reg_index = r.get_managed_reg_index(code)
+                    value = registers[reg_index]
+            # store the loaded value into fail_boxes_<type>
+            if kind == self.DESCR_FLOAT:
+                assert 0, "not implemented yet"
+            else:
+                if kind == self.DESCR_INT:
+                    tgt = self.fail_boxes_int.get_addr_for_num(num)
+                elif kind == self.DESCR_REF:
+                    assert (value & 3) == 0, "misaligned pointer"
+                    tgt = self.fail_boxes_ptr.get_addr_for_num(num)
+                else:
+                    assert 0, "bogus kind"
                 rffi.cast(rffi.LONGP, tgt)[0] = value
+            num += 1
+        self.fail_boxes_count = num
+        fail_index = rffi.cast(rffi.INTP, bytecode)[0]
+        fail_index = rffi.cast(lltype.Signed, fail_index)
+        return fail_index
+
+    def decode_inputargs(self, code):
+        descr_to_box_type = [REF, INT, FLOAT]
+        bytecode = rffi.cast(rffi.UCHARP, code)
+        arglocs = []
+        code_inputarg = False
+        while 1:
+            # decode the next instruction from the bytecode
+            code = rffi.cast(lltype.Signed, bytecode[0])
+            bytecode = rffi.ptradd(bytecode, 1)
+            if code >= self.CODE_FROMSTACK:
+                # 'code' identifies a stack location
+                if code > 0x7F:
+                    shift = 7
+                    code &= 0x7F
+                    while True:
+                        nextcode = rffi.cast(lltype.Signed, bytecode[0])
+                        bytecode = rffi.ptradd(bytecode, 1)
+                        code |= (nextcode & 0x7F) << shift
+                        shift += 7
+                        if nextcode <= 0x7F:
+                            break
+                kind = code & 3
+                code = (code - self.CODE_FROMSTACK) >> 2
+                if code_inputarg:
+                    code = ~code
+                    code_inputarg = False
+                loc = PPCFrameManager.frame_pos(code, descr_to_box_type[kind])
+            elif code == self.CODE_STOP:
+                break
+            elif code == self.CODE_HOLE:
+                continue
+            elif code == self.CODE_INPUTARG:
+                code_inputarg = True
+                continue
             else:
-                assert 0, 'unknown type'
-
-        assert enc[i] == self.END_OF_LOCS
-        descr = decode32(enc, i+1)
-        self.fail_boxes_count = fail_index
-        self.fail_force_index = spp_loc
-        assert isinstance(descr, int)
-        return descr
-
-    def decode_inputargs(self, enc):
-        locs = []
-        j = 0
-        while enc[j] != self.END_OF_LOCS:
-            res = enc[j]
-            if res == self.EMPTY_LOC:
-                j += 1
-                continue
-
-            assert res in [self.INT_TYPE, self.REF_TYPE],\
-                    'location type is not supported'
-            res_type = res
-            j += 1
-            res = enc[j]
-            if res == self.IMM_LOC:
-                # XXX decode imm if necessary
-                assert 0, 'Imm Locations are not supported'
-            elif res == self.STACK_LOC:
-                if res_type == self.FLOAT_TYPE:
-                    t = FLOAT
-                elif res_type == self.INT_TYPE:
-                    t = INT
-                else:
-                    t = REF
-                assert t != FLOAT
-                stack_loc = decode32(enc, j+1)
-                loc = PPCFrameManager.frame_pos(stack_loc, t)
-                j += 4
-            else: # REG_LOC
-                if res_type == self.FLOAT_TYPE:
+                # 'code' identifies a register
+                kind = code & 3
+                code >>= 2
+                if kind == self.DESCR_FLOAT:
                     assert 0, "not implemented yet"
                 else:
-                    reg = ord(res)
-                    loc = r.MANAGED_REGS[r.get_managed_reg_index(reg)]
-            j += 1
-            locs.append(loc)
-        return locs
+                    #loc = r.all_regs[code]
+                    assert (r.ALL_REGS[code] is 
+                            r.MANAGED_REGS[r.get_managed_reg_index(code)])
+                    loc = r.ALL_REGS[code]
+            arglocs.append(loc)
+        return arglocs[:]
 
     def _build_malloc_slowpath(self):
         mc = PPCBuilder()
@@ -505,10 +521,9 @@
     def assemble_bridge(self, faildescr, inputargs, operations, looptoken, log):
         operations = self.setup(looptoken, operations)
         assert isinstance(faildescr, AbstractFailDescr)
-        code = faildescr._failure_recovery_code
-        enc = rffi.cast(rffi.CCHARP, code)
+        code = self._find_failure_recovery_bytecode(faildescr)
         frame_depth = faildescr._ppc_frame_depth
-        arglocs = self.decode_inputargs(enc)
+        arglocs = self.decode_inputargs(code)
         if not we_are_translated():
             assert len(inputargs) == len(arglocs)
         regalloc = Regalloc(assembler=self, frame_manager=PPCFrameManager())
@@ -550,56 +565,65 @@
         mc.prepare_insts_blocks()
         mc.copy_to_raw_memory(rawstart + sp_patch_location)
 
-    # For an explanation of the encoding, see
-    # backend/arm/assembler.py
-    def gen_descr_encoding(self, descr, args, arglocs):
-        minsize = (len(arglocs) - 1) * 6 + 5
-        memsize = self.align(minsize)
-        memaddr = self.datablockwrapper.malloc_aligned(memsize, alignment=1)
-        mem = rffi.cast(rffi.CArrayPtr(lltype.Char), memaddr)
-        i = 0
-        j = 0
-        while i < len(args):
-            if arglocs[i+1]:
-                arg = args[i]
-                loc = arglocs[i+1]
-                if arg.type == INT:
-                    mem[j] = self.INT_TYPE
-                    j += 1
-                elif arg.type == REF:
-                    mem[j] = self.REF_TYPE
-                    j += 1
+    DESCR_REF       = 0x00
+    DESCR_INT       = 0x01
+    DESCR_FLOAT     = 0x02
+    DESCR_SPECIAL   = 0x03
+    CODE_FROMSTACK  = 128
+    CODE_STOP       = 0 | DESCR_SPECIAL
+    CODE_HOLE       = 4 | DESCR_SPECIAL
+    CODE_INPUTARG   = 8 | DESCR_SPECIAL
+
+    def gen_descr_encoding(self, descr, failargs, locs):
+        assert self.mc is not None
+        buf = []
+        for i in range(len(failargs)):
+            arg = failargs[i]
+            if arg is not None:
+                if arg.type == REF:
+                    kind = self.DESCR_REF
+                elif arg.type == INT:
+                    kind = self.DESCR_INT
                 elif arg.type == FLOAT:
-                    assert 0, "not implemented yet"
+                    assert 0, "not implemented"
                 else:
-                    assert 0, 'unknown type'
+                    raise AssertionError("bogus kind")
+                loc = locs[i]
+                if loc.is_stack():
+                    pos = loc.position
+                    if pos < 0:
+                        buf.append(self.CODE_INPUTARG)
+                        pos = ~pos
+                    n = self.CODE_FROMSTACK // 4 + pos
+                else:
+                    assert loc.is_reg() or loc.is_vfp_reg()
+                    n = loc.value
+                n = kind + 4 * n
+                while n > 0x7F:
+                    buf.append((n & 0x7F) | 0x80)
+                    n >>= 7
+            else:
+                n = self.CODE_HOLE
+            buf.append(n)
+        buf.append(self.CODE_STOP)
 
-                if loc.is_reg() or loc.is_vfp_reg():
-                    mem[j] = chr(loc.value)
-                    j += 1
-                elif loc.is_imm() or loc.is_imm_float():
-                    assert (arg.type == INT or arg.type == REF
-                                or arg.type == FLOAT)
-                    mem[j] = self.IMM_LOC
-                    if IS_PPC_32:
-                        encode32(mem, j+1, loc.getint())
-                        j += 5
-                    else:
-                        encode64(mem, j+1, loc.getint())
-                        j += 9
-                else:
-                    mem[j] = self.STACK_LOC
-                    encode32(mem, j+1, loc.position)
-                    j += 5
-            else:
-                mem[j] = self.EMPTY_LOC
-                j += 1
-            i += 1
+        fdescr = self.cpu.get_fail_descr_number(descr)
 
-        mem[j] = chr(0xFF)
-        n = self.cpu.get_fail_descr_number(descr)
-        encode32(mem, j+1, n)
-        return memaddr
+        buf.append((fdescr >> 24) & 0xFF)
+        buf.append((fdescr >> 16) & 0xFF)
+        buf.append((fdescr >>  8) & 0xFF)
+        buf.append( fdescr        & 0xFF)
+        
+        lenbuf = len(buf)
+        # XXX fix memory leaks
+        enc_arr = lltype.malloc(rffi.CArray(rffi.CHAR), lenbuf, 
+                                flavor='raw', track_allocation=False)
+        enc_ptr = rffi.cast(lltype.Signed, enc_arr)
+        for i, byte in enumerate(buf):
+            enc_arr[i] = chr(byte)
+        # assert that the fail_boxes lists are big enough
+        assert len(failargs) <= self.fail_boxes_int.SIZE
+        return enc_ptr
 
     def align(self, size):
         while size % 8 != 0:
@@ -700,6 +724,9 @@
 
         return frame_depth
     
+    def _find_failure_recovery_bytecode(self, faildescr):
+        return faildescr._failure_recovery_code_adr
+
     def fixup_target_tokens(self, rawstart):
         for targettoken in self.target_tokens_currently_compiling:
             targettoken._ppc_loop_code += rawstart
@@ -726,29 +753,29 @@
             pos = self.mc.currpos()
             tok.pos_recovery_stub = pos 
 
-            memaddr = self.gen_exit_stub(descr, tok.failargs,
+            encoding_adr = self.gen_exit_stub(descr, tok.failargs,
                                             tok.faillocs,
                                             save_exc=tok.save_exc)
+
             # store info on the descr
             descr._ppc_frame_depth = tok.faillocs[0].getint()
-            descr._failure_recovery_code = memaddr
+            descr._failure_recovery_code_adr = encoding_adr
             descr._ppc_guard_pos = pos
 
     def gen_exit_stub(self, descr, args, arglocs, save_exc=False):
-        memaddr = self.gen_descr_encoding(descr, args, arglocs)
-
-        # store addr in force index field
-        self.mc.alloc_scratch_reg()
-        self.mc.load_imm(r.SCRATCH, memaddr)
-        self.mc.store(r.SCRATCH.value, r.SPP.value, self.ENCODING_AREA)
-        self.mc.free_scratch_reg()
-
         if save_exc:
             path = self._leave_jitted_hook_save_exc
         else:
             path = self._leave_jitted_hook
+
+        # write state encoding to memory and store the address of the beginning
+        # of the encoding in the FORCE INDEX slot
+        encoding_adr = self.gen_descr_encoding(descr, args, arglocs[1:])
+        with scratch_reg(self.mc):
+            self.mc.load_imm(r.SCRATCH, encoding_adr)
+            self.mc.store(r.SCRATCH.value, r.SPP.value, self.ENCODING_AREA)
         self.mc.b_abs(path)
-        return memaddr
+        return encoding_adr
 
     def process_pending_guards(self, block_start):
         clt = self.current_clt
@@ -775,6 +802,7 @@
         mc.b_abs(bridge_addr)
         mc.prepare_insts_blocks()
         mc.copy_to_raw_memory(patch_addr)
+        faildescr._failure_recovery_code_ofs = 0
 
     def get_asmmemmgr_blocks(self, looptoken):
         clt = looptoken.compiled_loop_token
@@ -1023,3 +1051,6 @@
 
 AssemblerPPC.operations = operations
 AssemblerPPC.operations_with_guard = operations_with_guard
+
+class BridgeAlreadyCompiled(Exception):
+    pass
diff --git a/pypy/jit/backend/ppc/runner.py b/pypy/jit/backend/ppc/runner.py
--- a/pypy/jit/backend/ppc/runner.py
+++ b/pypy/jit/backend/ppc/runner.py
@@ -91,6 +91,11 @@
         adr = llmemory.cast_ptr_to_adr(x)
         return PPC_64_CPU.cast_adr_to_int(adr)
 
+    # XXX find out how big FP registers are on PPC32
+    all_null_registers = lltype.malloc(rffi.LONGP.TO,
+                        len(r.MANAGED_REGS),
+                        flavor='raw', zero=True, immortal=True)
+
     def force(self, spilling_pointer):
         TP = rffi.CArrayPtr(lltype.Signed)
 
@@ -101,9 +106,13 @@
         faildescr = self.get_fail_descr_from_number(fail_index)
         rffi.cast(TP, addr_of_force_index)[0] = ~fail_index
 
+        bytecode = self.asm._find_failure_recovery_bytecode(faildescr)
+        addr_all_null_registers = rffi.cast(rffi.LONG, self.all_null_registers)
         # start of "no gc operation!" block
         fail_index_2 = self.asm.decode_registers_and_descr(
-                faildescr._failure_recovery_code, spilling_pointer)
+                bytecode,
+                spilling_pointer,
+                self.all_null_registers)
         self.asm.leave_jitted_hook()
         # end of "no gc operation!" block
         assert fail_index == fail_index_2


More information about the pypy-commit mailing list