[pypy-commit] pypy jit-simplify-backendintf: In-progress. Three complicated and long functions are gone :-)
arigo
noreply at buildbot.pypy.org
Sun Dec 11 23:42:40 CET 2011
Author: Armin Rigo <arigo at tunes.org>
Branch: jit-simplify-backendintf
Changeset: r50402:a4f5ba4d79b4
Date: 2011-12-11 23:42 +0100
http://bitbucket.org/pypy/pypy/changeset/a4f5ba4d79b4/
Log: In-progress. Three complicated and long functions are gone :-)
diff --git a/pypy/jit/backend/llsupport/regalloc.py b/pypy/jit/backend/llsupport/regalloc.py
--- a/pypy/jit/backend/llsupport/regalloc.py
+++ b/pypy/jit/backend/llsupport/regalloc.py
@@ -69,6 +69,8 @@
self.bindings[box] = loc
#
index = self.get_loc_index(loc)
+ if index < 0:
+ return
endindex = index + self.frame_size(box.type)
while len(self.used) < endindex:
self.used.append(False)
@@ -91,6 +93,8 @@
#
size = self.frame_size(box.type)
baseindex = self.get_loc_index(loc)
+ if baseindex < 0:
+ return
for i in range(size):
index = baseindex + i
assert 0 <= index < len(self.used)
@@ -98,7 +102,8 @@
def try_to_reuse_location(self, box, loc):
index = self.get_loc_index(loc)
- assert index >= 0
+ if index < 0:
+ return False
size = self.frame_size(box.type)
for i in range(size):
while (index + i) >= len(self.used):
@@ -158,7 +163,7 @@
if not we_are_translated() and self.box_types is not None:
assert isinstance(v, TempBox) or v.type in self.box_types
- def possibly_free_var(self, v):
+ def possibly_free_var(self, v, _hint_dont_reuse_quickly=False):
""" If v is stored in a register and v is not used beyond the
current position, then free it. Must be called at some
point for all variables that might be in registers.
@@ -168,7 +173,10 @@
return
if v not in self.longevity or self.longevity[v][1] <= self.position:
if v in self.reg_bindings:
- self.free_regs.append(self.reg_bindings[v])
+ if _hint_dont_reuse_quickly:
+ self.free_regs.insert(0, self.reg_bindings[v])
+ else:
+ self.free_regs.append(self.reg_bindings[v])
del self.reg_bindings[v]
if self.frame_manager is not None:
self.frame_manager.mark_as_free(v)
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -421,10 +421,8 @@
def assemble_loop(self, loopname, inputargs, operations, looptoken, log):
'''adds the following attributes to looptoken:
- _x86_loop_code (an integer giving an address)
- _x86_bootstrap_code (an integer giving an address)
- _x86_direct_bootstrap_code ( " " " " )
- _x86_arglocs
+ _x86_function_addr (address of the generated func, as an int)
+ _x86_loop_code (debug: addr of the start of the ResOps)
_x86_debug_checksum
'''
# XXX this function is too longish and contains some code
@@ -445,12 +443,11 @@
operations = self._inject_debugging_code(looptoken, operations)
regalloc = RegAlloc(self, self.cpu.translate_support_code)
- arglocs, operations = regalloc.prepare_loop(inputargs, operations,
- looptoken, clt.allgcrefs)
- looptoken._x86_arglocs = arglocs
-
- bootstrappos = self.mc.get_relative_pos()
- stackadjustpos = self._assemble_bootstrap_code(inputargs, arglocs)
+ #
+ self._call_header_with_stack_check()
+ stackadjustpos = self._patchable_stackadjust()
+ operations = regalloc.prepare_loop(inputargs, operations,
+ looptoken, clt.allgcrefs)
looppos = self.mc.get_relative_pos()
looptoken._x86_loop_code = looppos
clt.frame_depth = -1 # temporarily
@@ -458,19 +455,17 @@
frame_depth, param_depth = self._assemble(regalloc, operations)
clt.frame_depth = frame_depth
clt.param_depth = param_depth
-
- directbootstrappos = self.mc.get_relative_pos()
- self._assemble_bootstrap_direct_call(arglocs, looppos,
- frame_depth+param_depth)
+ #
+ size_excluding_failure_stuff = self.mc.get_relative_pos()
self.write_pending_failure_recoveries()
- fullsize = self.mc.get_relative_pos()
+ full_size = self.mc.get_relative_pos()
#
rawstart = self.materialize_loop(looptoken)
debug_start("jit-backend-addr")
debug_print("Loop %d (%s) has address %x to %x (bootstrap %x)" % (
looptoken.number, loopname,
rawstart + looppos,
- rawstart + directbootstrappos,
+ rawstart + size_excluding_failure_stuff,
rawstart))
debug_stop("jit-backend-addr")
self._patch_stackadjust(rawstart + stackadjustpos,
@@ -481,18 +476,17 @@
if not we_are_translated():
# used only by looptoken.dump() -- useful in tests
looptoken._x86_rawstart = rawstart
- looptoken._x86_fullsize = fullsize
+ looptoken._x86_fullsize = full_size
looptoken._x86_ops_offset = ops_offset
+ looptoken._x86_function_addr = rawstart
- looptoken._x86_bootstrap_code = rawstart + bootstrappos
- looptoken._x86_direct_bootstrap_code = rawstart + directbootstrappos
self.fixup_target_tokens(rawstart)
self.teardown()
# oprofile support
if self.cpu.profile_agent is not None:
name = "Loop # %s: %s" % (looptoken.number, loopname)
self.cpu.profile_agent.native_code_written(name,
- rawstart, fullsize)
+ rawstart, full_size)
return ops_offset
def assemble_bridge(self, faildescr, inputargs, operations,
@@ -802,98 +796,6 @@
self.mc.MOV_ri(ebx.value, rst) # MOV ebx, rootstacktop
self.mc.SUB_mi8((ebx.value, 0), 2*WORD) # SUB [ebx], 2*WORD
- def _assemble_bootstrap_direct_call(self, arglocs, jmppos, stackdepth):
- if IS_X86_64:
- return self._assemble_bootstrap_direct_call_64(arglocs, jmppos, stackdepth)
- # XXX pushing ebx esi and edi is a bit pointless, since we store
- # all regsiters anyway, for the case of guard_not_forced
- # XXX this can be improved greatly. Right now it'll behave like
- # a normal call
- nonfloatlocs, floatlocs = arglocs
- self._call_header_with_stack_check()
- self.mc.LEA_rb(esp.value, self._get_offset_of_ebp_from_esp(stackdepth))
- offset = 2 * WORD
- tmp = eax
- xmmtmp = xmm0
- for i in range(len(nonfloatlocs)):
- loc = nonfloatlocs[i]
- if loc is not None:
- if isinstance(loc, RegLoc):
- assert not loc.is_xmm
- self.mc.MOV_rb(loc.value, offset)
- else:
- self.mc.MOV_rb(tmp.value, offset)
- self.mc.MOV(loc, tmp)
- offset += WORD
- loc = floatlocs[i]
- if loc is not None:
- if isinstance(loc, RegLoc):
- assert loc.is_xmm
- self.mc.MOVSD_xb(loc.value, offset)
- else:
- self.mc.MOVSD_xb(xmmtmp.value, offset)
- assert isinstance(loc, StackLoc)
- self.mc.MOVSD_bx(loc.value, xmmtmp.value)
- offset += 2 * WORD
- endpos = self.mc.get_relative_pos() + 5
- self.mc.JMP_l(jmppos - endpos)
- assert endpos == self.mc.get_relative_pos()
-
- def _assemble_bootstrap_direct_call_64(self, arglocs, jmppos, stackdepth):
- # XXX: Very similar to _emit_call_64
-
- src_locs = []
- dst_locs = []
- xmm_src_locs = []
- xmm_dst_locs = []
- get_from_stack = []
-
- # In reverse order for use with pop()
- unused_gpr = [r9, r8, ecx, edx, esi, edi]
- unused_xmm = [xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0]
-
- nonfloatlocs, floatlocs = arglocs
- self._call_header_with_stack_check()
- self.mc.LEA_rb(esp.value, self._get_offset_of_ebp_from_esp(stackdepth))
-
- # The lists are padded with Nones
- assert len(nonfloatlocs) == len(floatlocs)
-
- for i in range(len(nonfloatlocs)):
- loc = nonfloatlocs[i]
- if loc is not None:
- if len(unused_gpr) > 0:
- src_locs.append(unused_gpr.pop())
- dst_locs.append(loc)
- else:
- get_from_stack.append((loc, False))
-
- floc = floatlocs[i]
- if floc is not None:
- if len(unused_xmm) > 0:
- xmm_src_locs.append(unused_xmm.pop())
- xmm_dst_locs.append(floc)
- else:
- get_from_stack.append((floc, True))
-
- remap_frame_layout(self, src_locs, dst_locs, X86_64_SCRATCH_REG)
- remap_frame_layout(self, xmm_src_locs, xmm_dst_locs, X86_64_XMM_SCRATCH_REG)
-
- for i in range(len(get_from_stack)):
- loc, is_xmm = get_from_stack[i]
- if is_xmm:
- self.mc.MOVSD_xb(X86_64_XMM_SCRATCH_REG.value, (2 + i) * WORD)
- self.mc.MOVSD(loc, X86_64_XMM_SCRATCH_REG)
- else:
- self.mc.MOV_rb(X86_64_SCRATCH_REG.value, (2 + i) * WORD)
- # XXX: We're assuming that "loc" won't require regloc to
- # clobber the scratch register
- self.mc.MOV(loc, X86_64_SCRATCH_REG)
-
- endpos = self.mc.get_relative_pos() + 5
- self.mc.JMP_l(jmppos - endpos)
- assert endpos == self.mc.get_relative_pos()
-
def redirect_call_assembler(self, oldlooptoken, newlooptoken):
# some minimal sanity checking
oldnonfloatlocs, oldfloatlocs = oldlooptoken._x86_arglocs
@@ -909,45 +811,6 @@
mc.JMP(imm(target))
mc.copy_to_raw_memory(oldadr)
- def _assemble_bootstrap_code(self, inputargs, arglocs):
- nonfloatlocs, floatlocs = arglocs
- self._call_header()
- stackadjustpos = self._patchable_stackadjust()
- tmp = eax
- xmmtmp = xmm0
- self.mc.begin_reuse_scratch_register()
- for i in range(len(nonfloatlocs)):
- loc = nonfloatlocs[i]
- if loc is None:
- continue
- if isinstance(loc, RegLoc):
- target = loc
- else:
- target = tmp
- if inputargs[i].type == REF:
- adr = self.fail_boxes_ptr.get_addr_for_num(i)
- self.mc.MOV(target, heap(adr))
- self.mc.MOV(heap(adr), imm0)
- else:
- adr = self.fail_boxes_int.get_addr_for_num(i)
- self.mc.MOV(target, heap(adr))
- if target is not loc:
- assert isinstance(loc, StackLoc)
- self.mc.MOV_br(loc.value, target.value)
- for i in range(len(floatlocs)):
- loc = floatlocs[i]
- if loc is None:
- continue
- adr = self.fail_boxes_float.get_addr_for_num(i)
- if isinstance(loc, RegLoc):
- self.mc.MOVSD(loc, heap(adr))
- else:
- self.mc.MOVSD(xmmtmp, heap(adr))
- assert isinstance(loc, StackLoc)
- self.mc.MOVSD_bx(loc.value, xmmtmp.value)
- self.mc.end_reuse_scratch_register()
- return stackadjustpos
-
def dump(self, text):
if not self.verbose:
return
@@ -2104,9 +1967,9 @@
# returns in eax the fail_index
# now we return from the complete frame, which starts from
- # _assemble_bootstrap_code(). The LEA in _call_footer below throws
- # away most of the frame, including all the PUSHes that we did just
- # above.
+ # _call_header_with_stack_check(). The LEA in _call_footer below
+ # throws away most of the frame, including all the PUSHes that we
+ # did just above.
self._call_footer()
rawstart = mc.materialize(self.cpu.asmmemmgr, [])
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -28,7 +28,7 @@
class X86RegisterManager(RegisterManager):
box_types = [INT, REF]
- all_regs = [eax, ecx, edx, ebx, esi, edi]
+ all_regs = [ecx, eax, edx, ebx, esi, edi]
no_lower_byte_regs = [esi, edi]
save_around_call_regs = [eax, edx, ecx]
frame_reg = ebp
@@ -60,7 +60,7 @@
class X86_64_RegisterManager(X86RegisterManager):
# r11 omitted because it's used as scratch
- all_regs = [eax, ecx, edx, ebx, esi, edi, r8, r9, r10, r12, r13, r14, r15]
+ all_regs = [ecx, eax, edx, ebx, esi, edi, r8, r9, r10, r12, r13, r14, r15]
no_lower_byte_regs = []
save_around_call_regs = [eax, ecx, edx, esi, edi, r8, r9, r10]
@@ -173,22 +173,26 @@
operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations,
allgcrefs)
# compute longevity of variables
- longevity, useful = self._compute_vars_longevity(inputargs, operations)
+ longevity = self._compute_vars_longevity(inputargs, operations)
self.longevity = longevity
self.rm = gpr_reg_mgr_cls(longevity,
frame_manager = self.fm,
assembler = self.assembler)
self.xrm = xmm_reg_mgr_cls(longevity, frame_manager = self.fm,
assembler = self.assembler)
- return operations, useful
+ return operations
def prepare_loop(self, inputargs, operations, looptoken, allgcrefs):
- operations, useful = self._prepare(inputargs, operations, allgcrefs)
- return self._process_inputargs(inputargs, useful), operations
+ operations = self._prepare(inputargs, operations, allgcrefs)
+ self._set_initial_bindings(inputargs)
+ # note: we need to make a copy of inputargs because possibly_free_vars
+ # is also used on op args, which is a non-resizable list
+ self.possibly_free_vars(list(inputargs))
+ return operations
def prepare_bridge(self, prev_depths, inputargs, arglocs, operations,
allgcrefs):
- operations, _ = self._prepare(inputargs, operations, allgcrefs)
+ operations = self._prepare(inputargs, operations, allgcrefs)
self._update_bindings(arglocs, inputargs)
self.param_depth = prev_depths[1]
return operations
@@ -196,46 +200,30 @@
def reserve_param(self, n):
self.param_depth = max(self.param_depth, n)
- def _process_inputargs(self, inputargs, useful):
- # XXX we can sort out here by longevity if we need something
- # more optimal
- floatlocs = [None] * len(inputargs)
- nonfloatlocs = [None] * len(inputargs)
- # Don't use all_regs[0] for passing arguments around a loop.
- # Must be kept in sync with consider_jump().
- # XXX this should probably go to llsupport/regalloc.py
- xmmtmp = self.xrm.free_regs.pop(0)
- tmpreg = self.rm.free_regs.pop(0)
- assert tmpreg == X86RegisterManager.all_regs[0]
- assert xmmtmp == X86XMMRegisterManager.all_regs[0]
+ def _set_initial_bindings(self, inputargs):
+ if IS_X86_64:
+ return self._set_initial_bindings_64(inputargs)
+ # ...
+ # stack layout: arg2
+ # arg1
+ # arg0
+ # return address
+ # saved ebp <-- ebp points here
+ # ...
+ cur_frame_pos = - 1 - FRAME_FIXED_SIZE
+ assert get_ebp_ofs(cur_frame_pos-1) == 2*WORD
+ assert get_ebp_ofs(cur_frame_pos-2) == 3*WORD
+ #
for i in range(len(inputargs)):
- arg = inputargs[i]
- assert not isinstance(arg, Const)
- reg = None
- if self.longevity[arg][1] > -1 and arg in useful:
- if arg.type == FLOAT:
- # xxx is it really a good idea? at the first CALL they
- # will all be flushed anyway
- reg = self.xrm.try_allocate_reg(arg)
- else:
- reg = self.rm.try_allocate_reg(arg)
- if reg:
- loc = reg
+ box = inputargs[i]
+ assert isinstance(box, Box)
+ #
+ if box.type == FLOAT:
+ cur_frame_pos -= 2
else:
- loc = self.fm.loc(arg)
- if arg.type == FLOAT:
- floatlocs[i] = loc
- else:
- nonfloatlocs[i] = loc
- # otherwise we have it saved on stack, so no worry
- self.rm.free_regs.insert(0, tmpreg)
- self.xrm.free_regs.insert(0, xmmtmp)
- assert tmpreg not in nonfloatlocs
- assert xmmtmp not in floatlocs
- # note: we need to make a copy of inputargs because possibly_free_vars
- # is also used on op args, which is a non-resizable list
- self.possibly_free_vars(list(inputargs))
- return nonfloatlocs, floatlocs
+ cur_frame_pos -= 1
+ loc = self.fm.frame_pos(cur_frame_pos, box.type)
+ self.fm.set_binding(box, loc)
def possibly_free_var(self, var):
if var.type == FLOAT:
@@ -458,7 +446,7 @@
# only to guard operations or to jump or to finish
produced = {}
last_used = {}
- useful = {}
+ #useful = {}
for i in range(len(operations)-1, -1, -1):
op = operations[i]
if op.result:
@@ -469,8 +457,8 @@
opnum = op.getopnum()
for j in range(op.numargs()):
arg = op.getarg(j)
- if opnum != rop.JUMP and opnum != rop.FINISH:
- useful[arg] = None
+ #if opnum != rop.JUMP and opnum != rop.FINISH:
+ # useful[arg] = None
if isinstance(arg, Box) and arg not in last_used:
last_used[arg] = i
if op.is_guard():
@@ -496,7 +484,7 @@
longevity[arg] = (0, last_used[arg])
del last_used[arg]
assert len(last_used) == 0
- return longevity, useful
+ return longevity#, useful
def loc(self, v):
if v is None: # xxx kludgy
@@ -1451,12 +1439,12 @@
tmpreg = X86RegisterManager.all_regs[0]
tmpvar = TempBox()
self.rm.force_allocate_reg(tmpvar, selected_reg=tmpreg)
- self.rm.possibly_free_var(tmpvar)
+ self.rm.possibly_free_var(tmpvar, _hint_dont_reuse_quickly=True)
#
xmmtmp = X86XMMRegisterManager.all_regs[0]
tmpvar = TempBox()
self.xrm.force_allocate_reg(tmpvar, selected_reg=xmmtmp)
- self.xrm.possibly_free_var(tmpvar)
+ self.xrm.possibly_free_var(tmpvar, _hint_dont_reuse_quickly=True)
#
# we need to make sure that no variable is stored in ebp
for arg in inputargs:
diff --git a/pypy/jit/backend/x86/regloc.py b/pypy/jit/backend/x86/regloc.py
--- a/pypy/jit/backend/x86/regloc.py
+++ b/pypy/jit/backend/x86/regloc.py
@@ -44,7 +44,6 @@
_location_code = 'b'
def __init__(self, position, ebp_offset, num_words, type):
- assert ebp_offset < 0 # so no confusion with RegLoc.value
self.position = position
self.value = ebp_offset
self.width = num_words * WORD
diff --git a/pypy/jit/backend/x86/runner.py b/pypy/jit/backend/x86/runner.py
--- a/pypy/jit/backend/x86/runner.py
+++ b/pypy/jit/backend/x86/runner.py
@@ -117,7 +117,7 @@
FUNCPTR = lltype.Ptr(lltype.FuncType(ARGS, lltype.Signed))
#
def execute_token(executable_token, *args):
- addr = executable_token._x86_direct_bootstrap_code
+ addr = executable_token._x86_function_addr
func = rffi.cast(FUNCPTR, addr)
#llop.debug_print(lltype.Void, ">>>> Entering", addr)
prev_interpreter = None # help flow space
diff --git a/pypy/jit/backend/x86/test/test_runner.py b/pypy/jit/backend/x86/test/test_runner.py
--- a/pypy/jit/backend/x86/test/test_runner.py
+++ b/pypy/jit/backend/x86/test/test_runner.py
@@ -328,9 +328,8 @@
inputargs = [i for i in (a, b) if isinstance(i, Box)]
looptoken = JitCellToken()
self.cpu.compile_loop(inputargs, ops, looptoken)
- for i, box in enumerate(inputargs):
- self.cpu.set_future_value_int(i, box.value)
- self.cpu.execute_token(looptoken)
+ inputvalues = [box.value for box in inputargs]
+ self.cpu.execute_token(looptoken, *inputvalues)
result = self.cpu.get_latest_value_int(0)
expected = execute(self.cpu, None, op, None, a, b).value
if guard == rop.GUARD_FALSE:
@@ -396,8 +395,7 @@
assert address >= loopaddress + loopsize
assert size >= 10 # randomish number
- self.cpu.set_future_value_int(0, 2)
- fail = self.cpu.execute_token(looptoken)
+ fail = self.cpu.execute_token(looptoken, 2)
assert fail.identifier == 2
res = self.cpu.get_latest_value_int(0)
assert res == 20
@@ -503,9 +501,7 @@
looptoken = JitCellToken()
self.cpu.compile_loop([i1, i2], ops, looptoken)
- self.cpu.set_future_value_int(0, 123450)
- self.cpu.set_future_value_int(1, 123408)
- fail = self.cpu.execute_token(looptoken)
+ fail = self.cpu.execute_token(looptoken, 123450, 123408)
assert fail.identifier == 0
assert self.cpu.get_latest_value_int(0) == 42
assert self.cpu.get_latest_value_int(1) == 42
@@ -537,8 +533,7 @@
self.cpu.assembler.set_debug(True)
looptoken = JitCellToken()
self.cpu.compile_loop(ops.inputargs, ops.operations, looptoken)
- self.cpu.set_future_value_int(0, 0)
- self.cpu.execute_token(looptoken)
+ self.cpu.execute_token(looptoken, 0)
# check debugging info
struct = self.cpu.assembler.loop_run_counters[0]
assert struct.i == 10
@@ -561,7 +556,6 @@
self.cpu.assembler.set_debug(True)
looptoken = JitCellToken()
self.cpu.compile_loop(ops.inputargs, ops.operations, looptoken)
- self.cpu.set_future_value_int(0, 0)
- self.cpu.execute_token(looptoken)
+ self.cpu.execute_token(looptoken, 0)
assert looptoken._x86_debug_checksum == sum([op.getopnum()
for op in ops.operations])
More information about the pypy-commit
mailing list