[pypy-svn] r75667 - in pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86: . test
jcreigh at codespeak.net
Tue Jun 29 21:13:14 CEST 2010
Author: jcreigh
Date: Tue Jun 29 21:12:33 2010
New Revision: 75667
Modified:
pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/assembler.py
pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/regloc.py
pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/rx86.py
pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_assembler.py
pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_runner.py
pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_rx86.py
Log:
handle the case on 64-bit where machine code blocks are more than 32 bits apart
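(The core of the fix, restated: a rel32 JMP or CALL encodes a signed
32-bit displacement, so it can only reach targets within roughly +/-2GB
of the next instruction. The sketch below mirrors the dispatch added to
regloc.py in this commit; emit_jump itself is a hypothetical standalone
helper, not part of the commit.)

    def emit_jump(mc, target):
        # rel32 JMP/CALL are 5 bytes: one opcode byte plus a 4-byte
        # displacement relative to the *next* instruction
        offset = target - (mc.tell() + 5)
        if rx86.fits_in_32bits(offset):
            mc.JMP_l(target)   # near jump, rel32
        else:
            # absolute 64-bit jump through the scratch register (R11)
            mc.MOV_ri(X86_64_SCRATCH_REG.value, target)  # mov r11, imm64
            mc.JMP_r(X86_64_SCRATCH_REG.value)           # jmp r11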
Modified: pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/assembler.py
==============================================================================
--- pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/assembler.py (original)
+++ pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/assembler.py Tue Jun 29 21:12:33 2010
@@ -43,16 +43,30 @@
class MachineCodeBlockWrapper(object):
MC_DEFAULT_SIZE = 1024*1024
- def __init__(self, bigsize, profile_agent=None):
+ def __init__(self, assembler, bigsize, profile_agent=None):
+ self.assembler = assembler
self.old_mcs = [] # keepalive
self.bigsize = bigsize
self._mc = self._instantiate_mc()
self.function_name = None
self.profile_agent = profile_agent
+ self.reset_reserved_bytes()
def _instantiate_mc(self): # hook for testing
return codebuf.MachineCodeBlock(self.bigsize)
+ def ensure_bytes_available(self, num_bytes):
+ if self.bytes_free() <= (self._reserved_bytes + num_bytes):
+ self.make_new_mc()
+
+ def reserve_bytes(self, num_bytes):
+ self.ensure_bytes_available(num_bytes)
+ self._reserved_bytes += num_bytes
+
+ def reset_reserved_bytes(self):
+ # XXX er.... pretty random number, just to be sure
+ # not to write a half-instruction
+ self._reserved_bytes = 64
def bytes_free(self):
return self._mc._size - self._mc.get_relative_pos()
@@ -74,12 +88,14 @@
def make_new_mc(self):
new_mc = self._instantiate_mc()
debug_print('[new machine code block at', new_mc.tell(), ']')
- self._mc.JMP_l(new_mc.tell())
+ self._mc.JMP(imm(new_mc.tell()))
if self.function_name is not None:
self.end_function(done=False)
self.start_pos = new_mc.get_relative_pos()
+ self.assembler.write_pending_failure_recoveries()
+
self._mc.done()
self.old_mcs.append(self._mc)
self._mc = new_mc
@@ -93,9 +109,7 @@
def _new_method(name):
def method(self, *args):
- # XXX er.... pretty random number, just to be sure
- # not to write half-instruction
- if self.bytes_free() < 64:
+ if self.bytes_free() < self._reserved_bytes:
self.make_new_mc()
getattr(self._mc, name)(*args)
method.func_name = name
@@ -109,11 +123,16 @@
setattr(MachineCodeBlockWrapper, name, _new_method(name))
class GuardToken(object):
- def __init__(self, faildescr, failargs, fail_locs, exc):
+ def __init__(self, faildescr, failargs, fail_locs, exc, desc_bytes):
self.faildescr = faildescr
self.failargs = failargs
self.fail_locs = fail_locs
self.exc = exc
+ self.desc_bytes = desc_bytes
+
+ def recovery_stub_size(self):
+ # XXX: 32 is pulled out of the air
+ return 32 + len(self.desc_bytes)
class Assembler386(object):
mc = None
@@ -174,7 +193,7 @@
self.assembler_helper_adr = self.cpu.cast_ptr_to_int(
self.cpu.assembler_helper_ptr)
- self.mc = MachineCodeBlockWrapper(self.mc_size, self.cpu.profile_agent)
+ self.mc = MachineCodeBlockWrapper(self, self.mc_size, self.cpu.profile_agent)
self._build_failure_recovery(False)
self._build_failure_recovery(True)
if self.cpu.supports_floats:
@@ -244,9 +263,6 @@
regalloc = RegAlloc(self, self.cpu.translate_support_code)
arglocs = regalloc.prepare_loop(inputargs, operations, looptoken)
looptoken._x86_arglocs = arglocs
- needed_mem = len(arglocs[0]) * 16 + 16
- if needed_mem >= self.mc.bytes_free():
- self.mc.make_new_mc()
# profile support
name = "Loop # %s: %s" % (looptoken.number, funcname)
@@ -261,11 +277,7 @@
self._patch_stackadjust(adr_stackadjust, frame_depth+param_depth)
looptoken._x86_frame_depth = frame_depth
looptoken._x86_param_depth = param_depth
- # we need to make sure here that we don't overload an mc badly.
- # a safe estimate is that we need at most 16 bytes per arg
- needed_mem = len(arglocs[0]) * 16 + 16
- if needed_mem >= self.mc.bytes_free():
- self.mc.make_new_mc()
+
looptoken._x86_direct_bootstrap_code = self.mc.tell()
self._assemble_bootstrap_direct_call(arglocs, curadr,
frame_depth+param_depth)
@@ -311,10 +323,15 @@
def write_pending_failure_recoveries(self):
for tok in self.pending_guard_tokens:
- addr = self.generate_quick_failure(tok.faildescr, tok.failargs, tok.fail_locs, tok.exc)
+ # Okay to write to _mc because we've already made sure that
+ # there's enough space by "reserving" bytes.
+ addr = self.generate_quick_failure(self.mc._mc, tok.faildescr, tok.failargs, tok.fail_locs, tok.exc, tok.desc_bytes)
+ tok.faildescr._x86_adr_recovery_stub = addr
self.patch_jump_for_descr(tok.faildescr, addr)
self.pending_guard_tokens = []
+ self.mc.reset_reserved_bytes()
+ self.mc.done()
def _find_debug_merge_point(self, operations):
for op in operations:
@@ -324,8 +341,21 @@
def patch_jump_for_descr(self, faildescr, adr_new_target):
adr_jump_offset = faildescr._x86_adr_jump_offset
- mc = codebuf.InMemoryCodeBuilder(adr_jump_offset, adr_jump_offset + 4)
- mc.writeimm32(adr_new_target - adr_jump_offset - 4)
+ adr_recovery_stub = faildescr._x86_adr_recovery_stub
+ offset = adr_new_target - (adr_jump_offset + 4)
+ # If the new target fits within a rel32 of the jump, just patch
+ # that. Otherwise, leave the original rel32 to the recovery stub in
+ # place, but clobber the recovery stub with a jump to the real
+ # target.
+ if rx86.fits_in_32bits(offset):
+ mc = codebuf.InMemoryCodeBuilder(adr_jump_offset, adr_jump_offset + 4)
+ mc.writeimm32(offset)
+ else:
+ # "mov r11, addr; jmp r11" is 13 bytes
+ mc = codebuf.InMemoryCodeBuilder(adr_recovery_stub, adr_recovery_stub + 13)
+ mc.MOV_ri(X86_64_SCRATCH_REG.value, adr_new_target)
+ mc.JMP_r(X86_64_SCRATCH_REG.value)
+
mc.valgrind_invalidated()
mc.done()
@@ -434,6 +464,7 @@
if len(unused_gpr) > 0:
self.mc.MOV(loc, unused_gpr.pop())
else:
+ self.mc.ensure_bytes_available(32)
self.mc.MOV_rb(X86_64_SCRATCH_REG.value, (2 + i) * WORD)
self.mc.MOV(loc, X86_64_SCRATCH_REG)
@@ -446,8 +477,7 @@
self.mc.MOVSD_xb(X86_64_XMM_SCRATCH_REG.value, (2 + i) * WORD)
self.mc.MOVSD(loc, X86_64_XMM_SCRATCH_REG)
-
- self.mc.JMP_l(jmpadr)
+ self.mc.JMP(imm(jmpadr))
return adr_stackadjust
@@ -568,7 +598,6 @@
guard_token = self.implement_guard_recovery(guard_opnum,
faildescr, failargs,
faillocs)
- self.pending_guard_tokens.append(guard_token)
if op is None:
dispatch_opnum = guard_opnum
else:
@@ -615,6 +644,9 @@
tmp2 = result_loc.higher8bits()
elif IS_X86_64:
tmp2 = X86_64_SCRATCH_REG.lowest8bits()
+ # We can't do a jump in the middle below, because that could
+ # clobber the scratch register
+ self.mc.ensure_bytes_available(32)
self.mc.SET_ir(rx86.Conditions[cond], tmp1.value)
if is_ne:
@@ -648,16 +680,17 @@
result_loc):
guard_opnum = guard_op.opnum
self.mc.UCOMISD(arglocs[0], arglocs[1])
+ # 16 is enough space for the rel8 jumps below and the rel32
+ # jump in implement_guard
+ self.mc.ensure_bytes_available(16 + guard_token.recovery_stub_size())
if guard_opnum == rop.GUARD_FALSE:
- mc = self.mc._mc
if need_jp:
- mc.J_il8(rx86.Conditions['P'], 6)
+ self.mc.J_il8(rx86.Conditions['P'], 6)
return self.implement_guard(guard_token, cond)
else:
if need_jp:
- mc = self.mc._mc
- mc.J_il8(rx86.Conditions['P'], 2)
- mc.J_il8(rx86.Conditions[cond], 5)
+ self.mc.J_il8(rx86.Conditions['P'], 2)
+ self.mc.J_il8(rx86.Conditions[cond], 5)
return self.implement_guard(guard_token)
return self.implement_guard(guard_token, false_cond)
return genop_cmp_guard_float
@@ -734,6 +767,8 @@
mc.MOVSD(X86_64_XMM_SCRATCH_REG, loc)
mc.MOVSD_sx(i*WORD, X86_64_XMM_SCRATCH_REG.value)
else:
+ if not force_mc:
+ mc.ensure_bytes_available(32)
mc.MOV(X86_64_SCRATCH_REG, loc)
mc.MOV_sr(i*WORD, X86_64_SCRATCH_REG.value)
else:
@@ -847,13 +882,15 @@
def genop_guard_float_ne(self, op, guard_op, guard_token, arglocs, result_loc):
guard_opnum = guard_op.opnum
self.mc.UCOMISD(arglocs[0], arglocs[1])
- mc = self.mc._mc
+ # 16 is enough space for the rel8 jumps below and the rel32
+ # jump in implement_guard
+ self.mc.ensure_bytes_available(16 + guard_token.recovery_stub_size())
if guard_opnum == rop.GUARD_TRUE:
- mc.J_il8(rx86.Conditions['P'], 6)
+ self.mc.J_il8(rx86.Conditions['P'], 6)
return self.implement_guard(guard_token, 'E')
else:
- mc.J_il8(rx86.Conditions['P'], 2)
- mc.J_il8(rx86.Conditions['E'], 5)
+ self.mc.J_il8(rx86.Conditions['P'], 2)
+ self.mc.J_il8(rx86.Conditions['E'], 5)
return self.implement_guard(guard_token)
def genop_float_neg(self, op, arglocs, resloc):
@@ -1190,9 +1227,10 @@
exc = (guard_opnum == rop.GUARD_EXCEPTION or
guard_opnum == rop.GUARD_NO_EXCEPTION or
guard_opnum == rop.GUARD_NOT_FORCED)
- return GuardToken(faildescr, failargs, fail_locs, exc)
+ desc_bytes = self.failure_recovery_description(failargs, fail_locs)
+ return GuardToken(faildescr, failargs, fail_locs, exc, desc_bytes)
- def generate_quick_failure(self, faildescr, failargs, fail_locs, exc):
+ def generate_quick_failure(self, mc, faildescr, failargs, fail_locs, exc, desc_bytes):
"""Generate the initial code for handling a failure. We try to
keep it as compact as possible. The idea is that this code is
executed at most once (and very often, zero times); when
@@ -1200,20 +1238,17 @@
really handle recovery from this particular failure.
"""
fail_index = self.cpu.get_fail_descr_number(faildescr)
- bytes_needed = 20 + 5 * len(failargs) # conservative estimate
- if self.mc.bytes_free() < bytes_needed:
- self.mc.make_new_mc()
- mc = self.mc._mc
addr = mc.tell()
withfloats = False
for box in failargs:
if box is not None and box.type == FLOAT:
withfloats = True
break
- mc.CALL_l(self.failure_recovery_code[exc + 2 * withfloats])
+ mc.CALL(imm(self.failure_recovery_code[exc + 2 * withfloats]))
# write tight data that describes the failure recovery
faildescr._x86_failure_recovery_bytecode = mc.tell()
- self.write_failure_recovery_description(mc, failargs, fail_locs)
+ for byte in desc_bytes:
+ mc.writechr(ord(byte))
# write the fail_index too
mc.writeimm32(fail_index)
# for testing the decoding, write a final byte 0xCC
@@ -1221,6 +1256,12 @@
mc.writechr(0xCC)
faildescr._x86_debug_faillocs = [loc for loc in fail_locs
if loc is not None]
+
+ # Make sure the recovery stub is at least 16 bytes long (for the
+ # case where we overwrite the recovery stub with a 64-bit absolute
+ # jump)
+ while mc.tell() - addr < 16:
+ mc.writechr(0x00)
return addr
DESCR_REF = 0x00
@@ -1232,7 +1273,8 @@
CODE_STOP = 0 | DESCR_SPECIAL
CODE_HOLE = 4 | DESCR_SPECIAL
- def write_failure_recovery_description(self, mc, failargs, locs):
+ def failure_recovery_description(self, failargs, locs):
+ desc_bytes = []
for i in range(len(failargs)):
arg = failargs[i]
if arg is not None:
@@ -1252,14 +1294,19 @@
n = loc.value
n = kind + 4*n
while n > 0x7F:
- mc.writechr((n & 0x7F) | 0x80)
+ desc_bytes.append(chr((n & 0x7F) | 0x80))
n >>= 7
else:
n = self.CODE_HOLE
- mc.writechr(n)
- mc.writechr(self.CODE_STOP)
+ desc_bytes.append(chr(n))
+ desc_bytes.append(chr(self.CODE_STOP))
# assert that the fail_boxes lists are big enough
assert len(failargs) <= self.fail_boxes_int.SIZE
+ return desc_bytes
+
+ def write_failure_recovery_description(self, mc, failargs, locs):
+ for byte in self.failure_recovery_description(failargs, locs):
+ mc.writechr(ord(byte))
def rebuild_faillocs_from_descr(self, bytecode):
from pypy.jit.backend.x86.regalloc import X86FrameManager
@@ -1421,7 +1468,7 @@
# avoid unwarranted freeing
# - optionally save exception depending on the flag
addr = self.cpu.get_on_leave_jitted_int(save_exception=exc)
- mc.CALL_l(addr)
+ mc.CALL(imm(addr))
# the following call saves all values from the stack and from
# registers to the right 'fail_boxes_<type>' location.
@@ -1442,7 +1489,7 @@
else:
raise AssertionError("Shouldn't happen")
- mc.CALL_l(failure_recovery_func)
+ mc.CALL(imm(failure_recovery_func))
# returns in eax the fail_index
# now we return from the complete frame, which starts from
@@ -1491,7 +1538,7 @@
# avoid unwarranted freeing
# - optionally save exception depending on the flag
addr = self.cpu.get_on_leave_jitted_int(save_exception=exc)
- mc.CALL_l(addr)
+ mc.CALL(imm(addr))
mc.MOV(eax, imm(fail_index))
@@ -1502,7 +1549,10 @@
# what to do with @specialize
@specialize.arg(2)
def implement_guard(self, guard_token, condition=None):
- # These jumps are patched later
+ self.mc.reserve_bytes(guard_token.recovery_stub_size())
+ self.pending_guard_tokens.append(guard_token)
+ # XXX: These jumps are patched later; the self.mc.tell() arguments
+ # are just dummy values for now
if condition:
self.mc.J_il(rx86.Conditions[condition], self.mc.tell())
else:
@@ -1697,7 +1747,7 @@
return loop_token._x86_arglocs
def closing_jump(self, loop_token):
- self.mc.JMP_l(loop_token._x86_loop_code)
+ self.mc.JMP(imm(loop_token._x86_loop_code))
def malloc_cond_fixedsize(self, nursery_free_adr, nursery_top_adr,
size, tid):
@@ -1723,10 +1773,10 @@
# reserve room for the argument to the real malloc and the
# 8 saved XMM regs
self._regalloc.reserve_param(1+16)
- mc.CALL_l(slowpath_addr1)
+ mc.CALL(imm(slowpath_addr1))
self.mark_gc_roots()
slowpath_addr2 = self.malloc_fixedsize_slowpath2
- mc.CALL_l(slowpath_addr2)
+ mc.CALL(imm(slowpath_addr2))
offset = mc.get_relative_pos() - jmp_adr
assert 0 < offset <= 127
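(Aside on failure_recovery_description above: each fail location is
packed as kind + 4*n and written as a little-endian base-128 varint,
seven payload bits per byte with the continuation bit set on all but the
last byte. A hypothetical standalone restatement of that loop:)

    def encode_num(n):
        # illustrative only; matches the while-loop in
        # failure_recovery_description
        desc_bytes = []
        while n > 0x7F:
            desc_bytes.append(chr((n & 0x7F) | 0x80))  # low 7 bits + continuation bit
            n >>= 7
        desc_bytes.append(chr(n))  # final byte, high bit clear
        return desc_bytes

    # encode_num(0x345) -> ['\xc5', '\x06']   (0x345 == 0b110_1000101)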
Modified: pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/regloc.py
==============================================================================
--- pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/regloc.py (original)
+++ pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/regloc.py Tue Jun 29 21:12:33 2010
@@ -224,6 +224,25 @@
return func_with_new_name(INSN, "INSN_" + name)
+ def _relative_unaryop(name):
+ def INSN(self, loc):
+ code = loc.location_code()
+ for possible_code in unrolling_location_codes:
+ if code == possible_code:
+ val = getattr(loc, "value_" + possible_code)()
+ if self.WORD == 8 and possible_code == 'i':
+ offset = val - (self.tell() + 5)
+ if rx86.fits_in_32bits(offset):
+ _rx86_getattr(self, name + "_l")(val)
+ else:
+ self.MOV_ri(X86_64_SCRATCH_REG.value, val)
+ _rx86_getattr(self, name + "_r")(X86_64_SCRATCH_REG.value)
+ else:
+ methname = name + "_" + possible_code
+ _rx86_getattr(self, methname)(val)
+
+ return func_with_new_name(INSN, "INSN_" + name)
+
def _16_bit_binaryop(name):
def INSN(self, loc1, loc2):
# Select 16-bit operand mode
@@ -306,8 +325,8 @@
ANDPD = _binaryop('ANDPD')
XORPD = _binaryop('XORPD')
- CALL = _unaryop('CALL')
- JMP = _unaryop('JMP')
+ CALL = _relative_unaryop('CALL')
+ JMP = _relative_unaryop('JMP')
def imm(x):
# XXX: ri386 migration shim
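(With CALL and JMP now going through _relative_unaryop, call sites in
assembler.py pass a location to the polymorphic method instead of
picking a suffixed one themselves; roughly, with a hypothetical addr:)

    mc.CALL(imm(addr))   # CALL_l if addr is rel32-reachable from here,
                         # otherwise mov r11, addr; call r11
    mc.JMP(imm(target))  # same dispatch for jumps
    mc.JMP(eax)          # non-immediate locations still dispatch directly
                         # on their location code (JMP_r here)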
Modified: pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/rx86.py
==============================================================================
--- pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/rx86.py (original)
+++ pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/rx86.py Tue Jun 29 21:12:33 2010
@@ -575,17 +575,6 @@
else:
AbstractX86CodeBuilder.MOV_ri(self, reg, immed)
- # case of a 64-bit immediate: encode via RAX (assuming it's ok to
- # randomly change this register at that point in time)
- def CALL_l(self, target):
- offset = target - (self.tell() + 5)
- if fits_in_32bits(offset):
- AbstractX86CodeBuilder.CALL_l(self, target)
- else:
- self.MOV_ri(R.eax, target)
- self.CALL_r(R.eax)
-
-
def define_modrm_modes(insnname_template, before_modrm, after_modrm=[], regtype='GPR'):
def add_insn(code, *modrm):
args = before_modrm + list(modrm) + after_modrm
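(The removed CALL_l override encoded the 64-bit-immediate case via RAX,
"assuming it's ok to randomly change this register"; the replacement in
regloc.py uses R11, the designated scratch register, instead. For the
record, the arithmetic behind the "13 bytes" comment in
patch_jump_for_descr earlier; a hedged byte-count check, not part of the
commit:)

    MOV_R11_IMM64 = 2 + 8  # 49 BB <imm64>: REX.W+B prefix, opcode, 8-byte immediate
    JMP_R11 = 3            # 41 FF E3: REX.B prefix, FF /4
    assert MOV_R11_IMM64 + JMP_R11 == 13  # the window patch_jump_for_descr rewrites
    # generate_quick_failure pads every recovery stub to at least 16
    # bytes, so this 13-byte overwrite always fits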
Modified: pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_assembler.py
==============================================================================
--- pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_assembler.py (original)
+++ pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_assembler.py Tue Jun 29 21:12:33 2010
@@ -26,11 +26,14 @@
return self.base_address + len(self.content)
def get_relative_pos(self):
return len(self.content)
- def JMP_l(self, *args):
+ def JMP(self, *args):
self.content.append(("JMP", args))
def done(self):
pass
+class FakeAssembler:
+ def write_pending_failure_recoveries(self):
+ pass
def test_write_failure_recovery_description():
assembler = Assembler386(FakeCPU())
@@ -257,7 +260,8 @@
def test_mc_wrapper_profile_agent():
agent = FakeProfileAgent()
- mc = FakeMCWrapper(100, agent)
+ assembler = FakeAssembler()
+ mc = FakeMCWrapper(assembler, 100, agent)
mc.start_function("abc")
mc.writechr("x")
mc.writechr("x")
Modified: pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_runner.py
==============================================================================
--- pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_runner.py (original)
+++ pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_runner.py Tue Jun 29 21:12:33 2010
@@ -399,8 +399,6 @@
assert self.cpu.get_latest_value_int(0) == 1024
def test_overflow_guard_float_cmp(self):
- # FIXME: Skipping for now
- import py.test; py.test.skip()
# The float comparisons on x86 tend to use small relative jumps,
# which may run into trouble if they fall on the edge of a
# MachineCodeBlock change.
Modified: pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_rx86.py
==============================================================================
--- pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_rx86.py (original)
+++ pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_rx86.py Tue Jun 29 21:12:33 2010
@@ -208,12 +208,3 @@
s = CodeBuilder64()
s.MOV_rm(edx, (edi, -1))
assert s.getvalue() == '\x48\x8B\x57\xFF'
-
-def test_call_l_64():
- # first check that it works there too
- test_call_l(CodeBuilder64())
- # then check the other case
- s = CodeBuilder64()
- target = 0x0123456789ABCDEF
- s.CALL_l(target) # becomes indirect, via RAX
- assert s.getvalue() == '\x48\xB8' + struct.pack("<q", target) + '\xFF\xD0'