[pypy-svn] r76438 - in pypy/trunk: . pypy/config pypy/jit/backend pypy/jit/backend/llsupport pypy/jit/backend/test pypy/jit/backend/x86 pypy/jit/backend/x86/test pypy/jit/backend/x86/tool pypy/module/signal
jcreigh at codespeak.net
jcreigh at codespeak.net
Mon Aug 2 19:52:18 CEST 2010
Author: jcreigh
Date: Mon Aug 2 19:52:15 2010
New Revision: 76438
Added:
pypy/trunk/pypy/jit/backend/x86/arch.py
- copied unchanged from r76437, pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/arch.py
pypy/trunk/pypy/jit/backend/x86/regloc.py
- copied unchanged from r76437, pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/regloc.py
pypy/trunk/pypy/jit/backend/x86/rx86.py
- copied unchanged from r76437, pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/rx86.py
pypy/trunk/pypy/jit/backend/x86/test/test_regloc.py
- copied unchanged from r76437, pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_regloc.py
pypy/trunk/pypy/jit/backend/x86/test/test_rx86.py
- copied unchanged from r76437, pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_rx86.py
pypy/trunk/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py
- copied unchanged from r76437, pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py
pypy/trunk/pypy/jit/backend/x86/test/test_rx86_64_auto_encoding.py
- copied unchanged from r76437, pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_rx86_64_auto_encoding.py
pypy/trunk/pypy/jit/backend/x86/tool/instruction_encoding.sh
- copied unchanged from r76437, pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/tool/instruction_encoding.sh
Removed:
pypy/trunk/pypy/jit/backend/x86/ri386.py
pypy/trunk/pypy/jit/backend/x86/ri386setup.py
pypy/trunk/pypy/jit/backend/x86/test/test_ri386.py
pypy/trunk/pypy/jit/backend/x86/test/test_ri386_auto_encoding.py
Modified:
pypy/trunk/ (props changed)
pypy/trunk/pypy/config/translationoption.py
pypy/trunk/pypy/jit/backend/detect_cpu.py
pypy/trunk/pypy/jit/backend/llsupport/regalloc.py
pypy/trunk/pypy/jit/backend/test/runner_test.py
pypy/trunk/pypy/jit/backend/x86/assembler.py
pypy/trunk/pypy/jit/backend/x86/codebuf.py
pypy/trunk/pypy/jit/backend/x86/jump.py
pypy/trunk/pypy/jit/backend/x86/regalloc.py
pypy/trunk/pypy/jit/backend/x86/runner.py
pypy/trunk/pypy/jit/backend/x86/test/conftest.py
pypy/trunk/pypy/jit/backend/x86/test/test_assembler.py
pypy/trunk/pypy/jit/backend/x86/test/test_basic.py
pypy/trunk/pypy/jit/backend/x86/test/test_gc_integration.py
pypy/trunk/pypy/jit/backend/x86/test/test_jump.py
pypy/trunk/pypy/jit/backend/x86/test/test_recompilation.py
pypy/trunk/pypy/jit/backend/x86/test/test_regalloc.py
pypy/trunk/pypy/jit/backend/x86/test/test_regalloc2.py
pypy/trunk/pypy/jit/backend/x86/test/test_runner.py
pypy/trunk/pypy/jit/backend/x86/test/test_symbolic_x86.py
pypy/trunk/pypy/jit/backend/x86/test/test_zll_random.py
pypy/trunk/pypy/jit/backend/x86/test/test_zrpy_gc.py
pypy/trunk/pypy/jit/backend/x86/test/test_ztranslation.py
pypy/trunk/pypy/jit/backend/x86/tool/viewcode.py
pypy/trunk/pypy/module/signal/interp_signal.py
Log:
merge x86-64-jit-backend to trunk
Modified: pypy/trunk/pypy/config/translationoption.py
==============================================================================
--- pypy/trunk/pypy/config/translationoption.py (original)
+++ pypy/trunk/pypy/config/translationoption.py Mon Aug 2 19:52:15 2010
@@ -342,6 +342,10 @@
'jit': 'hybrid extraopts jit',
}
+# For now, 64-bit JIT requires boehm
+if IS_64_BITS:
+ OPT_TABLE['jit'] = OPT_TABLE['jit'].replace('hybrid', 'boehm')
+
def set_opt_level(config, level):
"""Apply optimization suggestions on the 'config'.
The optimizations depend on the selected level and possibly on the backend.
Modified: pypy/trunk/pypy/jit/backend/detect_cpu.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/detect_cpu.py (original)
+++ pypy/trunk/pypy/jit/backend/detect_cpu.py Mon Aug 2 19:52:15 2010
@@ -56,6 +56,8 @@
return "pypy.jit.backend.x86.runner", "CPU"
elif backend_name == 'x86-without-sse2':
return "pypy.jit.backend.x86.runner", "CPU386_NO_SSE2"
+ elif backend_name == 'x86_64':
+ return "pypy.jit.backend.x86.runner", "CPU_X86_64"
elif backend_name == 'cli':
return "pypy.jit.backend.cli.runner", "CliCPU"
elif backend_name == 'llvm':
Modified: pypy/trunk/pypy/jit/backend/llsupport/regalloc.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/llsupport/regalloc.py (original)
+++ pypy/trunk/pypy/jit/backend/llsupport/regalloc.py Mon Aug 2 19:52:15 2010
@@ -22,18 +22,19 @@
def get(self, box):
return self.frame_bindings.get(box, None)
- def loc(self, box, size):
+ def loc(self, box):
res = self.get(box)
if res is not None:
return res
- newloc = self.frame_pos(self.frame_depth, size)
+ newloc = self.frame_pos(self.frame_depth, box.type)
self.frame_bindings[box] = newloc
- self.frame_depth += size
+ # Objects returned by frame_pos must support frame_size()
+ self.frame_depth += newloc.frame_size()
return newloc
# abstract methods that need to be overwritten for specific assemblers
@staticmethod
- def frame_pos(loc, size):
+ def frame_pos(loc, type):
raise NotImplementedError("Purely abstract")
class RegisterManager(object):
@@ -43,7 +44,6 @@
all_regs = []
no_lower_byte_regs = []
save_around_call_regs = []
- reg_width = 1 # in terms of stack space eaten
def __init__(self, longevity, frame_manager=None, assembler=None):
self.free_regs = self.all_regs[:]
@@ -148,7 +148,7 @@
loc = self.reg_bindings[v_to_spill]
del self.reg_bindings[v_to_spill]
if self.frame_manager.get(v_to_spill) is None:
- newloc = self.frame_manager.loc(v_to_spill, self.reg_width)
+ newloc = self.frame_manager.loc(v_to_spill)
self.assembler.regalloc_mov(loc, newloc)
return loc
@@ -204,7 +204,7 @@
try:
return self.reg_bindings[box]
except KeyError:
- return self.frame_manager.loc(box, self.reg_width)
+ return self.frame_manager.loc(box)
def return_constant(self, v, forbidden_vars=[], selected_reg=None,
imm_fine=True):
@@ -260,7 +260,7 @@
self.reg_bindings[v] = loc
self.assembler.regalloc_mov(prev_loc, loc)
else:
- loc = self.frame_manager.loc(v, self.reg_width)
+ loc = self.frame_manager.loc(v)
self.assembler.regalloc_mov(prev_loc, loc)
def force_result_in_reg(self, result_v, v, forbidden_vars=[]):
@@ -280,7 +280,7 @@
self.free_regs = [reg for reg in self.free_regs if reg is not loc]
return loc
if v not in self.reg_bindings:
- prev_loc = self.frame_manager.loc(v, self.reg_width)
+ prev_loc = self.frame_manager.loc(v)
loc = self.force_allocate_reg(v, forbidden_vars)
self.assembler.regalloc_mov(prev_loc, loc)
assert v in self.reg_bindings
@@ -300,7 +300,7 @@
def _sync_var(self, v):
if not self.frame_manager.get(v):
reg = self.reg_bindings[v]
- to = self.frame_manager.loc(v, self.reg_width)
+ to = self.frame_manager.loc(v)
self.assembler.regalloc_mov(reg, to)
# otherwise it's clean
Modified: pypy/trunk/pypy/jit/backend/test/runner_test.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/test/runner_test.py (original)
+++ pypy/trunk/pypy/jit/backend/test/runner_test.py Mon Aug 2 19:52:15 2010
@@ -461,6 +461,25 @@
[funcbox] + args,
'float', descr=calldescr)
assert abs(res.value - 4.6) < 0.0001
+
+ def test_call_many_arguments(self):
+ # Test calling a function with a large number of arguments (more than
+ # 6, which will force passing some arguments on the stack on 64-bit)
+
+ def func(*args):
+ assert len(args) == 16
+ # Try to sum up args in a way that would probably detect a
+ # transposed argument
+ return sum(arg * (2**i) for i, arg in enumerate(args))
+
+ FUNC = self.FuncType([lltype.Signed]*16, lltype.Signed)
+ FPTR = self.Ptr(FUNC)
+ calldescr = self.cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+ func_ptr = llhelper(FPTR, func)
+ args = range(16)
+ funcbox = self.get_funcbox(self.cpu, func_ptr)
+ res = self.execute_operation(rop.CALL, [funcbox] + map(BoxInt, args), 'int', descr=calldescr)
+ assert res.value == func(*args)
def test_call_stack_alignment(self):
# test stack alignment issues, notably for Mac OS/X.
@@ -978,6 +997,8 @@
else:
assert 0
operations.append(ResOperation(opnum, boxargs, boxres))
+ # Unique-ify inputargs
+ inputargs = list(set(inputargs))
faildescr = BasicFailDescr(1)
operations.append(ResOperation(rop.FINISH, [], None,
descr=faildescr))
@@ -1050,9 +1071,11 @@
descr=BasicFailDescr(5))]
operations[1].fail_args = []
looptoken = LoopToken()
- self.cpu.compile_loop(list(testcase), operations,
+ # Use "set" to unique-ify inputargs
+ unique_testcase_list = list(set(testcase))
+ self.cpu.compile_loop(unique_testcase_list, operations,
looptoken)
- for i, box in enumerate(testcase):
+ for i, box in enumerate(unique_testcase_list):
self.cpu.set_future_value_float(i, box.value)
fail = self.cpu.execute_token(looptoken)
if fail.identifier != 5 - (expected_id^expected):
@@ -1695,7 +1718,7 @@
def test_assembler_call(self):
called = []
def assembler_helper(failindex, virtualizable):
- assert self.cpu.get_latest_value_int(0) == 10
+ assert self.cpu.get_latest_value_int(0) == 97
called.append(failindex)
return 4 + 9
@@ -1708,33 +1731,41 @@
_assembler_helper_ptr)
ops = '''
- [i0, i1]
- i2 = int_add(i0, i1)
- finish(i2)'''
+ [i0, i1, i2, i3, i4, i5, i6, i7, i8, i9]
+ i10 = int_add(i0, i1)
+ i11 = int_add(i10, i2)
+ i12 = int_add(i11, i3)
+ i13 = int_add(i12, i4)
+ i14 = int_add(i13, i5)
+ i15 = int_add(i14, i6)
+ i16 = int_add(i15, i7)
+ i17 = int_add(i16, i8)
+ i18 = int_add(i17, i9)
+ finish(i18)'''
loop = parse(ops)
looptoken = LoopToken()
looptoken.outermost_jitdriver_sd = FakeJitDriverSD()
self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
- ARGS = [lltype.Signed, lltype.Signed]
+ ARGS = [lltype.Signed] * 10
RES = lltype.Signed
self.cpu.portal_calldescr = self.cpu.calldescrof(
lltype.Ptr(lltype.FuncType(ARGS, RES)), ARGS, RES)
- self.cpu.set_future_value_int(0, 1)
- self.cpu.set_future_value_int(1, 2)
+ for i in range(10):
+ self.cpu.set_future_value_int(i, i+1)
res = self.cpu.execute_token(looptoken)
- assert self.cpu.get_latest_value_int(0) == 3
+ assert self.cpu.get_latest_value_int(0) == 55
ops = '''
- [i4, i5]
- i6 = int_add(i4, 1)
- i3 = call_assembler(i6, i5, descr=looptoken)
+ [i0, i1, i2, i3, i4, i5, i6, i7, i8, i9]
+ i10 = int_add(i0, 42)
+ i11 = call_assembler(i10, i1, i2, i3, i4, i5, i6, i7, i8, i9, descr=looptoken)
guard_not_forced()[]
- finish(i3)
+ finish(i11)
'''
loop = parse(ops, namespace=locals())
othertoken = LoopToken()
self.cpu.compile_loop(loop.inputargs, loop.operations, othertoken)
- self.cpu.set_future_value_int(0, 4)
- self.cpu.set_future_value_int(1, 5)
+ for i in range(10):
+ self.cpu.set_future_value_int(i, i+1)
res = self.cpu.execute_token(othertoken)
assert self.cpu.get_latest_value_int(0) == 13
assert called
Modified: pypy/trunk/pypy/jit/backend/x86/assembler.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/x86/assembler.py (original)
+++ pypy/trunk/pypy/jit/backend/x86/assembler.py Mon Aug 2 19:52:15 2010
@@ -7,24 +7,34 @@
from pypy.rpython.lltypesystem.lloperation import llop
from pypy.rpython.annlowlevel import llhelper
from pypy.tool.uid import fixid
-from pypy.jit.backend.x86.regalloc import RegAlloc, WORD,\
- X86RegisterManager, X86XMMRegisterManager, get_ebp_ofs, FRAME_FIXED_SIZE,\
- FORCE_INDEX_OFS
+from pypy.jit.backend.x86.regalloc import RegAlloc, \
+ X86RegisterManager, X86XMMRegisterManager, get_ebp_ofs
+
+from pypy.jit.backend.x86.arch import FRAME_FIXED_SIZE, FORCE_INDEX_OFS, WORD, IS_X86_32, IS_X86_64
+
+from pypy.jit.backend.x86.regloc import (eax, ecx, edx, ebx,
+ esp, ebp, esi, edi,
+ xmm0, xmm1, xmm2, xmm3,
+ xmm4, xmm5, xmm6, xmm7,
+ r8, r9, r10, r11,
+ r12, r13, r14, r15,
+ X86_64_SCRATCH_REG,
+ X86_64_XMM_SCRATCH_REG,
+ RegLoc, StackLoc,
+ ImmedLoc, AddressLoc, imm)
+
from pypy.rlib.objectmodel import we_are_translated, specialize
-from pypy.jit.backend.x86 import codebuf
-from pypy.jit.backend.x86.ri386 import *
+from pypy.jit.backend.x86 import rx86, regloc, codebuf
from pypy.jit.metainterp.resoperation import rop
from pypy.jit.backend.x86.support import values_array
from pypy.rlib.debug import debug_print
from pypy.rlib import rgc
+from pypy.jit.backend.x86.jump import remap_frame_layout
from pypy.rlib.streamio import open_file_as_stream
-# our calling convention - we pass first 6 args in registers
-# and the rest stays on the stack
-
# darwin requires the stack to be 16 bytes aligned on calls. Same for gcc 4.5.0,
# better safe than sorry
-CALL_ALIGN = 4
+CALL_ALIGN = 16 // WORD
def align_stack_words(words):
return (words + CALL_ALIGN - 1) & ~(CALL_ALIGN-1)
@@ -32,16 +42,36 @@
class MachineCodeBlockWrapper(object):
MC_DEFAULT_SIZE = 1024*1024
- def __init__(self, bigsize, profile_agent=None):
+ def __init__(self, assembler, bigsize, profile_agent=None):
+ self.assembler = assembler
self.old_mcs = [] # keepalive
self.bigsize = bigsize
self._mc = self._instantiate_mc()
self.function_name = None
self.profile_agent = profile_agent
+ self.reset_reserved_bytes()
def _instantiate_mc(self): # hook for testing
return codebuf.MachineCodeBlock(self.bigsize)
+ def ensure_bytes_available(self, num_bytes):
+ if self.bytes_free() <= (self._reserved_bytes + num_bytes):
+ self.make_new_mc()
+
+ def reserve_bytes(self, num_bytes):
+ self.ensure_bytes_available(num_bytes)
+ self._reserved_bytes += num_bytes
+
+ def reset_reserved_bytes(self):
+ # XXX er.... pretty random number, just to be sure
+ # not to write half-instruction
+ self._reserved_bytes = 64
+
+ def get_relative_pos(self):
+ return self._mc.get_relative_pos()
+
+ def overwrite(self, pos, listofchars):
+ return self._mc.overwrite(pos, listofchars)
def bytes_free(self):
return self._mc._size - self._mc.get_relative_pos()
@@ -63,12 +93,25 @@
def make_new_mc(self):
new_mc = self._instantiate_mc()
debug_print('[new machine code block at', new_mc.tell(), ']')
- self._mc.JMP(rel32(new_mc.tell()))
+
+ if IS_X86_64:
+ # The scratch register is sometimes used as a temporary
+ # register, but the JMP below might clobber it. Rather than risk
+ # subtle bugs, we preserve the scratch register across the jump.
+ self._mc.PUSH_r(X86_64_SCRATCH_REG.value)
+
+ self._mc.JMP(imm(new_mc.tell()))
+
+ if IS_X86_64:
+ # Restore scratch reg
+ new_mc.POP_r(X86_64_SCRATCH_REG.value)
if self.function_name is not None:
self.end_function(done=False)
self.start_pos = new_mc.get_relative_pos()
+ self.assembler.write_pending_failure_recoveries()
+
self._mc.done()
self.old_mcs.append(self._mc)
self._mc = new_mc
@@ -82,21 +125,33 @@
def _new_method(name):
def method(self, *args):
- # XXX er.... pretty random number, just to be sure
- # not to write half-instruction
- if self.bytes_free() < 64:
+ if self.bytes_free() < self._reserved_bytes:
self.make_new_mc()
getattr(self._mc, name)(*args)
method.func_name = name
return method
+for _name in rx86.all_instructions + regloc.all_extra_instructions:
+ setattr(MachineCodeBlockWrapper, _name, _new_method(_name))
+
for name in dir(codebuf.MachineCodeBlock):
if name.upper() == name or name == "writechr":
setattr(MachineCodeBlockWrapper, name, _new_method(name))
+class GuardToken(object):
+ def __init__(self, faildescr, failargs, fail_locs, exc, desc_bytes):
+ self.faildescr = faildescr
+ self.failargs = failargs
+ self.fail_locs = fail_locs
+ self.exc = exc
+ self.desc_bytes = desc_bytes
+
+ def recovery_stub_size(self):
+ # XXX: 32 is pulled out of the air
+ return 32 + len(self.desc_bytes)
+
class Assembler386(object):
mc = None
- mc2 = None
mc_size = MachineCodeBlockWrapper.MC_DEFAULT_SIZE
_float_constants = None
_regalloc = None
@@ -118,10 +173,11 @@
self.loop_run_counter = values_array(lltype.Signed, 10000)
self.loop_names = []
# if we have 10000 loops, we have some other problems I guess
- self.loc_float_const_neg = None
- self.loc_float_const_abs = None
+ self.float_const_neg_addr = 0
+ self.float_const_abs_addr = 0
self.malloc_fixedsize_slowpath1 = 0
self.malloc_fixedsize_slowpath2 = 0
+ self.pending_guard_tokens = None
self.setup_failure_recovery()
self._loop_counter = 0
self._debug = False
@@ -134,7 +190,7 @@
def set_debug(self, v):
self._debug = v
- def make_sure_mc_exists(self):
+ def setup(self):
if self.mc is None:
# the address of the function called by 'new'
gc_ll_descr = self.cpu.gc_ll_descr
@@ -153,11 +209,7 @@
ll_new_unicode = gc_ll_descr.get_funcptr_for_newunicode()
self.malloc_unicode_func_addr = rffi.cast(lltype.Signed,
ll_new_unicode)
- # done
- # we generate the loop body in 'mc'
- # 'mc2' is for guard recovery code
- self.mc = MachineCodeBlockWrapper(self.mc_size, self.cpu.profile_agent)
- self.mc2 = MachineCodeBlockWrapper(self.mc_size)
+ self.mc = MachineCodeBlockWrapper(self, self.mc_size, self.cpu.profile_agent)
self._build_failure_recovery(False)
self._build_failure_recovery(True)
if self.cpu.supports_floats:
@@ -173,6 +225,12 @@
s = s.split(':')[-1]
self.set_debug(True)
self._output_loop_log = s + ".count"
+ # Intialize here instead of __init__ to prevent
+ # pending_guard_tokens from being considered a prebuilt object,
+ # which sometimes causes memory leaks since the prebuilt list is
+ # still considered a GC root after we re-assign
+ # pending_guard_tokens in write_pending_failure_recoveries
+ self.pending_guard_tokens = []
def finish_once(self):
if self._debug:
@@ -185,44 +243,48 @@
f.close()
def _build_float_constants(self):
- # 11 words: 8 words for the data, and up to 3 words for alignment
- addr = lltype.malloc(rffi.CArray(lltype.Signed), 11, flavor='raw')
+ # 44 bytes: 32 bytes for the data, and up to 12 bytes for alignment
+ addr = lltype.malloc(rffi.CArray(lltype.Char), 44, flavor='raw')
if not we_are_translated():
self._keepalive_malloced_float_consts = addr
float_constants = rffi.cast(lltype.Signed, addr)
float_constants = (float_constants + 15) & ~15 # align to 16 bytes
- addr = rffi.cast(rffi.CArrayPtr(lltype.Signed), float_constants)
- addr[0] = 0 # \
- addr[1] = -2147483648 # / for neg
- addr[2] = 0 #
- addr[3] = 0 #
- addr[4] = -1 # \
- addr[5] = 2147483647 # / for abs
- addr[6] = 0 #
- addr[7] = 0 #
- self.loc_float_const_neg = heap64(float_constants)
- self.loc_float_const_abs = heap64(float_constants + 16)
+ addr = rffi.cast(rffi.CArrayPtr(lltype.Char), float_constants)
+ qword_padding = '\x00\x00\x00\x00\x00\x00\x00\x00'
+ # 0x8000000000000000
+ neg_const = '\x00\x00\x00\x00\x00\x00\x00\x80'
+ # 0x7FFFFFFFFFFFFFFF
+ abs_const = '\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F'
+ data = neg_const + qword_padding + abs_const + qword_padding
+ for i in range(len(data)):
+ addr[i] = data[i]
+ self.float_const_neg_addr = float_constants
+ self.float_const_abs_addr = float_constants + 16
def _build_malloc_fixedsize_slowpath(self):
- mc = self.mc2._mc
# ---------- first helper for the slow path of malloc ----------
- self.malloc_fixedsize_slowpath1 = mc.tell()
+ self.malloc_fixedsize_slowpath1 = self.mc.tell()
if self.cpu.supports_floats: # save the XMM registers in
- for i in range(8): # the *caller* frame, from esp+8
- mc.MOVSD(mem64(esp, 8+8*i), xmm_registers[i])
- mc.SUB(edx, eax) # compute the size we want
- mc.MOV(mem(esp, 4), edx) # save it as the new argument
+ for i in range(self.cpu.NUM_REGS):# the *caller* frame, from esp+8
+ self.mc.MOVSD_sx((WORD*2)+8*i, i)
+ self.mc.SUB_rr(edx.value, eax.value) # compute the size we want
+ if IS_X86_32:
+ self.mc.MOV_sr(WORD, edx.value) # save it as the new argument
+ elif IS_X86_64:
+ # FIXME: We can't just clobber rdi like this, can we?
+ self.mc.MOV_rr(edi.value, edx.value)
+
addr = self.cpu.gc_ll_descr.get_malloc_fixedsize_slowpath_addr()
- mc.JMP(rel32(addr)) # tail call to the real malloc
+ self.mc.JMP(imm(addr)) # tail call to the real malloc
# ---------- second helper for the slow path of malloc ----------
- self.malloc_fixedsize_slowpath2 = mc.tell()
+ self.malloc_fixedsize_slowpath2 = self.mc.tell()
if self.cpu.supports_floats: # restore the XMM registers
- for i in range(8): # from where they were saved
- mc.MOVSD(xmm_registers[i], mem64(esp, 8+8*i))
+ for i in range(self.cpu.NUM_REGS):# from where they were saved
+ self.mc.MOVSD_xs(i, (WORD*2)+8*i)
nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
- mc.MOV(edx, heap(nursery_free_adr)) # load this in EDX
- mc.RET()
- self.mc2.done()
+ self.mc.MOV(edx, heap(nursery_free_adr)) # load this in EDX
+ self.mc.RET()
+ self.mc.done()
def assemble_loop(self, inputargs, operations, looptoken):
"""adds the following attributes to looptoken:
@@ -233,15 +295,17 @@
_x86_param_depth
_x86_arglocs
"""
- self.make_sure_mc_exists()
+ if not we_are_translated():
+ # Arguments should be unique
+ assert len(set(inputargs)) == len(inputargs)
+
+ self.setup()
funcname = self._find_debug_merge_point(operations)
+
regalloc = RegAlloc(self, self.cpu.translate_support_code)
arglocs = regalloc.prepare_loop(inputargs, operations, looptoken)
looptoken._x86_arglocs = arglocs
- needed_mem = len(arglocs[0]) * 16 + 16
- if needed_mem >= self.mc.bytes_free():
- self.mc.make_new_mc()
# profile support
name = "Loop # %s: %s" % (looptoken.number, funcname)
@@ -256,21 +320,21 @@
self._patch_stackadjust(adr_stackadjust, frame_depth+param_depth)
looptoken._x86_frame_depth = frame_depth
looptoken._x86_param_depth = param_depth
- # we need to make sure here that we don't overload an mc badly.
- # a safe estimate is that we need at most 16 bytes per arg
- needed_mem = len(arglocs[0]) * 16 + 16
- if needed_mem >= self.mc.bytes_free():
- self.mc.make_new_mc()
+
looptoken._x86_direct_bootstrap_code = self.mc.tell()
self._assemble_bootstrap_direct_call(arglocs, curadr,
frame_depth+param_depth)
debug_print("Loop #", looptoken.number, "has address",
looptoken._x86_loop_code, "to", self.mc.tell())
self.mc.end_function()
+ self.write_pending_failure_recoveries()
-
def assemble_bridge(self, faildescr, inputargs, operations):
- self.make_sure_mc_exists()
+ if not we_are_translated():
+ # Arguments should be unique
+ assert len(set(inputargs)) == len(inputargs)
+
+ self.setup()
funcname = self._find_debug_merge_point(operations)
arglocs = self.rebuild_faillocs_from_descr(
@@ -302,6 +366,19 @@
descr_number,
"has address", adr_bridge, "to", self.mc.tell())
self.mc.end_function()
+ self.write_pending_failure_recoveries()
+
+ def write_pending_failure_recoveries(self):
+ for tok in self.pending_guard_tokens:
+ # Okay to write to _mc because we've already made sure that
+ # there's enough space by "reserving" bytes.
+ addr = self.generate_quick_failure(self.mc._mc, tok.faildescr, tok.failargs, tok.fail_locs, tok.exc, tok.desc_bytes)
+ tok.faildescr._x86_adr_recovery_stub = addr
+ self.patch_jump_for_descr(tok.faildescr, addr)
+
+ self.pending_guard_tokens = []
+ self.mc.reset_reserved_bytes()
+ self.mc.done()
def _find_debug_merge_point(self, operations):
@@ -319,8 +396,21 @@
def patch_jump_for_descr(self, faildescr, adr_new_target):
adr_jump_offset = faildescr._x86_adr_jump_offset
- mc = codebuf.InMemoryCodeBuilder(adr_jump_offset, adr_jump_offset + 4)
- mc.write(packimm32(adr_new_target - adr_jump_offset - 4))
+ adr_recovery_stub = faildescr._x86_adr_recovery_stub
+ offset = adr_new_target - (adr_jump_offset + 4)
+ # If the new target fits within a rel32 of the jump, just patch
+ # that. Otherwise, leave the original rel32 to the recovery stub in
+ # place, but clobber the recovery stub with a jump to the real
+ # target.
+ if rx86.fits_in_32bits(offset):
+ mc = codebuf.InMemoryCodeBuilder(adr_jump_offset, adr_jump_offset + 4)
+ mc.writeimm32(offset)
+ else:
+ # "mov r11, addr; jmp r11" is 13 bytes
+ mc = codebuf.InMemoryCodeBuilder(adr_recovery_stub, adr_recovery_stub + 13)
+ mc.MOV_ri(X86_64_SCRATCH_REG.value, adr_new_target)
+ mc.JMP_r(X86_64_SCRATCH_REG.value)
+
mc.valgrind_invalidated()
mc.done()
@@ -337,7 +427,6 @@
self.mc.POP(eax)
regalloc.walk_operations(operations)
self.mc.done()
- self.mc2.done()
if we_are_translated() or self.cpu.dont_keepalive_stuff:
self._regalloc = None # else keep it around for debugging
frame_depth = regalloc.fm.frame_depth
@@ -352,7 +441,7 @@
def _patchable_stackadjust(self):
# stack adjustment LEA
- self.mc.LEA(esp, fixedsize_ebp_ofs(0))
+ self.mc.LEA32_rb(esp.value, 0)
return self.mc.tell() - 4
def _patch_stackadjust(self, adr_lea, reserved_depth):
@@ -361,23 +450,34 @@
# Compute the correct offset for the instruction LEA ESP, [EBP-4*words].
# Given that [EBP] is where we saved EBP, i.e. in the last word
# of our fixed frame, then the 'words' value is:
- words = (FRAME_FIXED_SIZE - 1) + reserved_depth
+ words = (self.cpu.FRAME_FIXED_SIZE - 1) + reserved_depth
# align, e.g. for Mac OS X
aligned_words = align_stack_words(words+2)-2 # 2 = EIP+EBP
- mc.write(packimm32(-WORD * aligned_words))
+ mc.writeimm32(-WORD * aligned_words)
mc.done()
def _call_header(self):
- self.mc.PUSH(ebp)
- self.mc.MOV(ebp, esp)
- self.mc.PUSH(ebx)
- self.mc.PUSH(esi)
- self.mc.PUSH(edi)
+ self.mc.PUSH_r(ebp.value)
+ self.mc.MOV_rr(ebp.value, esp.value)
+ for regloc in self.cpu.CALLEE_SAVE_REGISTERS:
+ self.mc.PUSH_r(regloc.value)
+
# NB. the shape of the frame is hard-coded in get_basic_shape() too.
# Also, make sure this is consistent with FRAME_FIXED_SIZE.
return self._patchable_stackadjust()
+ def _call_footer(self):
+ self.mc.LEA_rb(esp.value, -len(self.cpu.CALLEE_SAVE_REGISTERS) * WORD)
+
+ for i in range(len(self.cpu.CALLEE_SAVE_REGISTERS)-1, -1, -1):
+ self.mc.POP_r(self.cpu.CALLEE_SAVE_REGISTERS[i].value)
+
+ self.mc.POP_r(ebp.value)
+ self.mc.RET()
+
def _assemble_bootstrap_direct_call(self, arglocs, jmpadr, stackdepth):
+ if IS_X86_64:
+ return self._assemble_bootstrap_direct_call_64(arglocs, jmpadr, stackdepth)
# XXX pushing ebx esi and edi is a bit pointless, since we store
# all regsiters anyway, for the case of guard_not_forced
# XXX this can be improved greatly. Right now it'll behave like
@@ -388,23 +488,81 @@
self._patch_stackadjust(adr_stackadjust, stackdepth)
for i in range(len(nonfloatlocs)):
loc = nonfloatlocs[i]
- if isinstance(loc, REG):
- self.mc.MOV(loc, mem(ebp, (2 + i) * WORD))
+ if isinstance(loc, RegLoc):
+ assert not loc.is_xmm
+ self.mc.MOV_rb(loc.value, (2 + i) * WORD)
loc = floatlocs[i]
- if isinstance(loc, XMMREG):
- self.mc.MOVSD(loc, mem64(ebp, (1 + i) * 2 * WORD))
+ if isinstance(loc, RegLoc):
+ assert loc.is_xmm
+ self.mc.MOVSD_xb(loc.value, (1 + i) * 2 * WORD)
tmp = eax
xmmtmp = xmm0
for i in range(len(nonfloatlocs)):
loc = nonfloatlocs[i]
- if loc is not None and not isinstance(loc, REG):
- self.mc.MOV(tmp, mem(ebp, (2 + i) * WORD))
+ if loc is not None and not isinstance(loc, RegLoc):
+ self.mc.MOV_rb(tmp.value, (2 + i) * WORD)
self.mc.MOV(loc, tmp)
loc = floatlocs[i]
- if loc is not None and not isinstance(loc, XMMREG):
- self.mc.MOVSD(xmmtmp, mem64(ebp, (1 + i) * 2 * WORD))
- self.mc.MOVSD(loc, xmmtmp)
- self.mc.JMP(rel32(jmpadr))
+ if loc is not None and not isinstance(loc, RegLoc):
+ self.mc.MOVSD_xb(xmmtmp.value, (1 + i) * 2 * WORD)
+ assert isinstance(loc, StackLoc)
+ self.mc.MOVSD_bx(loc.value, xmmtmp.value)
+ self.mc.JMP_l(jmpadr)
+ return adr_stackadjust
+
+ def _assemble_bootstrap_direct_call_64(self, arglocs, jmpadr, stackdepth):
+ # XXX: Very similar to _emit_call_64
+
+ src_locs = []
+ dst_locs = []
+ xmm_src_locs = []
+ xmm_dst_locs = []
+ get_from_stack = []
+
+ # In reverse order for use with pop()
+ unused_gpr = [r9, r8, ecx, edx, esi, edi]
+ unused_xmm = [xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0]
+
+ nonfloatlocs, floatlocs = arglocs
+ adr_stackadjust = self._call_header()
+ self._patch_stackadjust(adr_stackadjust, stackdepth)
+
+ # The lists are padded with Nones
+ assert len(nonfloatlocs) == len(floatlocs)
+
+ for i in range(len(nonfloatlocs)):
+ loc = nonfloatlocs[i]
+ if loc is not None:
+ if len(unused_gpr) > 0:
+ src_locs.append(unused_gpr.pop())
+ dst_locs.append(loc)
+ else:
+ get_from_stack.append((loc, False))
+
+ floc = floatlocs[i]
+ if floc is not None:
+ if len(unused_xmm) > 0:
+ xmm_src_locs.append(unused_xmm.pop())
+ xmm_dst_locs.append(floc)
+ else:
+ get_from_stack.append((floc, True))
+
+ remap_frame_layout(self, src_locs, dst_locs, X86_64_SCRATCH_REG)
+ remap_frame_layout(self, xmm_src_locs, xmm_dst_locs, X86_64_XMM_SCRATCH_REG)
+
+ for i in range(len(get_from_stack)):
+ loc, is_xmm = get_from_stack[i]
+ if is_xmm:
+ self.mc.MOVSD_xb(X86_64_XMM_SCRATCH_REG.value, (2 + i) * WORD)
+ self.mc.MOVSD(loc, X86_64_XMM_SCRATCH_REG)
+ else:
+ self.mc.MOV_rb(X86_64_SCRATCH_REG.value, (2 + i) * WORD)
+ # XXX: We're assuming that "loc" won't require regloc to
+ # clobber the scratch register
+ self.mc.MOV(loc, X86_64_SCRATCH_REG)
+
+ self.mc.JMP(imm(jmpadr))
+
return adr_stackadjust
def _assemble_bootstrap_code(self, inputargs, arglocs):
@@ -412,11 +570,12 @@
adr_stackadjust = self._call_header()
tmp = X86RegisterManager.all_regs[0]
xmmtmp = X86XMMRegisterManager.all_regs[0]
+ self.mc._mc.begin_reuse_scratch_register()
for i in range(len(nonfloatlocs)):
loc = nonfloatlocs[i]
if loc is None:
continue
- if isinstance(loc, REG):
+ if isinstance(loc, RegLoc):
target = loc
else:
target = tmp
@@ -430,17 +589,20 @@
adr = self.fail_boxes_int.get_addr_for_num(i)
self.mc.MOV(target, heap(adr))
if target is not loc:
- self.mc.MOV(loc, target)
+ assert isinstance(loc, StackLoc)
+ self.mc.MOV_br(loc.value, target.value)
for i in range(len(floatlocs)):
loc = floatlocs[i]
if loc is None:
continue
adr = self.fail_boxes_float.get_addr_for_num(i)
- if isinstance(loc, REG):
- self.mc.MOVSD(loc, heap64(adr))
+ if isinstance(loc, RegLoc):
+ self.mc.MOVSD(loc, heap(adr))
else:
- self.mc.MOVSD(xmmtmp, heap64(adr))
- self.mc.MOVSD(loc, xmmtmp)
+ self.mc.MOVSD(xmmtmp, heap(adr))
+ assert isinstance(loc, StackLoc)
+ self.mc.MOVSD_bx(loc.value, xmmtmp.value)
+ self.mc._mc.end_reuse_scratch_register()
return adr_stackadjust
def dump(self, text):
@@ -453,27 +615,10 @@
finally:
Box._extended_display = _prev
- def _start_block(self):
- # Return a 'mc' that can be used to write an "atomic" block,
- # i.e. one that will not contain any JMP.
- mc = self.mc._mc
- if not we_are_translated():
- self._block_started_mc = (self.mc, mc.tell())
- self.mc = "block started"
- return mc
-
- def _stop_block(self):
- if not we_are_translated():
- assert self.mc == "block started"
- self.mc, orgpos = self._block_started_mc
- assert 0 <= self.mc._mc.tell() - orgpos <= 58, (
- "too many bytes in _start_block/_stop_block pair")
- del self._block_started_mc
-
# ------------------------------------------------------------
def mov(self, from_loc, to_loc):
- if isinstance(from_loc, XMMREG) or isinstance(to_loc, XMMREG):
+ if (isinstance(from_loc, RegLoc) and from_loc.is_xmm) or (isinstance(to_loc, RegLoc) and to_loc.is_xmm):
self.mc.MOVSD(to_loc, from_loc)
else:
self.mc.MOV(to_loc, from_loc)
@@ -481,24 +626,24 @@
regalloc_mov = mov # legacy interface
def regalloc_push(self, loc):
- if isinstance(loc, XMMREG):
- self.mc.SUB(esp, imm(2*WORD))
- self.mc.MOVSD(mem64(esp, 0), loc)
- elif isinstance(loc, MODRM64):
+ if isinstance(loc, RegLoc) and loc.is_xmm:
+ self.mc.SUB_ri(esp.value, 2*WORD)
+ self.mc.MOVSD_sx(0, loc.value)
+ elif WORD == 4 and isinstance(loc, StackLoc) and loc.width == 8:
# XXX evil trick
- self.mc.PUSH(mem(ebp, get_ebp_ofs(loc.position)))
- self.mc.PUSH(mem(ebp, get_ebp_ofs(loc.position + 1)))
+ self.mc.PUSH_b(get_ebp_ofs(loc.position))
+ self.mc.PUSH_b(get_ebp_ofs(loc.position + 1))
else:
self.mc.PUSH(loc)
def regalloc_pop(self, loc):
- if isinstance(loc, XMMREG):
- self.mc.MOVSD(loc, mem64(esp, 0))
- self.mc.ADD(esp, imm(2*WORD))
- elif isinstance(loc, MODRM64):
+ if isinstance(loc, RegLoc) and loc.is_xmm:
+ self.mc.MOVSD_xs(loc.value, 0)
+ self.mc.ADD_ri(esp.value, 2*WORD)
+ elif WORD == 4 and isinstance(loc, StackLoc) and loc.width == 8:
# XXX evil trick
- self.mc.POP(mem(ebp, get_ebp_ofs(loc.position + 1)))
- self.mc.POP(mem(ebp, get_ebp_ofs(loc.position)))
+ self.mc.POP_b(get_ebp_ofs(loc.position + 1))
+ self.mc.POP_b(get_ebp_ofs(loc.position))
else:
self.mc.POP(loc)
@@ -515,14 +660,14 @@
faildescr._x86_current_depths = current_depths
failargs = guard_op.fail_args
guard_opnum = guard_op.opnum
- failaddr = self.implement_guard_recovery(guard_opnum,
- faildescr, failargs,
- faillocs)
+ guard_token = self.implement_guard_recovery(guard_opnum,
+ faildescr, failargs,
+ faillocs)
if op is None:
dispatch_opnum = guard_opnum
else:
dispatch_opnum = op.opnum
- res = genop_guard_list[dispatch_opnum](self, op, guard_op, failaddr,
+ res = genop_guard_list[dispatch_opnum](self, op, guard_op, guard_token,
arglocs, resloc)
faildescr._x86_adr_jump_offset = res
@@ -549,103 +694,158 @@
rl = result_loc.lowest8bits()
if isinstance(op.args[0], Const):
self.mc.CMP(arglocs[1], arglocs[0])
- getattr(self.mc, 'SET' + rev_cond)(rl)
+ self.mc.SET_ir(rx86.Conditions[rev_cond], rl.value)
else:
self.mc.CMP(arglocs[0], arglocs[1])
- getattr(self.mc, 'SET' + cond)(rl)
- self.mc.MOVZX(result_loc, rl)
+ self.mc.SET_ir(rx86.Conditions[cond], rl.value)
+ self.mc.MOVZX8_rr(result_loc.value, rl.value)
return genop_cmp
def _cmpop_float(cond, is_ne=False):
def genop_cmp(self, op, arglocs, result_loc):
self.mc.UCOMISD(arglocs[0], arglocs[1])
- rl = result_loc.lowest8bits()
- rh = result_loc.higher8bits()
- getattr(self.mc, 'SET' + cond)(rl)
+ tmp1 = result_loc.lowest8bits()
+ if IS_X86_32:
+ tmp2 = result_loc.higher8bits()
+ elif IS_X86_64:
+ tmp2 = X86_64_SCRATCH_REG.lowest8bits()
+
+ self.mc.SET_ir(rx86.Conditions[cond], tmp1.value)
if is_ne:
- self.mc.SETP(rh)
- self.mc.OR(rl, rh)
+ self.mc.SET_ir(rx86.Conditions['P'], tmp2.value)
+ self.mc.OR8_rr(tmp1.value, tmp2.value)
else:
- self.mc.SETNP(rh)
- self.mc.AND(rl, rh)
- self.mc.MOVZX(result_loc, rl)
+ self.mc.SET_ir(rx86.Conditions['NP'], tmp2.value)
+ self.mc.AND8_rr(tmp1.value, tmp2.value)
+ self.mc.MOVZX8_rr(result_loc.value, tmp1.value)
return genop_cmp
def _cmpop_guard(cond, rev_cond, false_cond, false_rev_cond):
- def genop_cmp_guard(self, op, guard_op, addr, arglocs, result_loc):
+ def genop_cmp_guard(self, op, guard_op, guard_token, arglocs, result_loc):
guard_opnum = guard_op.opnum
if isinstance(op.args[0], Const):
self.mc.CMP(arglocs[1], arglocs[0])
if guard_opnum == rop.GUARD_FALSE:
- name = 'J' + rev_cond
- return self.implement_guard(addr, getattr(self.mc, name))
+ return self.implement_guard(guard_token, rev_cond)
else:
- name = 'J' + false_rev_cond
- return self.implement_guard(addr, getattr(self.mc, name))
+ return self.implement_guard(guard_token, false_rev_cond)
else:
self.mc.CMP(arglocs[0], arglocs[1])
if guard_opnum == rop.GUARD_FALSE:
- name = 'J' + cond
- return self.implement_guard(addr, getattr(self.mc, name))
+ return self.implement_guard(guard_token, cond)
else:
- name = 'J' + false_cond
- return self.implement_guard(addr, getattr(self.mc, name))
+ return self.implement_guard(guard_token, false_cond)
return genop_cmp_guard
def _cmpop_guard_float(cond, false_cond, need_jp):
- def genop_cmp_guard_float(self, op, guard_op, addr, arglocs,
+ def genop_cmp_guard_float(self, op, guard_op, guard_token, arglocs,
result_loc):
guard_opnum = guard_op.opnum
self.mc.UCOMISD(arglocs[0], arglocs[1])
+ # 16 is enough space for the rel8 jumps below and the rel32
+ # jump in implement_guard
+ self.mc.ensure_bytes_available(16 + guard_token.recovery_stub_size())
if guard_opnum == rop.GUARD_FALSE:
- mc = self.mc._mc
- name = 'J' + cond
if need_jp:
- mc.JP(rel8(6))
- getattr(mc, name)(rel32(addr))
- return mc.tell() - 4
+ self.mc.J_il8(rx86.Conditions['P'], 6)
+ return self.implement_guard(guard_token, cond)
else:
if need_jp:
- mc = self.mc._mc
- mc.JP(rel8(2))
- getattr(mc, 'J' + cond)(rel8(5))
- return self.implement_guard(addr, mc.JMP)
- name = 'J' + false_cond
- return self.implement_guard(addr, getattr(self.mc, name))
+ self.mc.J_il8(rx86.Conditions['P'], 2)
+ self.mc.J_il8(rx86.Conditions[cond], 5)
+ return self.implement_guard(guard_token)
+ return self.implement_guard(guard_token, false_cond)
return genop_cmp_guard_float
- @specialize.arg(5)
- def _emit_call(self, x, arglocs, start=0, tmp=eax, force_mc=False,
- mc=None):
- if not force_mc:
- mc = self.mc
+ def _emit_call(self, x, arglocs, start=0, tmp=eax):
+ if IS_X86_64:
+ return self._emit_call_64(x, arglocs, start)
+
p = 0
n = len(arglocs)
for i in range(start, n):
loc = arglocs[i]
- if isinstance(loc, REG):
- if isinstance(loc, XMMREG):
- mc.MOVSD(mem64(esp, p), loc)
+ if isinstance(loc, RegLoc):
+ if loc.is_xmm:
+ self.mc.MOVSD_sx(p, loc.value)
else:
- mc.MOV(mem(esp, p), loc)
+ self.mc.MOV_sr(p, loc.value)
p += round_up_to_4(loc.width)
p = 0
for i in range(start, n):
loc = arglocs[i]
- if not isinstance(loc, REG):
- if isinstance(loc, MODRM64):
- mc.MOVSD(xmm0, loc)
- mc.MOVSD(mem64(esp, p), xmm0)
+ if not isinstance(loc, RegLoc):
+ if loc.width == 8:
+ self.mc.MOVSD(xmm0, loc)
+ self.mc.MOVSD_sx(p, xmm0.value)
else:
- mc.MOV(tmp, loc)
- mc.MOV(mem(esp, p), tmp)
+ self.mc.MOV(tmp, loc)
+ self.mc.MOV_sr(p, tmp.value)
p += round_up_to_4(loc.width)
self._regalloc.reserve_param(p//WORD)
- mc.CALL(x)
+ # x is a location
+ self.mc.CALL(x)
self.mark_gc_roots()
+
+ def _emit_call_64(self, x, arglocs, start=0):
+ src_locs = []
+ dst_locs = []
+ xmm_src_locs = []
+ xmm_dst_locs = []
+ pass_on_stack = []
+
+ # In reverse order for use with pop()
+ unused_gpr = [r9, r8, ecx, edx, esi, edi]
+ unused_xmm = [xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0]
+
+ for i in range(start, len(arglocs)):
+ loc = arglocs[i]
+ assert isinstance(loc, RegLoc) or isinstance(loc, ImmedLoc) or isinstance(loc, StackLoc)
+
+ # XXX: Should be much simplier to tell whether a location is a float!
+ if (isinstance(loc, RegLoc) and loc.is_xmm) or (isinstance(loc, StackLoc) and loc.type == FLOAT):
+ if len(unused_xmm) > 0:
+ xmm_src_locs.append(loc)
+ xmm_dst_locs.append(unused_xmm.pop())
+ else:
+ pass_on_stack.append(loc)
+ else:
+ if len(unused_gpr) > 0:
+ src_locs.append(loc)
+ dst_locs.append(unused_gpr.pop())
+ else:
+ pass_on_stack.append(loc)
+ # Emit instructions to pass the stack arguments
+ # XXX: Would be nice to let remap_frame_layout take care of this, but
+ # we'd need to create something like StackLoc, but relative to esp,
+ # and I don't know if it's worth it.
+ for i in range(len(pass_on_stack)):
+ loc = pass_on_stack[i]
+ if not isinstance(loc, RegLoc):
+ if isinstance(loc, StackLoc) and loc.type == FLOAT:
+ self.mc.MOVSD(X86_64_XMM_SCRATCH_REG, loc)
+ self.mc.MOVSD_sx(i*WORD, X86_64_XMM_SCRATCH_REG.value)
+ else:
+ self.mc.MOV(X86_64_SCRATCH_REG, loc)
+ self.mc.MOV_sr(i*WORD, X86_64_SCRATCH_REG.value)
+ else:
+ # It's a register
+ if loc.is_xmm:
+ self.mc.MOVSD_sx(i*WORD, loc.value)
+ else:
+ self.mc.MOV_sr(i*WORD, loc.value)
+
+ # Handle register arguments
+ remap_frame_layout(self, src_locs, dst_locs, X86_64_SCRATCH_REG)
+ remap_frame_layout(self, xmm_src_locs, xmm_dst_locs, X86_64_XMM_SCRATCH_REG)
+
+ self._regalloc.reserve_param(len(pass_on_stack))
+ self.mc.CALL(x)
+ self.mark_gc_roots()
+
def call(self, addr, args, res):
- self._emit_call(rel32(addr), args)
+ self._emit_call(imm(addr), args)
assert res is eax
genop_int_neg = _unaryop("NEG")
@@ -656,6 +856,9 @@
genop_int_and = _binaryop("AND", True)
genop_int_or = _binaryop("OR", True)
genop_int_xor = _binaryop("XOR", True)
+ genop_int_lshift = _binaryop("SHL")
+ genop_int_rshift = _binaryop("SAR")
+ genop_uint_rshift = _binaryop("SHR")
genop_float_add = _binaryop("ADDSD", True)
genop_float_sub = _binaryop('SUBSD')
genop_float_mul = _binaryop('MULSD', True)
@@ -702,26 +905,27 @@
genop_guard_float_gt = _cmpop_guard_float("A", "BE", False)
genop_guard_float_ge = _cmpop_guard_float("AE", "B", False)
- def genop_guard_float_ne(self, op, guard_op, addr, arglocs, result_loc):
+ def genop_guard_float_ne(self, op, guard_op, guard_token, arglocs, result_loc):
guard_opnum = guard_op.opnum
self.mc.UCOMISD(arglocs[0], arglocs[1])
- mc = self.mc._mc
+ # 16 is enough space for the rel8 jumps below and the rel32
+ # jump in implement_guard
+ self.mc.ensure_bytes_available(16 + guard_token.recovery_stub_size())
if guard_opnum == rop.GUARD_TRUE:
- mc.JP(rel8(6))
- mc.JE(rel32(addr))
- return mc.tell() - 4
- else:
- mc.JP(rel8(2))
- mc.JE(rel8(5))
- return self.implement_guard(addr, mc.JMP)
+ self.mc.J_il8(rx86.Conditions['P'], 6)
+ return self.implement_guard(guard_token, 'E')
+ else:
+ self.mc.J_il8(rx86.Conditions['P'], 2)
+ self.mc.J_il8(rx86.Conditions['E'], 5)
+ return self.implement_guard(guard_token)
def genop_float_neg(self, op, arglocs, resloc):
# Following what gcc does: res = x ^ 0x8000000000000000
- self.mc.XORPD(arglocs[0], self.loc_float_const_neg)
+ self.mc.XORPD(arglocs[0], heap(self.float_const_neg_addr))
def genop_float_abs(self, op, arglocs, resloc):
# Following what gcc does: res = x & 0x7FFFFFFFFFFFFFFF
- self.mc.ANDPD(arglocs[0], self.loc_float_const_abs)
+ self.mc.ANDPD(arglocs[0], heap(self.float_const_abs_addr))
def genop_cast_float_to_int(self, op, arglocs, resloc):
self.mc.CVTTSD2SI(resloc, arglocs[0])
@@ -729,70 +933,56 @@
def genop_cast_int_to_float(self, op, arglocs, resloc):
self.mc.CVTSI2SD(resloc, arglocs[0])
- def genop_int_lshift(self, op, arglocs, resloc):
- loc, loc2 = arglocs
- if loc2 is ecx:
- loc2 = cl
- self.mc.SHL(loc, loc2)
-
- def genop_int_rshift(self, op, arglocs, resloc):
- loc, loc2 = arglocs
- if loc2 is ecx:
- loc2 = cl
- self.mc.SAR(loc, loc2)
-
- def genop_uint_rshift(self, op, arglocs, resloc):
- loc, loc2 = arglocs
- if loc2 is ecx:
- loc2 = cl
- self.mc.SHR(loc, loc2)
-
- def genop_guard_int_is_true(self, op, guard_op, addr, arglocs, resloc):
+ def genop_guard_int_is_true(self, op, guard_op, guard_token, arglocs, resloc):
guard_opnum = guard_op.opnum
- self.mc.CMP(arglocs[0], imm8(0))
+ self.mc.CMP(arglocs[0], imm(0))
if guard_opnum == rop.GUARD_TRUE:
- return self.implement_guard(addr, self.mc.JZ)
+ return self.implement_guard(guard_token, 'Z')
else:
- return self.implement_guard(addr, self.mc.JNZ)
+ return self.implement_guard(guard_token, 'NZ')
def genop_int_is_true(self, op, arglocs, resloc):
- self.mc.CMP(arglocs[0], imm8(0))
+ self.mc.CMP(arglocs[0], imm(0))
rl = resloc.lowest8bits()
- self.mc.SETNE(rl)
- self.mc.MOVZX(resloc, rl)
+ self.mc.SET_ir(rx86.Conditions['NE'], rl.value)
+ self.mc.MOVZX8(resloc, rl)
- def genop_guard_int_is_zero(self, op, guard_op, addr, arglocs, resloc):
+ def genop_guard_int_is_zero(self, op, guard_op, guard_token, arglocs, resloc):
guard_opnum = guard_op.opnum
- self.mc.CMP(arglocs[0], imm8(0))
+ self.mc.CMP(arglocs[0], imm(0))
if guard_opnum == rop.GUARD_TRUE:
- return self.implement_guard(addr, self.mc.JNZ)
+ return self.implement_guard(guard_token, 'NZ')
else:
- return self.implement_guard(addr, self.mc.JZ)
+ return self.implement_guard(guard_token, 'Z')
def genop_int_is_zero(self, op, arglocs, resloc):
- self.mc.CMP(arglocs[0], imm8(0))
+ self.mc.CMP(arglocs[0], imm(0))
rl = resloc.lowest8bits()
- self.mc.SETE(rl)
- self.mc.MOVZX(resloc, rl)
+ self.mc.SET_ir(rx86.Conditions['E'], rl.value)
+ self.mc.MOVZX8(resloc, rl)
def genop_same_as(self, op, arglocs, resloc):
self.mov(arglocs[0], resloc)
#genop_cast_ptr_to_int = genop_same_as
def genop_int_mod(self, op, arglocs, resloc):
- self.mc.CDQ()
- self.mc.IDIV(ecx)
+ if IS_X86_32:
+ self.mc.CDQ()
+ elif IS_X86_64:
+ self.mc.CQO()
+
+ self.mc.IDIV_r(ecx.value)
genop_int_floordiv = genop_int_mod
def genop_uint_floordiv(self, op, arglocs, resloc):
- self.mc.XOR(edx, edx)
- self.mc.DIV(ecx)
+ self.mc.XOR_rr(edx.value, edx.value)
+ self.mc.DIV_r(ecx.value)
def genop_new_with_vtable(self, op, arglocs, result_loc):
assert result_loc is eax
loc_vtable = arglocs[-1]
- assert isinstance(loc_vtable, IMM32)
+ assert isinstance(loc_vtable, ImmedLoc)
arglocs = arglocs[:-1]
self.call(self.malloc_func_addr, arglocs, eax)
# xxx ignore NULL returns for now
@@ -800,7 +990,9 @@
def set_vtable(self, loc, loc_vtable):
if self.cpu.vtable_offset is not None:
- self.mc.MOV(mem(loc, self.cpu.vtable_offset), loc_vtable)
+ assert isinstance(loc, RegLoc)
+ assert isinstance(loc_vtable, ImmedLoc)
+ self.mc.MOV_mi((loc.value, self.cpu.vtable_offset), loc_vtable.value)
# XXX genop_new is abused for all varsized mallocs with Boehm, for now
# (instead of genop_new_array, genop_newstr, genop_newunicode)
@@ -822,16 +1014,19 @@
def genop_getfield_gc(self, op, arglocs, resloc):
base_loc, ofs_loc, size_loc = arglocs
- assert isinstance(size_loc, IMM32)
+ assert isinstance(size_loc, ImmedLoc)
+ assert isinstance(resloc, RegLoc)
size = size_loc.value
- if size == 1:
- self.mc.MOVZX(resloc, addr8_add(base_loc, ofs_loc))
+
+ source_addr = AddressLoc(base_loc, ofs_loc)
+ if resloc.is_xmm:
+ self.mc.MOVSD(resloc, source_addr)
+ elif size == 1:
+ self.mc.MOVZX8(resloc, source_addr)
elif size == 2:
- self.mc.MOVZX(resloc, addr_add(base_loc, ofs_loc))
+ self.mc.MOVZX16(resloc, source_addr)
elif size == WORD:
- self.mc.MOV(resloc, addr_add(base_loc, ofs_loc))
- elif size == 8:
- self.mc.MOVSD(resloc, addr64_add(base_loc, ofs_loc))
+ self.mc.MOV(resloc, source_addr)
else:
raise NotImplementedError("getfield size = %d" % size)
@@ -841,16 +1036,16 @@
def genop_getarrayitem_gc(self, op, arglocs, resloc):
base_loc, ofs_loc, scale, ofs = arglocs
- assert isinstance(ofs, IMM32)
- assert isinstance(scale, IMM32)
+ assert isinstance(ofs, ImmedLoc)
+ assert isinstance(scale, ImmedLoc)
if op.result.type == FLOAT:
- self.mc.MOVSD(resloc, addr64_add(base_loc, ofs_loc, ofs.value,
+ self.mc.MOVSD(resloc, addr_add(base_loc, ofs_loc, ofs.value,
scale.value))
else:
if scale.value == 0:
- self.mc.MOVZX(resloc, addr8_add(base_loc, ofs_loc, ofs.value,
+ self.mc.MOVZX8(resloc, addr_add(base_loc, ofs_loc, ofs.value,
scale.value))
- elif scale.value == 2:
+ elif (1 << scale.value) == WORD:
self.mc.MOV(resloc, addr_add(base_loc, ofs_loc, ofs.value,
scale.value))
else:
@@ -862,34 +1057,33 @@
def genop_discard_setfield_gc(self, op, arglocs):
base_loc, ofs_loc, size_loc, value_loc = arglocs
- assert isinstance(size_loc, IMM32)
+ assert isinstance(size_loc, ImmedLoc)
size = size_loc.value
- if size == WORD * 2:
- self.mc.MOVSD(addr64_add(base_loc, ofs_loc), value_loc)
+ dest_addr = AddressLoc(base_loc, ofs_loc)
+ if isinstance(value_loc, RegLoc) and value_loc.is_xmm:
+ self.mc.MOVSD(dest_addr, value_loc)
elif size == WORD:
- self.mc.MOV(addr_add(base_loc, ofs_loc), value_loc)
+ self.mc.MOV(dest_addr, value_loc)
elif size == 2:
- self.mc.MOV16(addr_add(base_loc, ofs_loc), value_loc)
+ self.mc.MOV16(dest_addr, value_loc)
elif size == 1:
- self.mc.MOV(addr8_add(base_loc, ofs_loc), value_loc.lowest8bits())
+ self.mc.MOV8(dest_addr, value_loc.lowest8bits())
else:
print "[asmgen]setfield addr size %d" % size
raise NotImplementedError("Addr size %d" % size)
def genop_discard_setarrayitem_gc(self, op, arglocs):
base_loc, ofs_loc, value_loc, scale_loc, baseofs = arglocs
- assert isinstance(baseofs, IMM32)
- assert isinstance(scale_loc, IMM32)
+ assert isinstance(baseofs, ImmedLoc)
+ assert isinstance(scale_loc, ImmedLoc)
+ dest_addr = AddressLoc(base_loc, ofs_loc, scale_loc.value, baseofs.value)
if op.args[2].type == FLOAT:
- self.mc.MOVSD(addr64_add(base_loc, ofs_loc, baseofs.value,
- scale_loc.value), value_loc)
+ self.mc.MOVSD(dest_addr, value_loc)
else:
- if scale_loc.value == 2:
- self.mc.MOV(addr_add(base_loc, ofs_loc, baseofs.value,
- scale_loc.value), value_loc)
+ if (1 << scale_loc.value) == WORD:
+ self.mc.MOV(dest_addr, value_loc)
elif scale_loc.value == 0:
- self.mc.MOV(addr8_add(base_loc, ofs_loc, baseofs.value,
- scale_loc.value), value_loc.lowest8bits())
+ self.mc.MOV8(dest_addr, value_loc.lowest8bits())
else:
raise NotImplementedError("scale = %d" % scale_loc.value)
@@ -898,17 +1092,17 @@
basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.STR,
self.cpu.translate_support_code)
assert itemsize == 1
- self.mc.MOV(addr8_add(base_loc, ofs_loc, basesize),
- val_loc.lowest8bits())
+ dest_addr = AddressLoc(base_loc, ofs_loc, 0, basesize)
+ self.mc.MOV8(dest_addr, val_loc.lowest8bits())
def genop_discard_unicodesetitem(self, op, arglocs):
base_loc, ofs_loc, val_loc = arglocs
basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.UNICODE,
self.cpu.translate_support_code)
if itemsize == 4:
- self.mc.MOV(addr_add(base_loc, ofs_loc, basesize, 2), val_loc)
+ self.mc.MOV32(AddressLoc(base_loc, ofs_loc, 2, basesize), val_loc)
elif itemsize == 2:
- self.mc.MOV16(addr_add(base_loc, ofs_loc, basesize, 1), val_loc)
+ self.mc.MOV16(AddressLoc(base_loc, ofs_loc, 1, basesize), val_loc)
else:
assert 0, itemsize
@@ -929,7 +1123,7 @@
def genop_arraylen_gc(self, op, arglocs, resloc):
base_loc, ofs_loc = arglocs
- assert isinstance(ofs_loc, IMM32)
+ assert isinstance(ofs_loc, ImmedLoc)
self.mc.MOV(resloc, addr_add_const(base_loc, ofs_loc.value))
def genop_strgetitem(self, op, arglocs, resloc):
@@ -937,83 +1131,83 @@
basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.STR,
self.cpu.translate_support_code)
assert itemsize == 1
- self.mc.MOVZX(resloc, addr8_add(base_loc, ofs_loc, basesize))
+ self.mc.MOVZX8(resloc, AddressLoc(base_loc, ofs_loc, 0, basesize))
def genop_unicodegetitem(self, op, arglocs, resloc):
base_loc, ofs_loc = arglocs
basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.UNICODE,
self.cpu.translate_support_code)
if itemsize == 4:
- self.mc.MOV(resloc, addr_add(base_loc, ofs_loc, basesize, 2))
+ self.mc.MOV32(resloc, AddressLoc(base_loc, ofs_loc, 2, basesize))
elif itemsize == 2:
- self.mc.MOVZX(resloc, addr_add(base_loc, ofs_loc, basesize, 1))
+ self.mc.MOVZX16(resloc, AddressLoc(base_loc, ofs_loc, 1, basesize))
else:
assert 0, itemsize
- def genop_guard_guard_true(self, ign_1, guard_op, addr, locs, ign_2):
+ def genop_guard_guard_true(self, ign_1, guard_op, guard_token, locs, ign_2):
loc = locs[0]
self.mc.TEST(loc, loc)
- return self.implement_guard(addr, self.mc.JZ)
+ return self.implement_guard(guard_token, 'Z')
genop_guard_guard_nonnull = genop_guard_guard_true
- def genop_guard_guard_no_exception(self, ign_1, guard_op, addr,
+ def genop_guard_guard_no_exception(self, ign_1, guard_op, guard_token,
locs, ign_2):
self.mc.CMP(heap(self.cpu.pos_exception()), imm(0))
- return self.implement_guard(addr, self.mc.JNZ)
+ return self.implement_guard(guard_token, 'NZ')
- def genop_guard_guard_exception(self, ign_1, guard_op, addr,
+ def genop_guard_guard_exception(self, ign_1, guard_op, guard_token,
locs, resloc):
loc = locs[0]
loc1 = locs[1]
self.mc.MOV(loc1, heap(self.cpu.pos_exception()))
self.mc.CMP(loc1, loc)
- addr = self.implement_guard(addr, self.mc.JNE)
+ addr = self.implement_guard(guard_token, 'NE')
if resloc is not None:
self.mc.MOV(resloc, heap(self.cpu.pos_exc_value()))
self.mc.MOV(heap(self.cpu.pos_exception()), imm(0))
self.mc.MOV(heap(self.cpu.pos_exc_value()), imm(0))
return addr
- def _gen_guard_overflow(self, guard_op, addr):
+ def _gen_guard_overflow(self, guard_op, guard_token):
guard_opnum = guard_op.opnum
if guard_opnum == rop.GUARD_NO_OVERFLOW:
- return self.implement_guard(addr, self.mc.JO)
+ return self.implement_guard(guard_token, 'O')
elif guard_opnum == rop.GUARD_OVERFLOW:
- return self.implement_guard(addr, self.mc.JNO)
+ return self.implement_guard(guard_token, 'NO')
else:
print "int_xxx_ovf followed by", guard_op.getopname()
raise AssertionError
- def genop_guard_int_add_ovf(self, op, guard_op, addr, arglocs, result_loc):
+ def genop_guard_int_add_ovf(self, op, guard_op, guard_token, arglocs, result_loc):
self.genop_int_add(op, arglocs, result_loc)
- return self._gen_guard_overflow(guard_op, addr)
+ return self._gen_guard_overflow(guard_op, guard_token)
- def genop_guard_int_sub_ovf(self, op, guard_op, addr, arglocs, result_loc):
+ def genop_guard_int_sub_ovf(self, op, guard_op, guard_token, arglocs, result_loc):
self.genop_int_sub(op, arglocs, result_loc)
- return self._gen_guard_overflow(guard_op, addr)
+ return self._gen_guard_overflow(guard_op, guard_token)
- def genop_guard_int_mul_ovf(self, op, guard_op, addr, arglocs, result_loc):
+ def genop_guard_int_mul_ovf(self, op, guard_op, guard_token, arglocs, result_loc):
self.genop_int_mul(op, arglocs, result_loc)
- return self._gen_guard_overflow(guard_op, addr)
+ return self._gen_guard_overflow(guard_op, guard_token)
- def genop_guard_guard_false(self, ign_1, guard_op, addr, locs, ign_2):
+ def genop_guard_guard_false(self, ign_1, guard_op, guard_token, locs, ign_2):
loc = locs[0]
self.mc.TEST(loc, loc)
- return self.implement_guard(addr, self.mc.JNZ)
+ return self.implement_guard(guard_token, 'NZ')
genop_guard_guard_isnull = genop_guard_guard_false
- def genop_guard_guard_value(self, ign_1, guard_op, addr, locs, ign_2):
+ def genop_guard_guard_value(self, ign_1, guard_op, guard_token, locs, ign_2):
if guard_op.args[0].type == FLOAT:
assert guard_op.args[1].type == FLOAT
self.mc.UCOMISD(locs[0], locs[1])
else:
self.mc.CMP(locs[0], locs[1])
- return self.implement_guard(addr, self.mc.JNE)
+ return self.implement_guard(guard_token, 'NE')
- def _cmp_guard_class(self, mc, locs):
+ def _cmp_guard_class(self, locs):
offset = self.cpu.vtable_offset
if offset is not None:
- mc.CMP(mem(locs[0], offset), locs[1])
+ self.mc.CMP(mem(locs[0], offset), locs[1])
else:
# XXX hard-coded assumption: to go from an object to its class
# we use the following algorithm:
@@ -1022,7 +1216,7 @@
# - multiply by 4 and use it as an offset in type_info_group
# - add 16 bytes, to go past the TYPE_INFO structure
loc = locs[1]
- assert isinstance(loc, IMM32)
+ assert isinstance(loc, ImmedLoc)
classptr = loc.value
# here, we have to go back from 'classptr' to the value expected
# from reading the 16 bits in the object header
@@ -1031,37 +1225,37 @@
type_info_group = llop.gc_get_type_info_group(llmemory.Address)
type_info_group = rffi.cast(lltype.Signed, type_info_group)
expected_typeid = (classptr - sizeof_ti - type_info_group) >> 2
- mc.CMP16(mem(locs[0], 0), imm32(expected_typeid))
+ self.mc.CMP16(mem(locs[0], 0), ImmedLoc(expected_typeid))
- def genop_guard_guard_class(self, ign_1, guard_op, addr, locs, ign_2):
- mc = self._start_block()
- self._cmp_guard_class(mc, locs)
- self._stop_block()
- return self.implement_guard(addr, self.mc.JNE)
+ def genop_guard_guard_class(self, ign_1, guard_op, guard_token, locs, ign_2):
+ self.mc.ensure_bytes_available(256)
+ self._cmp_guard_class(locs)
+ return self.implement_guard(guard_token, 'NE')
def genop_guard_guard_nonnull_class(self, ign_1, guard_op,
- addr, locs, ign_2):
- mc = self._start_block()
- mc.CMP(locs[0], imm8(1))
- mc.JB(rel8_patched_later)
- jb_location = mc.get_relative_pos()
- self._cmp_guard_class(mc, locs)
+ guard_token, locs, ign_2):
+ self.mc.ensure_bytes_available(256)
+ self.mc.CMP(locs[0], imm(1))
+ # Patched below
+ self.mc.J_il8(rx86.Conditions['B'], 0)
+ jb_location = self.mc.get_relative_pos()
+ self._cmp_guard_class(locs)
# patch the JB above
- offset = mc.get_relative_pos() - jb_location
+ offset = self.mc.get_relative_pos() - jb_location
assert 0 < offset <= 127
- mc.overwrite(jb_location-1, [chr(offset)])
- self._stop_block()
+ self.mc.overwrite(jb_location-1, [chr(offset)])
#
- return self.implement_guard(addr, self.mc.JNE)
+ return self.implement_guard(guard_token, 'NE')
def implement_guard_recovery(self, guard_opnum, faildescr, failargs,
fail_locs):
exc = (guard_opnum == rop.GUARD_EXCEPTION or
guard_opnum == rop.GUARD_NO_EXCEPTION or
guard_opnum == rop.GUARD_NOT_FORCED)
- return self.generate_quick_failure(faildescr, failargs, fail_locs, exc)
+ desc_bytes = self.failure_recovery_description(failargs, fail_locs)
+ return GuardToken(faildescr, failargs, fail_locs, exc, desc_bytes)
- def generate_quick_failure(self, faildescr, failargs, fail_locs, exc):
+ def generate_quick_failure(self, mc, faildescr, failargs, fail_locs, exc, desc_bytes):
"""Generate the initial code for handling a failure. We try to
keep it as compact as possible. The idea is that this code is
executed at most once (and very often, zero times); when
@@ -1069,38 +1263,43 @@
really handle recovery from this particular failure.
"""
fail_index = self.cpu.get_fail_descr_number(faildescr)
- bytes_needed = 20 + 5 * len(failargs) # conservative estimate
- if self.mc2.bytes_free() < bytes_needed:
- self.mc2.make_new_mc()
- mc = self.mc2._mc
addr = mc.tell()
withfloats = False
for box in failargs:
if box is not None and box.type == FLOAT:
withfloats = True
break
- mc.CALL(rel32(self.failure_recovery_code[exc + 2 * withfloats]))
+ mc.CALL(imm(self.failure_recovery_code[exc + 2 * withfloats]))
# write tight data that describes the failure recovery
faildescr._x86_failure_recovery_bytecode = mc.tell()
- self.write_failure_recovery_description(mc, failargs, fail_locs)
+ for byte in desc_bytes:
+ mc.writechr(ord(byte))
# write the fail_index too
- mc.write(packimm32(fail_index))
+ mc.writeimm32(fail_index)
# for testing the decoding, write a final byte 0xCC
if not we_are_translated():
mc.writechr(0xCC)
faildescr._x86_debug_faillocs = [loc for loc in fail_locs
if loc is not None]
+
+ # Make sure the recovery stub is at least 16 bytes long (for the
+ # case where we overwrite the recovery stub with a 64-bit absolute
+ # jump)
+ while mc.tell() - addr < 16:
+ mc.writechr(0x00)
return addr
DESCR_REF = 0x00
DESCR_INT = 0x01
DESCR_FLOAT = 0x02
DESCR_SPECIAL = 0x03
- CODE_FROMSTACK = 4*8
+ # XXX: 4*8 works on i386, should we optimize for that case?
+ CODE_FROMSTACK = 4*16
CODE_STOP = 0 | DESCR_SPECIAL
CODE_HOLE = 4 | DESCR_SPECIAL
- def write_failure_recovery_description(self, mc, failargs, locs):
+ def failure_recovery_description(self, failargs, locs):
+ desc_bytes = []
for i in range(len(failargs)):
arg = failargs[i]
if arg is not None:
@@ -1113,24 +1312,30 @@
else:
raise AssertionError("bogus kind")
loc = locs[i]
- if isinstance(loc, MODRM):
+ if isinstance(loc, StackLoc):
n = self.CODE_FROMSTACK//4 + loc.position
else:
- assert isinstance(loc, REG)
- n = loc.op
+ assert isinstance(loc, RegLoc)
+ n = loc.value
n = kind + 4*n
while n > 0x7F:
- mc.writechr((n & 0x7F) | 0x80)
+ desc_bytes.append(chr((n & 0x7F) | 0x80))
n >>= 7
else:
n = self.CODE_HOLE
- mc.writechr(n)
- mc.writechr(self.CODE_STOP)
+ desc_bytes.append(chr(n))
+ desc_bytes.append(chr(self.CODE_STOP))
# assert that the fail_boxes lists are big enough
assert len(failargs) <= self.fail_boxes_int.SIZE
+ return desc_bytes
+
+ def write_failure_recovery_description(self, mc, failargs, locs):
+ for byte in self.failure_recovery_description(failargs, locs):
+ mc.writechr(ord(byte))
def rebuild_faillocs_from_descr(self, bytecode):
from pypy.jit.backend.x86.regalloc import X86FrameManager
+ descr_to_box_type = [REF, INT, FLOAT]
bytecode = rffi.cast(rffi.UCHARP, bytecode)
arglocs = []
while 1:
@@ -1155,7 +1360,7 @@
size = 2
else:
size = 1
- loc = X86FrameManager.frame_pos(code, size)
+ loc = X86FrameManager.frame_pos(code, descr_to_box_type[kind])
elif code == self.CODE_STOP:
break
elif code == self.CODE_HOLE:
@@ -1165,16 +1370,16 @@
kind = code & 3
code >>= 2
if kind == self.DESCR_FLOAT:
- loc = xmm_registers[code]
+ loc = regloc.XMMREGLOCS[code]
else:
- loc = registers[code]
+ loc = regloc.REGLOCS[code]
arglocs.append(loc)
return arglocs[:]
@rgc.no_collect
def grab_frame_values(self, bytecode, frame_addr, allregisters):
# no malloc allowed here!!
- self.fail_ebp = allregisters[16 + ebp.op]
+ self.fail_ebp = allregisters[16 + ebp.value]
num = 0
value_hi = 0
while 1:
@@ -1197,7 +1402,7 @@
code = (code - self.CODE_FROMSTACK) >> 2
stackloc = frame_addr + get_ebp_ofs(code)
value = rffi.cast(rffi.LONGP, stackloc)[0]
- if kind == self.DESCR_FLOAT:
+ if kind == self.DESCR_FLOAT and WORD == 4:
value_hi = value
value = rffi.cast(rffi.LONGP, stackloc - 4)[0]
else:
@@ -1211,8 +1416,11 @@
break
code >>= 2
if kind == self.DESCR_FLOAT:
- value = allregisters[2*code]
- value_hi = allregisters[2*code + 1]
+ if WORD == 4:
+ value = allregisters[2*code]
+ value_hi = allregisters[2*code + 1]
+ else:
+ value = allregisters[code]
else:
value = allregisters[16 + code]
@@ -1223,7 +1431,8 @@
tgt = self.fail_boxes_ptr.get_addr_for_num(num)
elif kind == self.DESCR_FLOAT:
tgt = self.fail_boxes_float.get_addr_for_num(num)
- rffi.cast(rffi.LONGP, tgt)[1] = value_hi
+ if WORD == 4:
+ rffi.cast(rffi.LONGP, tgt)[1] = value_hi
else:
assert 0, "bogus kind"
rffi.cast(rffi.LONGP, tgt)[0] = value
@@ -1232,7 +1441,8 @@
if not we_are_translated():
assert bytecode[4] == 0xCC
self.fail_boxes_count = num
- fail_index = rffi.cast(rffi.LONGP, bytecode)[0]
+ fail_index = rffi.cast(rffi.INTP, bytecode)[0]
+ fail_index = rffi.cast(lltype.Signed, fail_index)
return fail_index
def setup_failure_recovery(self):
@@ -1243,8 +1453,8 @@
# original value of the registers, optionally the original
# value of XMM registers, and finally a reference to the
# recovery bytecode. See _build_failure_recovery() for details.
- stack_at_ebp = registers[ebp.op]
- bytecode = rffi.cast(rffi.UCHARP, registers[8])
+ stack_at_ebp = registers[ebp.value]
+ bytecode = rffi.cast(rffi.UCHARP, registers[self.cpu.NUM_REGS])
allregisters = rffi.ptradd(registers, -16)
return self.grab_frame_values(bytecode, stack_at_ebp, allregisters)
@@ -1259,23 +1469,23 @@
self.failure_recovery_func)
failure_recovery_func = rffi.cast(lltype.Signed,
failure_recovery_func)
- mc = self.mc2._mc
+ mc = self.mc._mc
# Assume that we are called at the beginning, when there is no risk
# that 'mc' runs out of space. Checked by asserts in mc.write().
recovery_addr = mc.tell()
- mc.PUSH(edi)
- mc.PUSH(esi)
- mc.PUSH(ebp)
- mc.PUSH(esp) # <-- not really used, but needed to take up the space
- mc.PUSH(ebx)
- mc.PUSH(edx)
- mc.PUSH(ecx)
- mc.PUSH(eax)
- mc.MOV(esi, esp)
+
+ # Push all general purpose registers
+ for gpr in range(self.cpu.NUM_REGS-1, -1, -1):
+ mc.PUSH_r(gpr)
+
+ # ebx/rbx is callee-save in both i386 and x86-64
+ mc.MOV_rr(ebx.value, esp.value)
+
if withfloats:
- mc.SUB(esp, imm(8*8))
- for i in range(8):
- mc.MOVSD(mem64(esp, 8*i), xmm_registers[i])
+ # Push all float registers
+ mc.SUB_ri(esp.value, self.cpu.NUM_REGS*8)
+ for i in range(self.cpu.NUM_REGS):
+ mc.MOVSD_sx(8*i, i)
# we call a provided function that will
# - call our on_leave_jitted_hook which will mark
@@ -1283,7 +1493,7 @@
# avoid unwarranted freeing
# - optionally save exception depending on the flag
addr = self.cpu.get_on_leave_jitted_int(save_exception=exc)
- mc.CALL(rel32(addr))
+ mc.CALL(imm(addr))
# the following call saves all values from the stack and from
# registers to the right 'fail_boxes_<type>' location.
@@ -1293,50 +1503,58 @@
# bytecode, pushed just before by the CALL instruction written by
# generate_quick_failure(). XXX misaligned stack in the call, but
# it's ok because failure_recovery_func is not calling anything more
- mc.PUSH(esi)
- mc.CALL(rel32(failure_recovery_func))
+
+ # XXX
+ if IS_X86_32:
+ mc.PUSH_r(ebx.value)
+ elif IS_X86_64:
+ mc.MOV_rr(edi.value, ebx.value)
+ # XXX: Correct to only align the stack on 64-bit?
+ mc.AND_ri(esp.value, -16)
+ else:
+ raise AssertionError("Shouldn't happen")
+
+ mc.CALL(imm(failure_recovery_func))
# returns in eax the fail_index
# now we return from the complete frame, which starts from
- # _assemble_bootstrap_code(). The LEA below throws away most
- # of the frame, including all the PUSHes that we did just above.
- mc.LEA(esp, addr_add(ebp, imm(-3 * WORD)))
- mc.POP(edi) # [ebp-12]
- mc.POP(esi) # [ebp-8]
- mc.POP(ebx) # [ebp-4]
- mc.POP(ebp) # [ebp]
- mc.RET()
- self.mc2.done()
+ # _assemble_bootstrap_code(). The LEA in _call_footer below throws
+ # away most of the frame, including all the PUSHes that we did just
+ # above.
+
+ self._call_footer()
+ self.mc.done()
self.failure_recovery_code[exc + 2 * withfloats] = recovery_addr
def generate_failure(self, fail_index, locs, exc, locs_are_ref):
- mc = self.mc
+ self.mc._mc.begin_reuse_scratch_register()
for i in range(len(locs)):
loc = locs[i]
- if isinstance(loc, REG):
- if loc.width == 8:
+ if isinstance(loc, RegLoc):
+ if loc.is_xmm:
adr = self.fail_boxes_float.get_addr_for_num(i)
- mc.MOVSD(heap64(adr), loc)
+ self.mc.MOVSD(heap(adr), loc)
else:
if locs_are_ref[i]:
adr = self.fail_boxes_ptr.get_addr_for_num(i)
else:
adr = self.fail_boxes_int.get_addr_for_num(i)
- mc.MOV(heap(adr), loc)
+ self.mc.MOV(heap(adr), loc)
for i in range(len(locs)):
loc = locs[i]
- if not isinstance(loc, REG):
- if loc.width == 8:
- mc.MOVSD(xmm0, loc)
+ if not isinstance(loc, RegLoc):
+ if isinstance(loc, StackLoc) and loc.type == FLOAT:
+ self.mc.MOVSD_xb(xmm0.value, loc.value)
adr = self.fail_boxes_float.get_addr_for_num(i)
- mc.MOVSD(heap64(adr), xmm0)
+ self.mc.MOVSD(heap(adr), xmm0)
else:
if locs_are_ref[i]:
adr = self.fail_boxes_ptr.get_addr_for_num(i)
else:
adr = self.fail_boxes_int.get_addr_for_num(i)
- mc.MOV(eax, loc)
- mc.MOV(heap(adr), eax)
+ self.mc.MOV(eax, loc)
+ self.mc.MOV(heap(adr), eax)
+ self.mc._mc.end_reuse_scratch_register()
# we call a provided function that will
# - call our on_leave_jitted_hook which will mark
@@ -1344,28 +1562,31 @@
# avoid unwarranted freeing
# - optionally save exception depending on the flag
addr = self.cpu.get_on_leave_jitted_int(save_exception=exc)
- mc.CALL(rel32(addr))
+ self.mc.CALL(imm(addr))
+
+ self.mc.MOV_ri(eax.value, fail_index)
- mc.LEA(esp, addr_add(ebp, imm(-3 * WORD)))
- mc.MOV(eax, imm(fail_index))
- mc.POP(edi) # [ebp-12]
- mc.POP(esi) # [ebp-8]
- mc.POP(ebx) # [ebp-4]
- mc.POP(ebp) # [ebp]
- mc.RET()
-
- @specialize.arg(2)
- def implement_guard(self, addr, emit_jump):
- emit_jump(rel32(addr))
+ # exit function
+ self._call_footer()
+
+ def implement_guard(self, guard_token, condition=None):
+ self.mc.reserve_bytes(guard_token.recovery_stub_size())
+ self.pending_guard_tokens.append(guard_token)
+ # XXX: These jumps are patched later, the self.mc.tell() are just
+ # dummy values
+ if condition:
+ self.mc.J_il(rx86.Conditions[condition], self.mc.tell())
+ else:
+ self.mc.JMP_l(self.mc.tell())
return self.mc.tell() - 4
def genop_call(self, op, arglocs, resloc):
sizeloc = arglocs[0]
- assert isinstance(sizeloc, IMM32)
+ assert isinstance(sizeloc, ImmedLoc)
size = sizeloc.value
if isinstance(op.args[0], Const):
- x = rel32(op.args[0].getint())
+ x = imm(op.args[0].getint())
else:
x = arglocs[1]
if x is eax:
@@ -1375,35 +1596,35 @@
self._emit_call(x, arglocs, 2, tmp=tmp)
- if isinstance(resloc, MODRM64):
- self.mc.FSTP(resloc)
+ if isinstance(resloc, StackLoc) and resloc.width == 8 and IS_X86_32:
+ self.mc.FSTP_b(resloc.value)
elif size == 1:
- self.mc.AND(eax, imm(0xff))
+ self.mc.AND_ri(eax.value, 0xff)
elif size == 2:
- self.mc.AND(eax, imm(0xffff))
+ self.mc.AND_ri(eax.value, 0xffff)
- def genop_guard_call_may_force(self, op, guard_op, addr,
+ def genop_guard_call_may_force(self, op, guard_op, guard_token,
arglocs, result_loc):
faildescr = guard_op.descr
fail_index = self.cpu.get_fail_descr_number(faildescr)
- self.mc.MOV(mem(ebp, FORCE_INDEX_OFS), imm(fail_index))
+ self.mc.MOV_bi(FORCE_INDEX_OFS, fail_index)
self.genop_call(op, arglocs, result_loc)
- self.mc.CMP(mem(ebp, FORCE_INDEX_OFS), imm(0))
- return self.implement_guard(addr, self.mc.JL)
+ self.mc.CMP_bi(FORCE_INDEX_OFS, 0)
+ return self.implement_guard(guard_token, 'L')
- def genop_guard_call_assembler(self, op, guard_op, addr,
+ def genop_guard_call_assembler(self, op, guard_op, guard_token,
arglocs, result_loc):
faildescr = guard_op.descr
fail_index = self.cpu.get_fail_descr_number(faildescr)
- self.mc.MOV(mem(ebp, FORCE_INDEX_OFS), imm(fail_index))
+ self.mc.MOV_bi(FORCE_INDEX_OFS, fail_index)
descr = op.descr
assert isinstance(descr, LoopToken)
assert len(arglocs) - 2 == len(descr._x86_arglocs[0])
#
# Write a call to the direct_bootstrap_code of the target assembler
- self._emit_call(rel32(descr._x86_direct_bootstrap_code), arglocs, 2,
+ self._emit_call(imm(descr._x86_direct_bootstrap_code), arglocs, 2,
tmp=eax)
- mc = self._start_block()
+ self.mc.ensure_bytes_available(256)
if op.result is None:
assert result_loc is None
value = self.cpu.done_with_this_frame_void_v
@@ -1419,26 +1640,27 @@
value = self.cpu.done_with_this_frame_float_v
else:
raise AssertionError(kind)
- mc.CMP(eax, imm(value))
- mc.JE(rel8_patched_later) # goto B if we get 'done_with_this_frame'
- je_location = mc.get_relative_pos()
+ self.mc.CMP_ri(eax.value, value)
+ # patched later
+ self.mc.J_il8(rx86.Conditions['E'], 0) # goto B if we get 'done_with_this_frame'
+ je_location = self.mc.get_relative_pos()
#
# Path A: use assembler_helper_adr
jd = descr.outermost_jitdriver_sd
assert jd is not None
asm_helper_adr = self.cpu.cast_adr_to_int(jd.assembler_helper_adr)
- self._emit_call(rel32(asm_helper_adr), [eax, arglocs[1]], 0,
- tmp=ecx, force_mc=True, mc=mc)
- if isinstance(result_loc, MODRM64):
- mc.FSTP(result_loc)
+ self._emit_call(imm(asm_helper_adr), [eax, arglocs[1]], 0,
+ tmp=ecx)
+ if IS_X86_32 and isinstance(result_loc, StackLoc) and result_loc.type == FLOAT:
+ self.mc.FSTP_b(result_loc.value)
#else: result_loc is already either eax or None, checked below
- mc.JMP(rel8_patched_later) # done
- jmp_location = mc.get_relative_pos()
+ self.mc.JMP_l8(0) # jump to done, patched later
+ jmp_location = self.mc.get_relative_pos()
#
# Path B: fast path. Must load the return value, and reset the token
offset = jmp_location - je_location
assert 0 < offset <= 127
- mc.overwrite(je_location - 1, [chr(offset)])
+ self.mc.overwrite(je_location - 1, [chr(offset)])
#
# Reset the vable token --- XXX really too much special logic here:-(
if jd.index_of_virtualizable >= 0:
@@ -1446,8 +1668,8 @@
fielddescr = jd.vable_token_descr
assert isinstance(fielddescr, BaseFieldDescr)
ofs = fielddescr.offset
- mc.MOV(eax, arglocs[1])
- mc.MOV(addr_add(eax, imm(ofs)), imm(0))
+ self.mc.MOV(eax, arglocs[1])
+ self.mc.MOV_mi((eax.value, ofs), 0)
# in the line above, TOKEN_NONE = 0
#
if op.result is not None:
@@ -1456,27 +1678,26 @@
if kind == FLOAT:
xmmtmp = X86XMMRegisterManager.all_regs[0]
adr = self.fail_boxes_float.get_addr_for_num(0)
- mc.MOVSD(xmmtmp, heap64(adr))
- mc.MOVSD(result_loc, xmmtmp)
+ self.mc.MOVSD(xmmtmp, heap(adr))
+ self.mc.MOVSD(result_loc, xmmtmp)
else:
assert result_loc is eax
if kind == INT:
adr = self.fail_boxes_int.get_addr_for_num(0)
- mc.MOV(eax, heap(adr))
+ self.mc.MOV(eax, heap(adr))
elif kind == REF:
adr = self.fail_boxes_ptr.get_addr_for_num(0)
- mc.XOR(eax, eax)
- mc.XCHG(eax, heap(adr))
+ self.mc.XOR_rr(eax.value, eax.value)
+ self.mc.XCHG(eax, heap(adr))
else:
raise AssertionError(kind)
#
# Here we join Path A and Path B again
- offset = mc.get_relative_pos() - jmp_location
+ offset = self.mc.get_relative_pos() - jmp_location
assert 0 <= offset <= 127
- mc.overwrite(jmp_location - 1, [chr(offset)])
- self._stop_block()
- self.mc.CMP(mem(ebp, FORCE_INDEX_OFS), imm(0))
- return self.implement_guard(addr, self.mc.JL)
+ self.mc.overwrite(jmp_location - 1, [chr(offset)])
+ self.mc.CMP_bi(FORCE_INDEX_OFS, 0)
+ return self.implement_guard(guard_token, 'L')
def genop_discard_cond_call_gc_wb(self, op, arglocs):
# use 'mc._mc' directly instead of 'mc', to avoid
@@ -1486,31 +1707,41 @@
cls = self.cpu.gc_ll_descr.has_write_barrier_class()
assert cls is not None and isinstance(descr, cls)
loc_base = arglocs[0]
- mc = self._start_block()
- mc.TEST(mem8(loc_base, descr.jit_wb_if_flag_byteofs),
- imm8(descr.jit_wb_if_flag_singlebyte))
- mc.JZ(rel8_patched_later)
- jz_location = mc.get_relative_pos()
+ self.mc.ensure_bytes_available(256)
+ self.mc.TEST8_mi((loc_base.value, descr.jit_wb_if_flag_byteofs),
+ descr.jit_wb_if_flag_singlebyte)
+ self.mc.J_il8(rx86.Conditions['Z'], 0) # patched later
+ jz_location = self.mc.get_relative_pos()
# the following is supposed to be the slow path, so whenever possible
# we choose the most compact encoding over the most efficient one.
for i in range(len(arglocs)-1, -1, -1):
- mc.PUSH(arglocs[i])
+ self.mc.PUSH(arglocs[i])
+
+ if IS_X86_64:
+ # We clobber these registers to pass the arguments, but that's
+ # okay, because consider_cond_call_gc_wb makes sure that any
+ # caller-save registers with values in them are present in arglocs,
+ # so they are saved on the stack above and restored below
+ self.mc.MOV_rs(edi.value, 0)
+ self.mc.MOV_rs(esi.value, 8)
+
# misaligned stack in the call, but it's ok because the write barrier
# is not going to call anything more. Also, this assumes that the
# write barrier does not touch the xmm registers.
- mc.CALL(rel32(descr.get_write_barrier_fn(self.cpu)))
+ self.mc.CALL(imm(descr.get_write_barrier_fn(self.cpu)))
for i in range(len(arglocs)):
loc = arglocs[i]
- assert isinstance(loc, REG)
- mc.POP(loc)
+ assert isinstance(loc, RegLoc)
+ self.mc.POP(loc)
# patch the JZ above
- offset = mc.get_relative_pos() - jz_location
+ offset = self.mc.get_relative_pos() - jz_location
assert 0 < offset <= 127
- mc.overwrite(jz_location-1, [chr(offset)])
- self._stop_block()
+ self.mc.overwrite(jz_location-1, [chr(offset)])
def genop_force_token(self, op, arglocs, resloc):
- self.mc.LEA(resloc, mem(ebp, FORCE_INDEX_OFS))
+ # RegAlloc.consider_force_token ensures this:
+ assert isinstance(resloc, RegLoc)
+ self.mc.LEA_rb(resloc.value, FORCE_INDEX_OFS)
def not_implemented_op_discard(self, op, arglocs):
msg = "not implemented operation: %s" % op.getopname()
@@ -1538,17 +1769,16 @@
return loop_token._x86_arglocs
def closing_jump(self, loop_token):
- self.mc.JMP(rel32(loop_token._x86_loop_code))
+ self.mc.JMP(imm(loop_token._x86_loop_code))
def malloc_cond_fixedsize(self, nursery_free_adr, nursery_top_adr,
size, tid):
- # don't use self.mc
- mc = self._start_block()
- mc.MOV(eax, heap(nursery_free_adr))
- mc.LEA(edx, addr_add(eax, imm(size)))
- mc.CMP(edx, heap(nursery_top_adr))
- mc.JNA(rel8_patched_later)
- jmp_adr = mc.get_relative_pos()
+ self.mc.ensure_bytes_available(256)
+ self.mc.MOV(eax, heap(nursery_free_adr))
+ self.mc.LEA_rm(edx.value, (eax.value, size))
+ self.mc.CMP(edx, heap(nursery_top_adr))
+ self.mc.J_il8(rx86.Conditions['NA'], 0) # patched later
+ jmp_adr = self.mc.get_relative_pos()
# See comments in _build_malloc_fixedsize_slowpath for the
# details of the two helper functions that we are calling below.
@@ -1564,17 +1794,16 @@
# reserve room for the argument to the real malloc and the
# 8 saved XMM regs
self._regalloc.reserve_param(1+16)
- mc.CALL(rel32(slowpath_addr1))
+ self.mc.CALL(imm(slowpath_addr1))
self.mark_gc_roots()
slowpath_addr2 = self.malloc_fixedsize_slowpath2
- mc.CALL(rel32(slowpath_addr2))
+ self.mc.CALL(imm(slowpath_addr2))
- offset = mc.get_relative_pos() - jmp_adr
+ offset = self.mc.get_relative_pos() - jmp_adr
assert 0 < offset <= 127
- mc.overwrite(jmp_adr-1, [chr(offset)])
- mc.MOV(addr_add(eax, imm(0)), imm(tid))
- mc.MOV(heap(nursery_free_adr), edx)
- self._stop_block()
+ self.mc.overwrite(jmp_adr-1, [chr(offset)])
+ self.mc.MOV_mi((eax.value, 0), tid)
+ self.mc.MOV(heap(nursery_free_adr), edx)
genop_discard_list = [Assembler386.not_implemented_op_discard] * rop._LAST
genop_list = [Assembler386.not_implemented_op] * rop._LAST
@@ -1594,32 +1823,20 @@
num = getattr(rop, opname.upper())
genop_list[num] = value
-def new_addr_add(heap, mem, memsib):
- def addr_add(reg_or_imm1, reg_or_imm2, offset=0, scale=0):
- if isinstance(reg_or_imm1, IMM32):
- if isinstance(reg_or_imm2, IMM32):
- return heap(reg_or_imm1.value + offset +
- (reg_or_imm2.value << scale))
- else:
- return memsib(None, reg_or_imm2, scale, reg_or_imm1.value + offset)
- else:
- if isinstance(reg_or_imm2, IMM32):
- return mem(reg_or_imm1, offset + (reg_or_imm2.value << scale))
- else:
- return memsib(reg_or_imm1, reg_or_imm2, scale, offset)
- return addr_add
-
-addr8_add = new_addr_add(heap8, mem8, memSIB8)
-addr_add = new_addr_add(heap, mem, memSIB)
-addr64_add = new_addr_add(heap64, mem64, memSIB64)
-
-def addr_add_const(reg_or_imm1, offset):
- if isinstance(reg_or_imm1, IMM32):
- return heap(reg_or_imm1.value + offset)
- else:
- return mem(reg_or_imm1, offset)
-
def round_up_to_4(size):
if size < 4:
return 4
return size
+
+# XXX: ri386 migration shims:
+def addr_add(reg_or_imm1, reg_or_imm2, offset=0, scale=0):
+ return AddressLoc(reg_or_imm1, reg_or_imm2, scale, offset)
+
+def addr_add_const(reg_or_imm1, offset):
+ return AddressLoc(reg_or_imm1, ImmedLoc(0), 0, offset)
+
+def mem(loc, offset):
+ return AddressLoc(loc, ImmedLoc(0), 0, offset)
+
+def heap(addr):
+ return AddressLoc(ImmedLoc(addr), ImmedLoc(0), 0, 0)
Modified: pypy/trunk/pypy/jit/backend/x86/codebuf.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/x86/codebuf.py (original)
+++ pypy/trunk/pypy/jit/backend/x86/codebuf.py Mon Aug 2 19:52:15 2010
@@ -2,12 +2,20 @@
import os, sys
from pypy.rpython.lltypesystem import lltype, rffi
from pypy.translator.tool.cbuild import ExternalCompilationInfo
-from pypy.jit.backend.x86.ri386 import I386CodeBuilder
+from pypy.jit.backend.x86.rx86 import X86_32_CodeBuilder, X86_64_CodeBuilder
+from pypy.jit.backend.x86.regloc import LocationCodeBuilder
from pypy.rlib.rmmap import PTR, alloc, free
from pypy.rlib.debug import make_sure_not_resized
+from pypy.jit.backend.x86.arch import IS_X86_32, IS_X86_64
+from pypy.rlib.objectmodel import we_are_translated
+# XXX: Seems nasty to change the superclass of InMemoryCodeBuilder like this
+if IS_X86_32:
+ codebuilder_cls = X86_32_CodeBuilder
+elif IS_X86_64:
+ codebuilder_cls = X86_64_CodeBuilder
-class InMemoryCodeBuilder(I386CodeBuilder):
+class InMemoryCodeBuilder(codebuilder_cls, LocationCodeBuilder):
_last_dump_start = 0
def __init__(self, start, end):
@@ -31,13 +39,15 @@
def write(self, listofchars):
self._pos = self.overwrite(self._pos, listofchars)
- def writechr(self, n):
- # purely for performance: don't make the one-element list [chr(n)]
+ def writechar(self, char):
pos = self._pos
assert pos + 1 <= self._size
- self._data[pos] = chr(n)
+ self._data[pos] = char
self._pos = pos + 1
+ def writechr(self, n):
+ self.writechar(chr(n))
+
def get_relative_pos(self):
return self._pos
@@ -50,11 +60,6 @@
self._pos = pos
self._last_dump_start = pos
- def execute(self, arg1, arg2):
- # XXX old testing stuff
- fnptr = rffi.cast(lltype.Ptr(BINARYFN), self._data)
- return fnptr(arg1, arg2)
-
def done(self):
# normally, no special action is needed here
if machine_code_dumper.enabled:
@@ -77,9 +82,6 @@
valgrind.discard_translations(self._data, self._size)
-BINARYFN = lltype.FuncType([lltype.Signed, lltype.Signed], lltype.Signed)
-
-
class MachineCodeDumper:
enabled = True
log_fd = -1
@@ -107,7 +109,10 @@
return False
# log the executable name
from pypy.jit.backend.hlinfo import highleveljitinfo
- os.write(self.log_fd, 'BACKEND i386\n')
+ if IS_X86_32:
+ os.write(self.log_fd, 'BACKEND x86\n')
+ elif IS_X86_64:
+ os.write(self.log_fd, 'BACKEND x86_64\n')
if highleveljitinfo.sys_executable:
os.write(self.log_fd, 'SYS_EXECUTABLE %s\n' % (
highleveljitinfo.sys_executable,))
@@ -137,6 +142,12 @@
def __init__(self, map_size):
data = alloc(map_size)
+ if IS_X86_64 and not we_are_translated():
+ # Hack to make sure that mcs are not within 32-bits of one
+ # another for testing purposes
+ from pypy.rlib.rmmap import hint
+ hint.pos += 0xFFFFFFFF
+
self._init(data, map_size)
def __del__(self):
Modified: pypy/trunk/pypy/jit/backend/x86/jump.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/x86/jump.py (original)
+++ pypy/trunk/pypy/jit/backend/x86/jump.py Mon Aug 2 19:52:15 2010
@@ -1,23 +1,6 @@
import sys
from pypy.tool.pairtype import extendabletype
-from pypy.jit.backend.x86.ri386 import *
-
-class __extend__(OPERAND):
- __metaclass__ = extendabletype
- def _getregkey(self):
- raise AssertionError("should only happen to registers and frame "
- "positions")
-
-class __extend__(REG):
- __metaclass__ = extendabletype
- def _getregkey(self):
- return ~self.op
-
-class __extend__(MODRM):
- __metaclass__ = extendabletype
- def _getregkey(self):
- return self.position
-
+from pypy.jit.backend.x86.regloc import ImmedLoc, StackLoc
def remap_frame_layout(assembler, src_locations, dst_locations, tmpreg):
pending_dests = len(dst_locations)
@@ -27,7 +10,7 @@
srccount[dst._getregkey()] = 0
for i in range(len(dst_locations)):
src = src_locations[i]
- if isinstance(src, IMM32):
+ if isinstance(src, ImmedLoc):
continue
key = src._getregkey()
if key in srccount:
@@ -46,7 +29,7 @@
srccount[key] = -1 # means "it's done"
pending_dests -= 1
src = src_locations[i]
- if not isinstance(src, IMM32):
+ if not isinstance(src, ImmedLoc):
key = src._getregkey()
if key in srccount:
srccount[key] -= 1
@@ -80,7 +63,7 @@
assert pending_dests == 0
def _move(assembler, src, dst, tmpreg):
- if isinstance(dst, MODRM) and isinstance(src, MODRM):
+ if dst.is_memory_reference() and src.is_memory_reference():
assembler.regalloc_mov(src, tmpreg)
src = tmpreg
assembler.regalloc_mov(src, dst)
Modified: pypy/trunk/pypy/jit/backend/x86/regalloc.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/x86/regalloc.py (original)
+++ pypy/trunk/pypy/jit/backend/x86/regalloc.py Mon Aug 2 19:52:15 2010
@@ -5,7 +5,7 @@
from pypy.jit.metainterp.history import (Box, Const, ConstInt, ConstPtr,
ResOperation, BoxPtr,
LoopToken, INT, REF, FLOAT)
-from pypy.jit.backend.x86.ri386 import *
+from pypy.jit.backend.x86.regloc import *
from pypy.rpython.lltypesystem import lltype, ll2ctypes, rffi, rstr
from pypy.rlib.objectmodel import we_are_translated
from pypy.rlib import rgc
@@ -17,16 +17,7 @@
from pypy.jit.backend.llsupport.descr import BaseCallDescr, BaseSizeDescr
from pypy.jit.backend.llsupport.regalloc import FrameManager, RegisterManager,\
TempBox
-
-WORD = 4
-FRAME_FIXED_SIZE = 5 # ebp + ebx + esi + edi + force_index = 5 words
-FORCE_INDEX_OFS = -4*WORD
-
-width_of_type = {
- INT : 1,
- REF : 1,
- FLOAT : 2,
- }
+from pypy.jit.backend.x86.arch import WORD, FRAME_FIXED_SIZE, IS_X86_32, IS_X86_64
class X86RegisterManager(RegisterManager):
@@ -50,6 +41,12 @@
print "convert_to_imm: got a %s" % c
raise AssertionError
+class X86_64_RegisterManager(X86RegisterManager):
+ # r11 omitted because it's used as scratch
+ all_regs = [eax, ecx, edx, ebx, esi, edi, r8, r9, r10, r12, r13, r14, r15]
+ no_lower_byte_regs = []
+ save_around_call_regs = [eax, ecx, edx, esi, edi, r8, r9, r10]
+
class FloatConstants(object):
BASE_CONSTANT_SIZE = 1000
@@ -80,7 +77,6 @@
all_regs = [xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7]
# we never need lower byte I hope
save_around_call_regs = all_regs
- reg_width = 2
def __init__(self, longevity, frame_manager=None, assembler=None):
RegisterManager.__init__(self, longevity, frame_manager=frame_manager,
@@ -94,27 +90,35 @@
def convert_to_imm(self, c):
adr = self.float_constants.record_float(c.getfloat())
- return heap64(adr)
+ return AddressLoc(ImmedLoc(adr), ImmedLoc(0), 0, 0)
def after_call(self, v):
# the result is stored in st0, but we don't have this around,
# so genop_call will move it to some frame location immediately
# after the call
- return self.frame_manager.loc(v, 2)
+ return self.frame_manager.loc(v)
-class X86FrameManager(FrameManager):
+class X86_64_XMMRegisterManager(X86XMMRegisterManager):
+ # xmm15 reserved for scratch use
+ all_regs = [xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14]
+ save_around_call_regs = all_regs
+
+ def call_result_location(self, v):
+ return xmm0
+
+ def after_call(self, v):
+ # We use RegisterManager's implementation, since X86XMMRegisterManager
+ # places the result on the stack, which we don't need to do when the
+ # calling convention places the result in xmm0
+ return RegisterManager.after_call(self, v)
+class X86FrameManager(FrameManager):
@staticmethod
- def frame_pos(i, size):
- if size == 1:
- res = mem(ebp, get_ebp_ofs(i))
- elif size == 2:
- res = mem64(ebp, get_ebp_ofs(i + 1))
- else:
- print "Unimplemented size %d" % i
- raise NotImplementedError("unimplemented size %d" % i)
- res.position = i
- return res
+ def frame_pos(i, box_type):
+ if IS_X86_32 and box_type == FLOAT:
+ return StackLoc(i, get_ebp_ofs(i+1), 2, box_type)
+ else:
+ return StackLoc(i, get_ebp_ofs(i), 1, box_type)
class RegAlloc(object):
exc = False
@@ -135,11 +139,21 @@
# compute longevity of variables
longevity = self._compute_vars_longevity(inputargs, operations)
self.longevity = longevity
- self.rm = X86RegisterManager(longevity,
- frame_manager = self.fm,
- assembler = self.assembler)
- self.xrm = X86XMMRegisterManager(longevity, frame_manager = self.fm,
- assembler = self.assembler)
+ # XXX
+ if cpu.WORD == 4:
+ gpr_reg_mgr_cls = X86RegisterManager
+ xmm_reg_mgr_cls = X86XMMRegisterManager
+ elif cpu.WORD == 8:
+ gpr_reg_mgr_cls = X86_64_RegisterManager
+ xmm_reg_mgr_cls = X86_64_XMMRegisterManager
+ else:
+ raise AssertionError("Word size should be 4 or 8")
+
+ self.rm = gpr_reg_mgr_cls(longevity,
+ frame_manager = self.fm,
+ assembler = self.assembler)
+ self.xrm = xmm_reg_mgr_cls(longevity, frame_manager = self.fm,
+ assembler = self.assembler)
def prepare_loop(self, inputargs, operations, looptoken):
self._prepare(inputargs, operations)
@@ -184,7 +198,7 @@
if reg:
loc = reg
else:
- loc = self.fm.loc(arg, width_of_type[arg.type])
+ loc = self.fm.loc(arg)
if arg.type == FLOAT:
floatlocs[i] = loc
else:
@@ -252,23 +266,23 @@
arg = inputargs[i]
i += 1
if arg.type == FLOAT:
- if isinstance(loc, REG):
+ if isinstance(loc, RegLoc):
self.xrm.reg_bindings[arg] = loc
used[loc] = None
else:
self.fm.frame_bindings[arg] = loc
else:
- if isinstance(loc, REG):
+ if isinstance(loc, RegLoc):
self.rm.reg_bindings[arg] = loc
used[loc] = None
else:
self.fm.frame_bindings[arg] = loc
self.rm.free_regs = []
- for reg in X86RegisterManager.all_regs:
+ for reg in self.rm.all_regs:
if reg not in used:
self.rm.free_regs.append(reg)
self.xrm.free_regs = []
- for reg in X86XMMRegisterManager.all_regs:
+ for reg in self.xrm.all_regs:
if reg not in used:
self.xrm.free_regs.append(reg)
# note: we need to make a copy of inputargs because possibly_free_vars
@@ -646,7 +660,7 @@
vable_index = jd.index_of_virtualizable
if vable_index >= 0:
self.rm._sync_var(op.args[vable_index])
- vable = self.fm.loc(op.args[vable_index], 1)
+ vable = self.fm.loc(op.args[vable_index])
else:
vable = imm(0)
self._call(op, [imm(size), vable] +
@@ -670,7 +684,7 @@
# function, a GC write barrier, is known not to touch them.
# See remember_young_pointer() in rpython/memory/gc/generation.py.
for v, reg in self.rm.reg_bindings.items():
- if ((reg is eax or reg is ecx or reg is edx)
+ if (reg in self.rm.save_around_call_regs
and self.rm.stays_alive(v)):
arglocs.append(reg)
self.PerformDiscard(op, arglocs)
@@ -809,7 +823,7 @@
def consider_setfield_gc(self, op):
ofs_loc, size_loc, ptr = self._unpack_fielddescr(op.descr)
- assert isinstance(size_loc, IMM32)
+ assert isinstance(size_loc, ImmedLoc)
if size_loc.value == 1:
need_lower_byte = True
else:
@@ -950,7 +964,7 @@
shape = gcrootmap.get_basic_shape()
for v, val in self.fm.frame_bindings.items():
if (isinstance(v, BoxPtr) and self.rm.stays_alive(v)):
- assert isinstance(val, MODRM)
+ assert isinstance(val, StackLoc)
gcrootmap.add_ebp_offset(shape, get_ebp_ofs(val.position))
for v, reg in self.rm.reg_bindings.items():
if reg is eax:
Modified: pypy/trunk/pypy/jit/backend/x86/runner.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/x86/runner.py (original)
+++ pypy/trunk/pypy/jit/backend/x86/runner.py Mon Aug 2 19:52:15 2010
@@ -4,11 +4,13 @@
from pypy.rlib.objectmodel import we_are_translated
from pypy.jit.metainterp import history, compile
from pypy.jit.backend.x86.assembler import Assembler386
-from pypy.jit.backend.x86.regalloc import FORCE_INDEX_OFS
+from pypy.jit.backend.x86.arch import FORCE_INDEX_OFS
from pypy.jit.backend.x86.profagent import ProfileAgent
from pypy.jit.backend.llsupport.llmodel import AbstractLLCPU
+from pypy.jit.backend.x86 import regloc
+import sys
-class CPU386(AbstractLLCPU):
+class AbstractX86CPU(AbstractLLCPU):
debug = True
supports_floats = True
@@ -132,10 +134,28 @@
assert fail_index == fail_index_2
return faildescr
+class CPU386(AbstractX86CPU):
+ WORD = 4
+ NUM_REGS = 8
+ CALLEE_SAVE_REGISTERS = [regloc.ebx, regloc.esi, regloc.edi]
+ FRAME_FIXED_SIZE = len(CALLEE_SAVE_REGISTERS) + 2
+
+ def __init__(self, *args, **kwargs):
+ assert sys.maxint == (2**31 - 1)
+ super(CPU386, self).__init__(*args, **kwargs)
class CPU386_NO_SSE2(CPU386):
supports_floats = False
+class CPU_X86_64(AbstractX86CPU):
+ WORD = 8
+ NUM_REGS = 16
+ CALLEE_SAVE_REGISTERS = [regloc.ebx, regloc.r12, regloc.r13, regloc.r14, regloc.r15]
+ FRAME_FIXED_SIZE = len(CALLEE_SAVE_REGISTERS) + 2
+
+ def __init__(self, *args, **kwargs):
+ assert sys.maxint == (2**63 - 1)
+ super(CPU_X86_64, self).__init__(*args, **kwargs)
CPU = CPU386
Modified: pypy/trunk/pypy/jit/backend/x86/test/conftest.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/x86/test/conftest.py (original)
+++ pypy/trunk/pypy/jit/backend/x86/test/conftest.py Mon Aug 2 19:52:15 2010
@@ -3,6 +3,5 @@
cpu = detect_cpu.autodetect()
def pytest_runtest_setup(item):
- if cpu != 'x86':
- py.test.skip("x86 directory skipped: cpu is %r" % (cpu,))
-
+ if cpu not in ('x86', 'x86_64'):
+ py.test.skip("x86/x86_64 tests skipped: cpu is %r" % (cpu,))
Modified: pypy/trunk/pypy/jit/backend/x86/test/test_assembler.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/x86/test/test_assembler.py (original)
+++ pypy/trunk/pypy/jit/backend/x86/test/test_assembler.py Mon Aug 2 19:52:15 2010
@@ -1,14 +1,19 @@
-from pypy.jit.backend.x86.ri386 import *
+from pypy.jit.backend.x86.regloc import *
from pypy.jit.backend.x86.assembler import Assembler386, MachineCodeBlockWrapper
from pypy.jit.backend.x86.regalloc import X86FrameManager, get_ebp_ofs
-from pypy.jit.metainterp.history import BoxInt, BoxPtr, BoxFloat
+from pypy.jit.metainterp.history import BoxInt, BoxPtr, BoxFloat, INT, REF, FLOAT
from pypy.rlib.rarithmetic import intmask
from pypy.rpython.lltypesystem import lltype, llmemory, rffi
+from pypy.jit.backend.x86.arch import WORD, IS_X86_32, IS_X86_64
+from pypy.jit.backend.detect_cpu import getcpuclass
+from pypy.jit.backend.x86.regalloc import X86RegisterManager, X86_64_RegisterManager, X86XMMRegisterManager, X86_64_XMMRegisterManager
+ACTUAL_CPU = getcpuclass()
class FakeCPU:
rtyper = None
supports_floats = True
+ NUM_REGS = ACTUAL_CPU.NUM_REGS
class FakeMC:
def __init__(self, base_address=0):
@@ -25,7 +30,14 @@
self.content.append(("JMP", args))
def done(self):
pass
+ def PUSH_r(self, reg):
+ pass
+ def POP_r(self, reg):
+ pass
+class FakeAssembler:
+ def write_pending_failure_recoveries(self):
+ pass
def test_write_failure_recovery_description():
assembler = Assembler386(FakeCPU())
@@ -33,12 +45,12 @@
failargs = [BoxInt(), BoxPtr(), BoxFloat()] * 3
failargs.insert(6, None)
failargs.insert(7, None)
- locs = [X86FrameManager.frame_pos(0, 1),
- X86FrameManager.frame_pos(1, 1),
- X86FrameManager.frame_pos(10, 2),
- X86FrameManager.frame_pos(100, 1),
- X86FrameManager.frame_pos(101, 1),
- X86FrameManager.frame_pos(110, 2),
+ locs = [X86FrameManager.frame_pos(0, INT),
+ X86FrameManager.frame_pos(1, REF),
+ X86FrameManager.frame_pos(10, FLOAT),
+ X86FrameManager.frame_pos(100, INT),
+ X86FrameManager.frame_pos(101, REF),
+ X86FrameManager.frame_pos(110, FLOAT),
None,
None,
ebx,
@@ -46,17 +58,17 @@
xmm2]
assert len(failargs) == len(locs)
assembler.write_failure_recovery_description(mc, failargs, locs)
- nums = [Assembler386.DESCR_INT + 4*(8+0),
- Assembler386.DESCR_REF + 4*(8+1),
- Assembler386.DESCR_FLOAT + 4*(8+10),
- Assembler386.DESCR_INT + 4*(8+100),
- Assembler386.DESCR_REF + 4*(8+101),
- Assembler386.DESCR_FLOAT + 4*(8+110),
+ nums = [Assembler386.DESCR_INT + 4*(16+0),
+ Assembler386.DESCR_REF + 4*(16+1),
+ Assembler386.DESCR_FLOAT + 4*(16+10),
+ Assembler386.DESCR_INT + 4*(16+100),
+ Assembler386.DESCR_REF + 4*(16+101),
+ Assembler386.DESCR_FLOAT + 4*(16+110),
Assembler386.CODE_HOLE,
Assembler386.CODE_HOLE,
- Assembler386.DESCR_INT + 4*ebx.op,
- Assembler386.DESCR_REF + 4*esi.op,
- Assembler386.DESCR_FLOAT + 4*xmm2.op]
+ Assembler386.DESCR_INT + 4*ebx.value,
+ Assembler386.DESCR_REF + 4*esi.value,
+ Assembler386.DESCR_FLOAT + 4*xmm2.value]
double_byte_nums = []
for num in nums[3:6]:
double_byte_nums.append((num & 0x7F) | 0x80)
@@ -94,6 +106,9 @@
return lltype.cast_opaque_ptr(llmemory.GCREF, lltype.malloc(S))
def get_random_float():
+ # Returns <float>, <low word>, <high word>
+ # NB: on 64-bit, <low word> will be the entire float and <high word>
+ # will be random garbage from malloc!
assert withfloats
value = random.random() - 0.5
# make sure it fits into 64 bits
@@ -101,9 +116,16 @@
rffi.cast(rffi.DOUBLEP, tmp)[0] = value
return rffi.cast(rffi.DOUBLEP, tmp)[0], tmp[0], tmp[1]
+ if IS_X86_32:
+ main_registers = X86RegisterManager.all_regs
+ xmm_registers = X86XMMRegisterManager.all_regs
+ elif IS_X86_64:
+ main_registers = X86_64_RegisterManager.all_regs
+ xmm_registers = X86_64_XMMRegisterManager.all_regs
+
# memory locations: 26 integers, 26 pointers, 26 floats
# main registers: half of them as signed and the other half as ptrs
- # xmm registers: all floats, from xmm0 to xmm7
+ # xmm registers: all floats, from xmm0 to xmm(7|15)
# holes: 8
locations = []
baseloc = 4
@@ -117,18 +139,17 @@
content = ([('int', locations.pop()) for _ in range(26)] +
[('ptr', locations.pop()) for _ in range(26)] +
[(['int', 'ptr'][random.randrange(0, 2)], reg)
- for reg in [eax, ecx, edx, ebx, esi, edi]])
+ for reg in main_registers])
if withfloats:
content += ([('float', locations.pop()) for _ in range(26)] +
- [('float', reg) for reg in [xmm0, xmm1, xmm2, xmm3,
- xmm4, xmm5, xmm6, xmm7]])
+ [('float', reg) for reg in xmm_registers])
for i in range(8):
content.append(('hole', None))
random.shuffle(content)
# prepare the expected target arrays, the descr_bytecode,
# the 'registers' and the 'stack' arrays according to 'content'
- xmmregisters = lltype.malloc(rffi.LONGP.TO, 16+9, flavor='raw')
+ xmmregisters = lltype.malloc(rffi.LONGP.TO, 16+ACTUAL_CPU.NUM_REGS+1, flavor='raw')
registers = rffi.ptradd(xmmregisters, 16)
stacklen = baseloc + 10
stack = lltype.malloc(rffi.LONGP.TO, stacklen, flavor='raw')
@@ -140,8 +161,8 @@
assert loc >= 0
ofs = get_ebp_ofs(loc)
assert ofs < 0
- assert (ofs % 4) == 0
- stack[stacklen + ofs//4] = value
+ assert (ofs % WORD) == 0
+ stack[stacklen + ofs//WORD] = value
descr_bytecode = []
for i, (kind, loc) in enumerate(content):
@@ -152,12 +173,18 @@
value, lo, hi = get_random_float()
expected_floats[i] = value
kind = Assembler386.DESCR_FLOAT
- if isinstance(loc, REG):
- xmmregisters[2*loc.op] = lo
- xmmregisters[2*loc.op+1] = hi
+ if isinstance(loc, RegLoc):
+ if WORD == 4:
+ xmmregisters[2*loc.value] = lo
+ xmmregisters[2*loc.value+1] = hi
+ elif WORD == 8:
+ xmmregisters[loc.value] = lo
else:
- write_in_stack(loc, hi)
- write_in_stack(loc+1, lo)
+ if WORD == 4:
+ write_in_stack(loc, hi)
+ write_in_stack(loc+1, lo)
+ elif WORD == 8:
+ write_in_stack(loc, lo)
else:
if kind == 'int':
value = get_random_int()
@@ -170,15 +197,15 @@
value = rffi.cast(rffi.LONG, value)
else:
assert 0, kind
- if isinstance(loc, REG):
- registers[loc.op] = value
+ if isinstance(loc, RegLoc):
+ registers[loc.value] = value
else:
write_in_stack(loc, value)
- if isinstance(loc, REG):
- num = kind + 4*loc.op
+ if isinstance(loc, RegLoc):
+ num = kind + 4*loc.value
else:
- num = kind + 4*(8+loc)
+ num = kind + Assembler386.CODE_FROMSTACK + (4*loc)
while num >= 0x80:
descr_bytecode.append((num & 0x7F) | 0x80)
num >>= 7
@@ -195,8 +222,8 @@
for i in range(len(descr_bytecode)):
assert 0 <= descr_bytecode[i] <= 255
descr_bytes[i] = rffi.cast(rffi.UCHAR, descr_bytecode[i])
- registers[8] = rffi.cast(rffi.LONG, descr_bytes)
- registers[ebp.op] = rffi.cast(rffi.LONG, stack) + 4*stacklen
+ registers[ACTUAL_CPU.NUM_REGS] = rffi.cast(rffi.LONG, descr_bytes)
+ registers[ebp.value] = rffi.cast(rffi.LONG, stack) + WORD*stacklen
# run!
assembler = Assembler386(FakeCPU())
@@ -237,7 +264,8 @@
def test_mc_wrapper_profile_agent():
agent = FakeProfileAgent()
- mc = FakeMCWrapper(100, agent)
+ assembler = FakeAssembler()
+ mc = FakeMCWrapper(assembler, 100, agent)
mc.start_function("abc")
mc.writechr("x")
mc.writechr("x")
Modified: pypy/trunk/pypy/jit/backend/x86/test/test_basic.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/x86/test/test_basic.py (original)
+++ pypy/trunk/pypy/jit/backend/x86/test/test_basic.py Mon Aug 2 19:52:15 2010
@@ -1,5 +1,5 @@
import py
-from pypy.jit.backend.x86.runner import CPU386
+from pypy.jit.backend.detect_cpu import getcpuclass
from pypy.jit.metainterp.warmspot import ll_meta_interp
from pypy.jit.metainterp.test import test_basic
from pypy.jit.codewriter.policy import StopAtXPolicy
@@ -7,7 +7,7 @@
class Jit386Mixin(test_basic.LLJitMixin):
type_system = 'lltype'
- CPUClass = CPU386
+ CPUClass = getcpuclass()
def check_jumps(self, maxcount):
pass
Modified: pypy/trunk/pypy/jit/backend/x86/test/test_gc_integration.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/x86/test/test_gc_integration.py (original)
+++ pypy/trunk/pypy/jit/backend/x86/test/test_gc_integration.py Mon Aug 2 19:52:15 2010
@@ -9,13 +9,13 @@
from pypy.jit.codewriter import heaptracker
from pypy.jit.backend.llsupport.descr import GcCache
from pypy.jit.backend.llsupport.gc import GcLLDescription
-from pypy.jit.backend.x86.runner import CPU
-from pypy.jit.backend.x86.regalloc import RegAlloc, WORD, FRAME_FIXED_SIZE
+from pypy.jit.backend.detect_cpu import getcpuclass
+from pypy.jit.backend.x86.regalloc import RegAlloc
+from pypy.jit.backend.x86.arch import WORD, FRAME_FIXED_SIZE
from pypy.jit.metainterp.test.oparser import parse
from pypy.rpython.lltypesystem import lltype, llmemory, rffi
from pypy.rpython.annlowlevel import llhelper
from pypy.rpython.lltypesystem import rclass, rstr
-from pypy.jit.backend.x86.ri386 import *
from pypy.jit.backend.llsupport.gc import GcLLDescr_framework, GcRefList, GcPtrFieldDescr
from pypy.jit.backend.x86.test.test_regalloc import MockAssembler
@@ -23,6 +23,8 @@
from pypy.jit.backend.x86.regalloc import X86RegisterManager, X86FrameManager,\
X86XMMRegisterManager
+CPU = getcpuclass()
+
class MockGcRootMap(object):
def get_basic_shape(self):
return ['shape']
@@ -84,7 +86,7 @@
mark = regalloc.get_mark_gc_roots(cpu.gc_ll_descr.gcrootmap)
assert mark[0] == 'compressed'
base = -WORD * FRAME_FIXED_SIZE
- expected = ['ebx', 'esi', 'edi', base, base-4, base-8]
+ expected = ['ebx', 'esi', 'edi', base, base-WORD, base-WORD*2]
assert dict.fromkeys(mark[1:]) == dict.fromkeys(expected)
class TestRegallocGcIntegration(BaseTestRegalloc):
@@ -175,7 +177,7 @@
self.addrs[1] = self.addrs[0] + 64
# 64 bytes
def malloc_slowpath(size):
- assert size == 8
+ assert size == WORD*2
nadr = rffi.cast(lltype.Signed, self.nursery)
self.addrs[0] = nadr + size
return nadr
@@ -199,7 +201,7 @@
return rffi.cast(lltype.Signed, self.addrs)
def get_nursery_top_addr(self):
- return rffi.cast(lltype.Signed, self.addrs) + 4
+ return rffi.cast(lltype.Signed, self.addrs) + WORD
def get_malloc_fixedsize_slowpath_addr(self):
fptr = llhelper(lltype.Ptr(self.MALLOC_SLOWPATH), self.malloc_slowpath)
@@ -213,7 +215,7 @@
def setup_method(self, method):
cpu = CPU(None, None)
- cpu.vtable_offset = 4
+ cpu.vtable_offset = WORD
cpu.gc_ll_descr = GCDescrFastpathMalloc()
NODE = lltype.Struct('node', ('tid', lltype.Signed),
@@ -249,7 +251,7 @@
assert gc_ll_descr.nursery[0] == self.nodedescr.tid
assert gc_ll_descr.nursery[1] == 42
nurs_adr = rffi.cast(lltype.Signed, gc_ll_descr.nursery)
- assert gc_ll_descr.addrs[0] == nurs_adr + 8
+ assert gc_ll_descr.addrs[0] == nurs_adr + (WORD*2)
def test_malloc_slowpath(self):
ops = '''
@@ -269,7 +271,7 @@
# this should call slow path once
gc_ll_descr = self.cpu.gc_ll_descr
nadr = rffi.cast(lltype.Signed, gc_ll_descr.nursery)
- assert gc_ll_descr.addrs[0] == nadr + 8
+ assert gc_ll_descr.addrs[0] == nadr + (WORD*2)
def test_new_with_vtable(self):
ops = '''
@@ -284,4 +286,4 @@
assert gc_ll_descr.nursery[0] == self.descrsize.tid
assert gc_ll_descr.nursery[1] == self.vtable_int
nurs_adr = rffi.cast(lltype.Signed, gc_ll_descr.nursery)
- assert gc_ll_descr.addrs[0] == nurs_adr + 12
+ assert gc_ll_descr.addrs[0] == nurs_adr + (WORD*3)
Modified: pypy/trunk/pypy/jit/backend/x86/test/test_jump.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/x86/test/test_jump.py (original)
+++ pypy/trunk/pypy/jit/backend/x86/test/test_jump.py Mon Aug 2 19:52:15 2010
@@ -1,6 +1,7 @@
-from pypy.jit.backend.x86.ri386 import *
+from pypy.jit.backend.x86.regloc import *
from pypy.jit.backend.x86.regalloc import X86FrameManager
from pypy.jit.backend.x86.jump import remap_frame_layout
+from pypy.jit.metainterp.history import INT
frame_pos = X86FrameManager.frame_pos
@@ -25,7 +26,7 @@
continue
assert len(op1) == len(op2)
for x, y in zip(op1, op2):
- if isinstance(x, MODRM) and isinstance(y, MODRM):
+ if isinstance(x, StackLoc) and isinstance(y, MODRM):
assert x.byte == y.byte
assert x.extradata == y.extradata
else:
@@ -41,9 +42,9 @@
remap_frame_layout(assembler, [eax, ebx, ecx, edx, esi, edi],
[eax, ebx, ecx, edx, esi, edi], '?')
assert assembler.ops == []
- s8 = frame_pos(1, 1)
- s12 = frame_pos(31, 1)
- s20 = frame_pos(6, 1)
+ s8 = frame_pos(1, INT)
+ s12 = frame_pos(31, INT)
+ s20 = frame_pos(6, INT)
remap_frame_layout(assembler, [eax, ebx, ecx, s20, s8, edx, s12, esi, edi],
[eax, ebx, ecx, s20, s8, edx, s12, esi, edi],
'?')
@@ -58,10 +59,10 @@
def test_simple_framelocs():
assembler = MockAssembler()
- s8 = frame_pos(0, 1)
- s12 = frame_pos(13, 1)
- s20 = frame_pos(20, 1)
- s24 = frame_pos(221, 1)
+ s8 = frame_pos(0, INT)
+ s12 = frame_pos(13, INT)
+ s20 = frame_pos(20, INT)
+ s24 = frame_pos(221, INT)
remap_frame_layout(assembler, [s8, eax, s12], [s20, s24, edi], edx)
assert assembler.ops == [('mov', s8, edx),
('mov', edx, s20),
@@ -70,10 +71,10 @@
def test_reordering():
assembler = MockAssembler()
- s8 = frame_pos(8, 1)
- s12 = frame_pos(12, 1)
- s20 = frame_pos(19, 1)
- s24 = frame_pos(1, 1)
+ s8 = frame_pos(8, INT)
+ s12 = frame_pos(12, INT)
+ s20 = frame_pos(19, INT)
+ s24 = frame_pos(1, INT)
remap_frame_layout(assembler, [eax, s8, s20, ebx],
[s8, ebx, eax, edi], '?')
assert assembler.got([('mov', ebx, edi),
@@ -83,10 +84,10 @@
def test_cycle():
assembler = MockAssembler()
- s8 = frame_pos(8, 1)
- s12 = frame_pos(12, 1)
- s20 = frame_pos(19, 1)
- s24 = frame_pos(1, 1)
+ s8 = frame_pos(8, INT)
+ s12 = frame_pos(12, INT)
+ s20 = frame_pos(19, INT)
+ s24 = frame_pos(1, INT)
remap_frame_layout(assembler, [eax, s8, s20, ebx],
[s8, ebx, eax, s20], '?')
assert assembler.got([('push', s8),
@@ -97,12 +98,12 @@
def test_cycle_2():
assembler = MockAssembler()
- s8 = frame_pos(8, 1)
- s12 = frame_pos(12, 1)
- s20 = frame_pos(19, 1)
- s24 = frame_pos(1, 1)
- s2 = frame_pos(2, 1)
- s3 = frame_pos(3, 1)
+ s8 = frame_pos(8, INT)
+ s12 = frame_pos(12, INT)
+ s20 = frame_pos(19, INT)
+ s24 = frame_pos(1, INT)
+ s2 = frame_pos(2, INT)
+ s3 = frame_pos(3, INT)
remap_frame_layout(assembler,
[eax, s8, edi, s20, eax, s20, s24, esi, s2, s3],
[s8, s20, edi, eax, edx, s24, ebx, s12, s3, s2],
@@ -127,14 +128,14 @@
remap_frame_layout(assembler, [c3], [eax], '?')
assert assembler.ops == [('mov', c3, eax)]
assembler = MockAssembler()
- s12 = frame_pos(12, 1)
+ s12 = frame_pos(12, INT)
remap_frame_layout(assembler, [c3], [s12], '?')
assert assembler.ops == [('mov', c3, s12)]
def test_constants_and_cycle():
assembler = MockAssembler()
c3 = imm(3)
- s12 = frame_pos(13, 1)
+ s12 = frame_pos(13, INT)
remap_frame_layout(assembler, [ebx, c3, s12],
[s12, eax, ebx], edi)
assert assembler.ops == [('mov', c3, eax),
Modified: pypy/trunk/pypy/jit/backend/x86/test/test_recompilation.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/x86/test/test_recompilation.py (original)
+++ pypy/trunk/pypy/jit/backend/x86/test/test_recompilation.py Mon Aug 2 19:52:15 2010
@@ -1,6 +1,5 @@
-
-from pypy.jit.backend.x86.runner import CPU
from pypy.jit.backend.x86.test.test_regalloc import BaseTestRegalloc
+from pypy.jit.backend.x86.arch import IS_X86_32, IS_X86_64
class TestRecompilation(BaseTestRegalloc):
def test_compile_bridge_not_deeper(self):
@@ -51,7 +50,9 @@
descr = loop.operations[2].descr
new = descr._x86_bridge_frame_depth
assert descr._x86_bridge_param_depth == 0
- assert new > previous
+ # XXX: Maybe add enough ops to force stack on 64-bit as well?
+ if IS_X86_32:
+ assert new > previous
self.cpu.set_future_value_int(0, 0)
fail = self.run(loop)
assert fail.identifier == 2
@@ -111,7 +112,9 @@
guard_op = loop.operations[5]
loop_frame_depth = loop.token._x86_frame_depth
assert loop.token._x86_param_depth == 0
- assert guard_op.descr._x86_bridge_frame_depth > loop_frame_depth
+ # XXX: Maybe add enough ops to force stack on 64-bit as well?
+ if IS_X86_32:
+ assert guard_op.descr._x86_bridge_frame_depth > loop_frame_depth
assert guard_op.descr._x86_bridge_param_depth == 0
self.cpu.set_future_value_int(0, 0)
self.cpu.set_future_value_int(1, 0)
Modified: pypy/trunk/pypy/jit/backend/x86/test/test_regalloc.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/x86/test/test_regalloc.py (original)
+++ pypy/trunk/pypy/jit/backend/x86/test/test_regalloc.py Mon Aug 2 19:52:15 2010
@@ -7,15 +7,17 @@
BoxPtr, ConstPtr, LoopToken, BasicFailDescr
from pypy.jit.metainterp.resoperation import rop, ResOperation
from pypy.jit.backend.llsupport.descr import GcCache
-from pypy.jit.backend.x86.runner import CPU
-from pypy.jit.backend.x86.regalloc import RegAlloc, WORD, X86RegisterManager,\
+from pypy.jit.backend.detect_cpu import getcpuclass
+from pypy.jit.backend.x86.regalloc import RegAlloc, X86RegisterManager,\
FloatConstants
+from pypy.jit.backend.x86.arch import IS_X86_32, IS_X86_64
from pypy.jit.metainterp.test.oparser import parse
from pypy.rpython.lltypesystem import lltype, llmemory, rffi
from pypy.rpython.annlowlevel import llhelper
from pypy.rpython.lltypesystem import rclass, rstr
-from pypy.jit.backend.x86.ri386 import *
+from pypy.jit.backend.x86.rx86 import *
+CPU = getcpuclass()
class MockGcDescr(GcCache):
def get_funcptr_for_new(self):
return 123
@@ -92,13 +94,20 @@
def f2(x, y):
return x*y
+ def f10(*args):
+ assert len(args) == 10
+ return sum(args)
+
F1PTR = lltype.Ptr(lltype.FuncType([lltype.Signed], lltype.Signed))
F2PTR = lltype.Ptr(lltype.FuncType([lltype.Signed]*2, lltype.Signed))
+ F10PTR = lltype.Ptr(lltype.FuncType([lltype.Signed]*10, lltype.Signed))
f1ptr = llhelper(F1PTR, f1)
f2ptr = llhelper(F2PTR, f2)
+ f10ptr = llhelper(F10PTR, f10)
f1_calldescr = cpu.calldescrof(F1PTR.TO, F1PTR.TO.ARGS, F1PTR.TO.RESULT)
f2_calldescr = cpu.calldescrof(F2PTR.TO, F2PTR.TO.ARGS, F2PTR.TO.RESULT)
+ f10_calldescr = cpu.calldescrof(F10PTR.TO, F10PTR.TO.ARGS, F10PTR.TO.RESULT)
namespace = locals().copy()
type_system = 'lltype'
@@ -541,6 +550,12 @@
assert self.getints(9) == [0, 1, 1, 1, 1, 1, 1, 1, 1]
class TestRegAllocCallAndStackDepth(BaseTestRegalloc):
+ def expected_param_depth(self, num_args):
+ # Assumes the arguments are all non-float
+ if IS_X86_32:
+ return num_args
+ elif IS_X86_64:
+ return max(num_args - 6, 0)
def test_one_call(self):
ops = '''
@@ -550,7 +565,7 @@
'''
loop = self.interpret(ops, [4, 7, 9, 9 ,9, 9, 9, 9, 9, 9, 9])
assert self.getints(11) == [5, 7, 9, 9, 9, 9, 9, 9, 9, 9, 9]
- assert loop.token._x86_param_depth == 1
+ assert loop.token._x86_param_depth == self.expected_param_depth(1)
def test_two_calls(self):
ops = '''
@@ -561,8 +576,21 @@
'''
loop = self.interpret(ops, [4, 7, 9, 9 ,9, 9, 9, 9, 9, 9, 9])
assert self.getints(11) == [5*7, 7, 9, 9, 9, 9, 9, 9, 9, 9, 9]
- assert loop.token._x86_param_depth == 2
-
+ assert loop.token._x86_param_depth == self.expected_param_depth(2)
+
+ def test_call_many_arguments(self):
+ # NB: The first and last arguments in the call are constants. This
+ # is primarily for x86-64, to ensure that loading a constant to an
+ # argument register or to the stack works correctly
+ ops = '''
+ [i0, i1, i2, i3, i4, i5, i6, i7]
+ i8 = call(ConstClass(f10ptr), 1, i0, i1, i2, i3, i4, i5, i6, i7, 10, descr=f10_calldescr)
+ finish(i8)
+ '''
+ loop = self.interpret(ops, [2, 3, 4, 5, 6, 7, 8, 9])
+ assert self.getint(0) == 55
+ assert loop.token._x86_param_depth == self.expected_param_depth(10)
+
def test_bridge_calls_1(self):
ops = '''
[i0, i1]
@@ -579,7 +607,7 @@
'''
bridge = self.attach_bridge(ops, loop, -2)
- assert loop.operations[-2].descr._x86_bridge_param_depth == 2
+ assert loop.operations[-2].descr._x86_bridge_param_depth == self.expected_param_depth(2)
self.cpu.set_future_value_int(0, 4)
self.cpu.set_future_value_int(1, 7)
@@ -602,7 +630,7 @@
'''
bridge = self.attach_bridge(ops, loop, -2)
- assert loop.operations[-2].descr._x86_bridge_param_depth == 2
+ assert loop.operations[-2].descr._x86_bridge_param_depth == self.expected_param_depth(2)
self.cpu.set_future_value_int(0, 4)
self.cpu.set_future_value_int(1, 7)
Modified: pypy/trunk/pypy/jit/backend/x86/test/test_regalloc2.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/x86/test/test_regalloc2.py (original)
+++ pypy/trunk/pypy/jit/backend/x86/test/test_regalloc2.py Mon Aug 2 19:52:15 2010
@@ -2,7 +2,9 @@
from pypy.jit.metainterp.history import ResOperation, BoxInt, ConstInt,\
BoxPtr, ConstPtr, BasicFailDescr, LoopToken
from pypy.jit.metainterp.resoperation import rop
-from pypy.jit.backend.x86.runner import CPU
+from pypy.jit.backend.detect_cpu import getcpuclass
+from pypy.jit.backend.x86.arch import WORD
+CPU = getcpuclass()
def test_bug_rshift():
v1 = BoxInt()
@@ -281,5 +283,8 @@
assert cpu.get_latest_value_int(16) == -57344
assert cpu.get_latest_value_int(17) == 1
assert cpu.get_latest_value_int(18) == -1
- assert cpu.get_latest_value_int(19) == -2147483648
+ if WORD == 4:
+ assert cpu.get_latest_value_int(19) == -2147483648
+ elif WORD == 8:
+ assert cpu.get_latest_value_int(19) == 19327352832
assert cpu.get_latest_value_int(20) == -49
Modified: pypy/trunk/pypy/jit/backend/x86/test/test_runner.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/x86/test/test_runner.py (original)
+++ pypy/trunk/pypy/jit/backend/x86/test/test_runner.py Mon Aug 2 19:52:15 2010
@@ -2,9 +2,9 @@
from pypy.rpython.lltypesystem import lltype, llmemory, rffi, rstr, rclass
from pypy.jit.metainterp.history import ResOperation, LoopToken
from pypy.jit.metainterp.history import (BoxInt, BoxPtr, ConstInt, ConstPtr,
- Box, BasicFailDescr)
-from pypy.jit.backend.x86.runner import CPU
-from pypy.jit.backend.x86.regalloc import WORD
+ Box, BoxFloat, BasicFailDescr)
+from pypy.jit.backend.detect_cpu import getcpuclass
+from pypy.jit.backend.x86.arch import WORD
from pypy.jit.backend.llsupport import symbolic
from pypy.jit.metainterp.resoperation import rop
from pypy.jit.metainterp.executor import execute
@@ -15,6 +15,8 @@
import sys
import os
+CPU = getcpuclass()
+
class FakeStats(object):
pass
@@ -59,7 +61,7 @@
assert u.chars[3] == u'd'
@staticmethod
- def _resbuf(res, item_tp=ctypes.c_int):
+ def _resbuf(res, item_tp=ctypes.c_long):
return ctypes.cast(res.value._obj.intval, ctypes.POINTER(item_tp))
def test_allocations(self):
@@ -74,8 +76,11 @@
return ctypes.cast(buf, ctypes.c_void_p).value
func = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_int)(f)
addr = ctypes.cast(func, ctypes.c_void_p).value
+ # ctypes produces an unsigned value. We need it to be signed for, eg,
+ # relative addressing to work properly.
+ addr = rffi.cast(lltype.Signed, addr)
- self.cpu.assembler.make_sure_mc_exists()
+ self.cpu.assembler.setup()
self.cpu.assembler.malloc_func_addr = addr
ofs = symbolic.get_field_token(rstr.STR, 'chars', False)[0]
@@ -360,7 +365,9 @@
self.cpu.compile_bridge(faildescr1, [i1b], bridge)
name, address, size = agent.functions[1]
assert name == "Bridge # 0: bye"
- assert address == loopaddress + loopsize
+ # Would be exactly ==, but there are some guard failure recovery
+ # stubs in-between
+ assert address >= loopaddress + loopsize
assert size >= 10 # randomish number
self.cpu.set_future_value_int(0, 2)
@@ -386,7 +393,7 @@
ops.append(ResOperation(rop.FINISH, [v], None,
descr=BasicFailDescr()))
looptoken = LoopToken()
- self.cpu.assembler.make_sure_mc_exists()
+ self.cpu.assembler.setup()
old_mc_mc = self.cpu.assembler.mc._mc
self.cpu.compile_loop([base_v], ops, looptoken)
assert self.cpu.assembler.mc._mc != old_mc_mc # overflowed
@@ -394,6 +401,63 @@
self.cpu.execute_token(looptoken)
assert self.cpu.get_latest_value_int(0) == 1024
+ def test_overflow_guard_float_cmp(self):
+ # The float comparisons on x86 tend to use small relative jumps,
+ # which may run into trouble if they fall on the edge of a
+ # MachineCodeBlock change.
+ a = BoxFloat(1.0)
+ b = BoxFloat(2.0)
+ failed = BoxInt(41)
+ finished = BoxInt(42)
+
+ # We select guards that will always succeed, so that execution will
+ # continue through the entire set of comparisions
+ ops_to_test = (
+ (rop.FLOAT_LT, [a, b], rop.GUARD_TRUE),
+ (rop.FLOAT_LT, [b, a], rop.GUARD_FALSE),
+
+ (rop.FLOAT_LE, [a, a], rop.GUARD_TRUE),
+ (rop.FLOAT_LE, [a, b], rop.GUARD_TRUE),
+ (rop.FLOAT_LE, [b, a], rop.GUARD_FALSE),
+
+ (rop.FLOAT_EQ, [a, a], rop.GUARD_TRUE),
+ (rop.FLOAT_EQ, [a, b], rop.GUARD_FALSE),
+
+ (rop.FLOAT_NE, [a, b], rop.GUARD_TRUE),
+ (rop.FLOAT_NE, [a, a], rop.GUARD_FALSE),
+
+ (rop.FLOAT_GT, [b, a], rop.GUARD_TRUE),
+ (rop.FLOAT_GT, [a, b], rop.GUARD_FALSE),
+
+ (rop.FLOAT_GE, [a, a], rop.GUARD_TRUE),
+ (rop.FLOAT_GE, [b, a], rop.GUARD_TRUE),
+ (rop.FLOAT_GE, [a, b], rop.GUARD_FALSE),
+ )
+
+ for float_op, args, guard_op in ops_to_test:
+ ops = []
+
+ for i in range(200):
+ cmp_result = BoxInt()
+ ops.append(ResOperation(float_op, args, cmp_result))
+ ops.append(ResOperation(guard_op, [cmp_result], None, descr=BasicFailDescr()))
+ ops[-1].fail_args = [failed]
+
+ ops.append(ResOperation(rop.FINISH, [finished], None, descr=BasicFailDescr()))
+
+ looptoken = LoopToken()
+ self.cpu.compile_loop([a, b, failed, finished], ops, looptoken)
+ self.cpu.set_future_value_float(0, a.value)
+ self.cpu.set_future_value_float(1, b.value)
+ self.cpu.set_future_value_int(2, failed.value)
+ self.cpu.set_future_value_int(3, finished.value)
+ self.cpu.execute_token(looptoken)
+
+ # Really just a sanity check. We're actually interested in
+ # whether the test segfaults.
+ assert self.cpu.get_latest_value_int(0) == finished.value
+
+
class TestDebuggingAssembler(object):
def setup_method(self, meth):
self.pypylog = os.environ.get('PYPYLOG', None)
Modified: pypy/trunk/pypy/jit/backend/x86/test/test_symbolic_x86.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/x86/test/test_symbolic_x86.py (original)
+++ pypy/trunk/pypy/jit/backend/x86/test/test_symbolic_x86.py Mon Aug 2 19:52:15 2010
@@ -1,6 +1,7 @@
import py
from pypy.jit.backend.llsupport.symbolic import *
from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.jit.backend.x86.arch import WORD
# This test file is here and not in llsupport/test/ because it checks
# that we get correct numbers for a 32-bit machine.
@@ -19,32 +20,32 @@
ofs_z, size_z = get_field_token(S, 'z', False)
# ofs_x might be 0 or not, depending on how we count the headers
# but the rest should be as expected for a 386 machine
- assert size_x == size_y == size_z == 4
+ assert size_x == size_y == size_z == WORD
assert ofs_x >= 0
- assert ofs_y == ofs_x + 4
- assert ofs_z == ofs_x + 8
+ assert ofs_y == ofs_x + WORD
+ assert ofs_z == ofs_x + (WORD*2)
def test_struct_size():
ofs_z, size_z = get_field_token(S, 'z', False)
totalsize = get_size(S, False)
- assert totalsize == ofs_z + 4
+ assert totalsize == ofs_z + WORD
def test_primitive_size():
- assert get_size(lltype.Signed, False) == 4
+ assert get_size(lltype.Signed, False) == WORD
assert get_size(lltype.Char, False) == 1
- assert get_size(lltype.Ptr(S), False) == 4
+ assert get_size(lltype.Ptr(S), False) == WORD
def test_array_token():
A = lltype.GcArray(lltype.Char)
basesize, itemsize, ofs_length = get_array_token(A, False)
- assert basesize >= 4 # at least the 'length', maybe some gc headers
+ assert basesize >= WORD # at least the 'length', maybe some gc headers
assert itemsize == 1
- assert ofs_length == basesize - 4
+ assert ofs_length == basesize - WORD
A = lltype.GcArray(lltype.Signed)
basesize, itemsize, ofs_length = get_array_token(A, False)
- assert basesize >= 4 # at least the 'length', maybe some gc headers
- assert itemsize == 4
- assert ofs_length == basesize - 4
+ assert basesize >= WORD # at least the 'length', maybe some gc headers
+ assert itemsize == WORD
+ assert ofs_length == basesize - WORD
def test_varsized_struct_size():
S1 = lltype.GcStruct('S1', ('parent', S),
@@ -54,9 +55,9 @@
ofs_extra, size_extra = get_field_token(S1, 'extra', False)
basesize, itemsize, ofs_length = get_array_token(S1, False)
assert size_parent == ofs_extra
- assert size_extra == 4
- assert ofs_length == ofs_extra + 4
- assert basesize == ofs_length + 4
+ assert size_extra == WORD
+ assert ofs_length == ofs_extra + WORD
+ assert basesize == ofs_length + WORD
assert itemsize == 1
def test_string():
Modified: pypy/trunk/pypy/jit/backend/x86/test/test_zll_random.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/x86/test/test_zll_random.py (original)
+++ pypy/trunk/pypy/jit/backend/x86/test/test_zll_random.py Mon Aug 2 19:52:15 2010
@@ -1,9 +1,11 @@
from pypy.jit.backend.test.test_random import check_random_function, Random
from pypy.jit.backend.test.test_ll_random import LLtypeOperationBuilder
-from pypy.jit.backend.x86.runner import CPU386
+from pypy.jit.backend.detect_cpu import getcpuclass
+
+CPU = getcpuclass()
def test_stress():
- cpu = CPU386(None, None)
+ cpu = CPU(None, None)
r = Random()
for i in range(1000):
check_random_function(cpu, LLtypeOperationBuilder, r, i, 1000)
Modified: pypy/trunk/pypy/jit/backend/x86/test/test_zrpy_gc.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/x86/test/test_zrpy_gc.py (original)
+++ pypy/trunk/pypy/jit/backend/x86/test/test_zrpy_gc.py Mon Aug 2 19:52:15 2010
@@ -17,6 +17,8 @@
from pypy.jit.backend.llsupport.gc import GcRefList, GcRootMap_asmgcc
from pypy.jit.backend.llsupport.gc import GcLLDescr_framework
from pypy.tool.udir import udir
+from pypy.jit.backend.x86.arch import IS_X86_64
+import py.test
class X(object):
def __init__(self, x=0):
@@ -126,6 +128,10 @@
class TestCompileHybrid(object):
def setup_class(cls):
+ if IS_X86_64:
+ # No hybrid GC on 64-bit for the time being
+ py.test.skip()
+
funcs = []
name_to_func = {}
for fullname in dir(cls):
Modified: pypy/trunk/pypy/jit/backend/x86/test/test_ztranslation.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/x86/test/test_ztranslation.py (original)
+++ pypy/trunk/pypy/jit/backend/x86/test/test_ztranslation.py Mon Aug 2 19:52:15 2010
@@ -3,13 +3,14 @@
from pypy.rlib.jit import JitDriver, OPTIMIZER_FULL, unroll_parameters
from pypy.rlib.jit import PARAMETERS, dont_look_inside
from pypy.jit.metainterp.jitprof import Profiler
-from pypy.jit.backend.x86.runner import CPU386
+from pypy.jit.backend.detect_cpu import getcpuclass
from pypy.jit.backend.test.support import CCompiledMixin
from pypy.jit.codewriter.policy import StopAtXPolicy
from pypy.translator.translator import TranslationContext
+from pypy.jit.backend.x86.arch import IS_X86_32, IS_X86_64
class TestTranslationX86(CCompiledMixin):
- CPUClass = CPU386
+ CPUClass = getcpuclass()
def _check_cbuilder(self, cbuilder):
# We assume here that we have sse2. If not, the CPUClass
@@ -114,7 +115,7 @@
class TestTranslationRemoveTypePtrX86(CCompiledMixin):
- CPUClass = CPU386
+ CPUClass = getcpuclass()
def _get_TranslationContext(self):
t = TranslationContext()
@@ -125,6 +126,10 @@
return t
def test_external_exception_handling_translates(self):
+ # FIXME
+ if IS_X86_64:
+ import py.test; py.test.skip()
+
jitdriver = JitDriver(greens = [], reds = ['n', 'total'])
class ImDone(Exception):
Modified: pypy/trunk/pypy/jit/backend/x86/tool/viewcode.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/x86/tool/viewcode.py (original)
+++ pypy/trunk/pypy/jit/backend/x86/tool/viewcode.py Mon Aug 2 19:52:15 2010
@@ -31,16 +31,22 @@
if sys.platform == "win32":
XXX # lots more in Psyco
-def machine_code_dump(data, originaddr):
- # the disassembler to use. 'objdump' writes GNU-style instructions.
- # 'ndisasm' would use Intel syntax, but you need to fix the output parsing.
- objdump = ('objdump -M intel -b binary -m i386 '
+def machine_code_dump(data, originaddr, backend_name):
+ objdump_backend_option = {
+ 'x86': 'i386',
+ 'x86_64': 'x86-64',
+ }
+ objdump = ('objdump -M intel,%(backend)s -b binary -m i386 '
'--adjust-vma=%(origin)d -D %(file)s')
#
f = open(tmpfile, 'wb')
f.write(data)
f.close()
- g = os.popen(objdump % {'file': tmpfile, 'origin': originaddr}, 'r')
+ g = os.popen(objdump % {
+ 'file': tmpfile,
+ 'origin': originaddr,
+ 'backend': objdump_backend_option[backend_name],
+ }, 'r')
result = g.readlines()
g.close()
return result[6:] # drop some objdump cruft
@@ -126,7 +132,7 @@
def disassemble(self):
if not hasattr(self, 'text'):
- lines = machine_code_dump(self.data, self.addr)
+ lines = machine_code_dump(self.data, self.addr, self.world.backend_name)
# instead of adding symbol names in the dumps we could
# also make the 0xNNNNNNNN addresses be red and show the
# symbol name when the mouse is over them
@@ -171,10 +177,13 @@
self.jumps = {}
self.symbols = {}
self.logentries = {}
+ self.backend_name = None
def parse(self, f, textonly=True):
for line in f:
- if line.startswith('CODE_DUMP '):
+ if line.startswith('BACKEND '):
+ self.backend_name = line.split(' ')[1].strip()
+ elif line.startswith('CODE_DUMP '):
pieces = line.split()
assert pieces[1].startswith('@')
assert pieces[2].startswith('+')
Modified: pypy/trunk/pypy/module/signal/interp_signal.py
==============================================================================
--- pypy/trunk/pypy/module/signal/interp_signal.py (original)
+++ pypy/trunk/pypy/module/signal/interp_signal.py Mon Aug 2 19:52:15 2010
@@ -7,6 +7,7 @@
from pypy.translator.tool.cbuild import ExternalCompilationInfo
import py
from pypy.tool import autopath
+from pypy.rlib import jit
def setup():
for key, value in cpy_signal.__dict__.items():
@@ -159,10 +160,12 @@
return space.wrap(SIG_DFL)
getsignal.unwrap_spec = [ObjSpace, int]
+ at jit.dont_look_inside
def alarm(space, timeout):
return space.wrap(c_alarm(timeout))
alarm.unwrap_spec = [ObjSpace, int]
+ at jit.dont_look_inside
def pause(space):
c_pause()
return space.w_None
@@ -173,6 +176,7 @@
raise OperationError(space.w_ValueError,
space.wrap("signal number out of range"))
+ at jit.dont_look_inside
def signal(space, signum, w_handler):
"""
signal(sig, action) -> action
More information about the Pypy-commit
mailing list