[pypy-svn] r75296 - in pypy/branch/x86-64-jit-backend/pypy/jit/backend: test x86 x86/test x86/tool
jcreigh at codespeak.net
jcreigh at codespeak.net
Sat Jun 12 00:19:25 CEST 2010
Author: jcreigh
Date: Sat Jun 12 00:19:21 2010
New Revision: 75296
Modified:
pypy/branch/x86-64-jit-backend/pypy/jit/backend/test/runner_test.py
pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/assembler.py
pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/codebuf.py
pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/regalloc.py
pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/regloc.py
pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/runner.py
pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/rx86.py
pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_assembler.py
pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_gc_integration.py
pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_recompilation.py
pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_regalloc.py
pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_regalloc2.py
pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_runner.py
pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py
pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_symbolic_x86.py
pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_zll_random.py
pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/tool/viewcode.py
Log:
first shot at real 64-bit support (doesn't translate, pretty ugly in places, some stuff is still broken)
Modified: pypy/branch/x86-64-jit-backend/pypy/jit/backend/test/runner_test.py
==============================================================================
--- pypy/branch/x86-64-jit-backend/pypy/jit/backend/test/runner_test.py (original)
+++ pypy/branch/x86-64-jit-backend/pypy/jit/backend/test/runner_test.py Sat Jun 12 00:19:21 2010
@@ -465,6 +465,25 @@
[funcbox] + args,
'float', descr=calldescr)
assert abs(res.value - 4.6) < 0.0001
+
+ def test_call_many_arguments(self):
+ # Test calling a function with a large number of arguments (more than
+ # 6, which will force passing some arguments on the stack on 64-bit)
+
+ def func(*args):
+ assert len(args) == 16
+ # Try to sum up args in a way that would probably detect a
+ # transposed argument
+ return sum(arg * (2**i) for i, arg in enumerate(args))
+
+ FUNC = self.FuncType([lltype.Signed]*16, lltype.Signed)
+ FPTR = self.Ptr(FUNC)
+ calldescr = self.cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+ func_ptr = llhelper(FPTR, func)
+ args = range(16)
+ funcbox = self.get_funcbox(self.cpu, func_ptr)
+ res = self.execute_operation(rop.CALL, [funcbox] + map(BoxInt, args), 'int', descr=calldescr)
+ assert res.value == func(*args)
def test_call_stack_alignment(self):
# test stack alignment issues, notably for Mac OS/X.
Modified: pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/assembler.py
==============================================================================
--- pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/assembler.py (original)
+++ pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/assembler.py Sat Jun 12 00:19:21 2010
@@ -15,6 +15,10 @@
esp, ebp, esi, edi,
xmm0, xmm1, xmm2, xmm3,
xmm4, xmm5, xmm6, xmm7,
+ r8, r9, r10, r11,
+ r12, r13, r14, r15,
+ X86_64_SCRATCH_REG,
+ X86_64_XMM_SCRATCH_REG,
RegLoc, StackLoc,
ImmedLoc, AddressLoc, imm, rel32)
@@ -30,7 +34,7 @@
# darwin requires the stack to be 16 bytes aligned on calls. Same for gcc 4.5.0,
# better safe than sorry
-CALL_ALIGN = 4
+CALL_ALIGN = 16 // WORD
def align_stack_words(words):
return (words + CALL_ALIGN - 1) & ~(CALL_ALIGN-1)
@@ -178,6 +182,7 @@
self._build_malloc_fixedsize_slowpath()
def _build_float_constants(self):
+ """
# 11 words: 8 words for the data, and up to 3 words for alignment
addr = lltype.malloc(rffi.CArray(lltype.Signed), 11, flavor='raw')
if not we_are_translated():
@@ -195,23 +200,45 @@
addr[7] = 0 #
self.float_const_neg_addr = float_constants
self.float_const_abs_addr = float_constants + 16
+ """
+
+ # 44 bytes: 32 bytes for the data, and up to 12 bytes for alignment
+ addr = lltype.malloc(rffi.CArray(lltype.Char), 44, flavor='raw')
+ if not we_are_translated():
+ self._keepalive_malloced_float_consts = addr
+ float_constants = rffi.cast(lltype.Signed, addr)
+ float_constants = (float_constants + 15) & ~15 # align to 16 bytes
+ addr = rffi.cast(rffi.CArrayPtr(lltype.Char), float_constants)
+ qword_padding = '\x00\x00\x00\x00\x00\x00\x00\x00'
+ neg_const = '\x00\x00\x00\x00\x00\x00\x00\x80'
+ abs_const = '\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F'
+ data = neg_const + qword_padding + abs_const + qword_padding
+ for i, char in enumerate(data):
+ addr[i] = char
+ self.float_const_neg_addr = float_constants
+ self.float_const_abs_addr = float_constants + 16
def _build_malloc_fixedsize_slowpath(self):
mc = self.mc2._mc
# ---------- first helper for the slow path of malloc ----------
self.malloc_fixedsize_slowpath1 = mc.tell()
if self.cpu.supports_floats: # save the XMM registers in
- for i in range(8): # the *caller* frame, from esp+8
- mc.MOVSD_sx(8+8*i, i)
+ for i in range(self.cpu.NUM_REGS):# the *caller* frame, from esp+8
+ mc.MOVSD_sx((WORD*2)+8*i, i)
mc.SUB(edx, eax) # compute the size we want
- mc.MOV_sr(4, edx.value) # save it as the new argument
+ if WORD == 4:
+ mc.MOV_sr(WORD, edx.value) # save it as the new argument
+ elif WORD == 8:
+ # FIXME: We can't just clobber rdi like this, can we?
+ mc.MOV_rr(edi.value, edx.value)
+
addr = self.cpu.gc_ll_descr.get_malloc_fixedsize_slowpath_addr()
mc.JMP_l(addr) # tail call to the real malloc
# ---------- second helper for the slow path of malloc ----------
self.malloc_fixedsize_slowpath2 = mc.tell()
if self.cpu.supports_floats: # restore the XMM registers
- for i in range(8): # from where they were saved
- mc.MOVSD_xs(i, 8+8*i)
+ for i in range(self.cpu.NUM_REGS):# from where they were saved
+ mc.MOVSD_xs(i, (WORD*2)+8*i)
nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
mc.MOV(edx, heap(nursery_free_adr)) # load this in EDX
mc.RET()
@@ -337,7 +364,7 @@
# Compute the correct offset for the instruction LEA ESP, [EBP-4*words].
# Given that [EBP] is where we saved EBP, i.e. in the last word
# of our fixed frame, then the 'words' value is:
- words = (FRAME_FIXED_SIZE - 1) + reserved_depth
+ words = (self.cpu.FRAME_FIXED_SIZE - 1) + reserved_depth
# align, e.g. for Mac OS X
aligned_words = align_stack_words(words+2)-2 # 2 = EIP+EBP
mc.writeimm32(-WORD * aligned_words)
@@ -346,14 +373,25 @@
def _call_header(self):
self.mc.PUSH(ebp)
self.mc.MOV_rr(ebp.value, esp.value)
- self.mc.PUSH(ebx)
- self.mc.PUSH(esi)
- self.mc.PUSH(edi)
+ for regloc in self.cpu.CALLEE_SAVE_REGISTERS:
+ self.mc.PUSH(regloc)
+
# NB. the shape of the frame is hard-coded in get_basic_shape() too.
# Also, make sure this is consistent with FRAME_FIXED_SIZE.
return self._patchable_stackadjust()
+ def _call_footer(self, mc):
+ mc.LEA_rb(esp.value, -len(self.cpu.CALLEE_SAVE_REGISTERS) * WORD)
+
+ for regloc in reversed(self.cpu.CALLEE_SAVE_REGISTERS):
+ mc.POP(regloc)
+
+ mc.POP(ebp)
+ mc.RET()
+
def _assemble_bootstrap_direct_call(self, arglocs, jmpadr, stackdepth):
+ if WORD == 8:
+ return self._assemble_bootstrap_direct_call_64(arglocs, jmpadr, stackdepth)
# XXX pushing ebx esi and edi is a bit pointless, since we store
# all registers anyway, for the case of guard_not_forced
# XXX this can be improved greatly. Right now it'll behave like
@@ -386,6 +424,34 @@
self.mc.JMP_l(jmpadr)
return adr_stackadjust
+ def _assemble_bootstrap_direct_call_64(self, arglocs, jmpadr, stackdepth):
+ # In reverse order for use with pop()
+ unused_gpr = [r9, r8, ecx, edx, esi, edi]
+ unused_xmm = [xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0]
+
+ nonfloatlocs, floatlocs = arglocs
+ adr_stackadjust = self._call_header()
+ self._patch_stackadjust(adr_stackadjust, stackdepth)
+
+ for i, loc in enumerate(nonfloatlocs):
+ if loc != None:
+ if len(unused_gpr) > 0:
+ self.mc.MOV(loc, unused_gpr.pop())
+ else:
+ self.mc.MOV_rb(X86_64_SCRATCH_REG.value, (2 + i) * WORD)
+ self.mc.MOV(loc, X86_64_SCRATCH_REG)
+
+ for i, loc in enumerate(floatlocs):
+ if loc != None:
+ if len(unused_xmm) > 0:
+ self.mc.MOVSD(loc, unused_xmm.pop())
+
+ self.mc.JMP_l(jmpadr)
+
+ return adr_stackadjust
+
+
+
def _assemble_bootstrap_code(self, inputargs, arglocs):
nonfloatlocs, floatlocs = arglocs
adr_stackadjust = self._call_header()
@@ -404,10 +470,10 @@
# reading them
self.mc.XOR(target, target)
adr = self.fail_boxes_ptr.get_addr_for_num(i)
- self.mc.XCHG_rj(target.value, adr)
+ self.mc.XCHG(target, heap(adr))
else:
adr = self.fail_boxes_int.get_addr_for_num(i)
- self.mc.MOV_rj(target.value, adr)
+ self.mc.MOV(target, heap(adr))
if target is not loc:
assert isinstance(loc, StackLoc)
self.mc.MOV_br(loc.value, target.value)
@@ -417,9 +483,9 @@
continue
adr = self.fail_boxes_float.get_addr_for_num(i)
if isinstance(loc, RegLoc):
- self.mc.MOVSD_xj(loc.value, adr)
+ self.mc.MOVSD(loc, heap(adr))
else:
- self.mc.MOVSD_xj(xmmtmp.value, adr)
+ self.mc.MOVSD(xmmtmp, heap(adr))
assert isinstance(loc, StackLoc)
self.mc.MOVSD_bx(loc.value, xmmtmp.value)
return adr_stackadjust
@@ -540,16 +606,20 @@
def _cmpop_float(cond, is_ne=False):
def genop_cmp(self, op, arglocs, result_loc):
self.mc.UCOMISD(arglocs[0], arglocs[1])
- rl = result_loc.lowest8bits()
- rh = result_loc.higher8bits()
- self.mc.SET_ir(rx86.Conditions[cond], rl.value)
+ tmp1 = result_loc.lowest8bits()
+ if WORD == 4:
+ tmp2 = result_loc.higher8bits()
+ elif WORD == 8:
+ tmp2 = X86_64_SCRATCH_REG.lowest8bits()
+
+ self.mc.SET_ir(rx86.Conditions[cond], tmp1.value)
if is_ne:
- self.mc.SET_ir(rx86.Conditions['P'], rh.value)
- self.mc.OR8_rr(rl.value, rh.value)
+ self.mc.SET_ir(rx86.Conditions['P'], tmp2.value)
+ self.mc.OR8_rr(tmp1.value, tmp2.value)
else:
- self.mc.SET_ir(rx86.Conditions['NP'], rh.value)
- self.mc.AND8_rr(rl.value, rh.value)
- self.mc.MOVZX8_rr(result_loc.value, rl.value)
+ self.mc.SET_ir(rx86.Conditions['NP'], tmp2.value)
+ self.mc.AND8_rr(tmp1.value, tmp2.value)
+ self.mc.MOVZX8_rr(result_loc.value, tmp1.value)
return genop_cmp
def _cmpop_guard(cond, rev_cond, false_cond, false_rev_cond):
@@ -592,6 +662,9 @@
@specialize.arg(5)
def _emit_call(self, x, arglocs, start=0, tmp=eax, force_mc=False,
mc=None):
+ if WORD == 8:
+ return self._emit_call_64(x, arglocs, start, tmp, force_mc, mc)
+
if not force_mc:
mc = self.mc
p = 0
@@ -619,7 +692,88 @@
# x is a location
mc.CALL(x)
self.mark_gc_roots()
+
+ def _emit_call_64(self, x, arglocs, start=0, tmp=eax, force_mc=False, mc=None):
+ if not force_mc:
+ mc = self.mc
+
+ # In reverse order for use with pop()
+ unused_gpr = [r9, r8, ecx, edx, esi, edi]
+ unused_xmm = [xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0]
+ pass_in_reg = {}
+ pass_on_stack = []
+
+ for i in range(start, len(arglocs)):
+ loc = arglocs[i]
+ assert isinstance(loc, RegLoc) or isinstance(loc, ImmedLoc) or isinstance(loc, StackLoc)
+
+ # XXX: Should be much simpler to tell whether a location is a float!
+ if (isinstance(loc, RegLoc) and loc.is_xmm) or (isinstance(loc, StackLoc) and loc.type == FLOAT):
+ if len(unused_xmm) > 0:
+ pass_in_reg[unused_xmm.pop()] = loc
+ else:
+ pass_on_stack.append(loc)
+ else:
+ if len(unused_gpr) > 0:
+ pass_in_reg[unused_gpr.pop()] = loc
+ else:
+ pass_on_stack.append(loc)
+ # Emit instructions to pass the stack arguments
+ for i, loc in enumerate(pass_on_stack):
+ if isinstance(loc, StackLoc):
+ if loc.type == FLOAT:
+ mc.MOVSD(X86_64_XMM_SCRATCH_REG, loc)
+ mc.MOVSD_sx(i*WORD, X86_64_XMM_SCRATCH_REG.value)
+ else:
+ mc.MOV(X86_64_SCRATCH_REG, loc)
+ mc.MOV_sr(i*WORD, X86_64_SCRATCH_REG.value)
+ else:
+ assert isinstance(loc, RegLoc)
+ if loc.is_xmm:
+ mc.MOVSD_sx(i*WORD, loc.value)
+ else:
+ mc.MOV_sr(i*WORD, loc.value)
+
+ previous_len = sys.maxint # dummy value to start loop
+ while 0 < len(pass_in_reg) < previous_len:
+ previous_len = len(pass_in_reg)
+ # XXX: Maybe a little slow to call items() and values() so much?
+ for reg, loc in pass_in_reg.items():
+ if reg == loc:
+ # Happy coincidence, do nothing
+ del pass_in_reg[reg]
+ elif reg not in pass_in_reg.values():
+ if reg.is_xmm:
+ mc.MOVSD(reg, loc)
+ else:
+ mc.MOV(reg, loc)
+
+ del pass_in_reg[reg]
+
+ if len(pass_in_reg) > 0:
+ # Rats. There is a "cycle". eg, r9 contains the value that we want
+ # to pass in r8, but r8 contains the value we want to pass in r9,
+ # so we can't just clobber r8.
+ #
+ # There's probably some clever way we could handle this, but we
+ # just temporarily dump the registers to the stack and then copy
+ # back to the correct registers. Hopefully this doesn't happen too
+ # often anyway.
+ #
+ # FIXME: Use of PUSH/POP will fail with XMM registers
+ items = pass_in_reg.items()
+ for reg, loc in items:
+ mc.PUSH(loc)
+
+ for reg, loc in reversed(items):
+ mc.POP(reg)
+
+ self._regalloc.reserve_param(len(pass_on_stack))
+ mc.CALL(x)
+
+ self.mark_gc_roots()
+
def call(self, addr, args, res):
self._emit_call(rel32(addr), args)
assert res is eax
@@ -696,11 +850,11 @@
def genop_float_neg(self, op, arglocs, resloc):
# Following what gcc does: res = x ^ 0x8000000000000000
- self.mc.XORPD_xj(arglocs[0].value, self.float_const_neg_addr)
+ self.mc.XORPD(arglocs[0], heap(self.float_const_neg_addr))
def genop_float_abs(self, op, arglocs, resloc):
# Following what gcc does: res = x & 0x7FFFFFFFFFFFFFFF
- self.mc.ANDPD_xj(arglocs[0].value, self.float_const_abs_addr)
+ self.mc.ANDPD(arglocs[0], heap(self.float_const_abs_addr))
def genop_guard_float_is_true(self, op, guard_op, addr, arglocs, resloc):
guard_opnum = guard_op.opnum
@@ -758,7 +912,13 @@
genop_virtual_ref = genop_same_as
def genop_int_mod(self, op, arglocs, resloc):
- self.mc.CDQ()
+ if WORD == 4:
+ self.mc.CDQ()
+ elif WORD == 8:
+ self.mc.CQO()
+ else:
+ raise AssertionError("Can't happen")
+
self.mc.IDIV_r(ecx.value)
genop_int_floordiv = genop_int_mod
@@ -807,14 +967,14 @@
size = size_loc.value
source_addr = AddressLoc(base_loc, ofs_loc)
- if size == 1:
+ if resloc.is_xmm:
+ self.mc.MOVSD(resloc, source_addr)
+ elif size == 1:
self.mc.MOVZX8(resloc, source_addr)
elif size == 2:
self.mc.MOVZX16(resloc, source_addr)
elif size == WORD:
self.mc.MOV(resloc, source_addr)
- elif size == 8:
- self.mc.MOVSD(resloc, source_addr)
else:
raise NotImplementedError("getfield size = %d" % size)
@@ -833,7 +993,7 @@
if scale.value == 0:
self.mc.MOVZX8(resloc, addr8_add(base_loc, ofs_loc, ofs.value,
scale.value))
- elif scale.value == 2:
+ elif (1 << scale.value) == WORD:
self.mc.MOV(resloc, addr_add(base_loc, ofs_loc, ofs.value,
scale.value))
else:
@@ -847,7 +1007,7 @@
assert isinstance(size_loc, ImmedLoc)
size = size_loc.value
dest_addr = AddressLoc(base_loc, ofs_loc)
- if size == WORD * 2:
+ if isinstance(value_loc, RegLoc) and value_loc.is_xmm:
self.mc.MOVSD(dest_addr, value_loc)
elif size == WORD:
self.mc.MOV(dest_addr, value_loc)
@@ -867,7 +1027,7 @@
if op.args[2].type == FLOAT:
self.mc.MOVSD(dest_addr, value_loc)
else:
- if scale_loc.value == 2:
+ if (1 << scale_loc.value) == WORD:
self.mc.MOV(dest_addr, value_loc)
elif scale_loc.value == 0:
self.mc.MOV8(dest_addr, value_loc.lowest8bits())
@@ -887,7 +1047,7 @@
basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.UNICODE,
self.cpu.translate_support_code)
if itemsize == 4:
- self.mc.MOV(AddressLoc(base_loc, ofs_loc, 2, basesize), val_loc)
+ self.mc.MOV32(AddressLoc(base_loc, ofs_loc, 2, basesize), val_loc)
elif itemsize == 2:
self.mc.MOV16(AddressLoc(base_loc, ofs_loc, 1, basesize), val_loc)
else:
@@ -925,7 +1085,7 @@
basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.UNICODE,
self.cpu.translate_support_code)
if itemsize == 4:
- self.mc.MOV(resloc, AddressLoc(base_loc, ofs_loc, 2, basesize))
+ self.mc.MOV32(resloc, AddressLoc(base_loc, ofs_loc, 2, basesize))
elif itemsize == 2:
self.mc.MOVZX16(resloc, AddressLoc(base_loc, ofs_loc, 1, basesize))
else:
@@ -939,7 +1099,7 @@
def genop_guard_guard_no_exception(self, ign_1, guard_op, addr,
locs, ign_2):
- self.mc.CMP_ji(self.cpu.pos_exception(), 0)
+ self.mc.CMP(heap(self.cpu.pos_exception()), imm(0))
return self.implement_guard(addr, 'NZ')
def genop_guard_guard_exception(self, ign_1, guard_op, addr,
@@ -1078,7 +1238,8 @@
DESCR_INT = 0x01
DESCR_FLOAT = 0x02
DESCR_SPECIAL = 0x03
- CODE_FROMSTACK = 4*8
+ # XXX: 4*8 works on i386, should we optimize for that case?
+ CODE_FROMSTACK = 4*16
CODE_STOP = 0 | DESCR_SPECIAL
CODE_HOLE = 4 | DESCR_SPECIAL
@@ -1218,7 +1379,7 @@
code = (code - self.CODE_FROMSTACK) >> 2
stackloc = frame_addr + get_ebp_ofs(code)
value = rffi.cast(rffi.LONGP, stackloc)[0]
- if kind == self.DESCR_FLOAT:
+ if kind == self.DESCR_FLOAT and WORD == 4:
value_hi = value
value = rffi.cast(rffi.LONGP, stackloc - 4)[0]
else:
@@ -1232,8 +1393,11 @@
break
code >>= 2
if kind == self.DESCR_FLOAT:
- value = allregisters[2*code]
- value_hi = allregisters[2*code + 1]
+ if WORD == 4:
+ value = allregisters[2*code]
+ value_hi = allregisters[2*code + 1]
+ else:
+ value = allregisters[code]
else:
value = allregisters[16 + code]
@@ -1244,7 +1408,8 @@
tgt = self.fail_boxes_ptr.get_addr_for_num(num)
elif kind == self.DESCR_FLOAT:
tgt = self.fail_boxes_float.get_addr_for_num(num)
- rffi.cast(rffi.LONGP, tgt)[1] = value_hi
+ if WORD == 4:
+ rffi.cast(rffi.LONGP, tgt)[1] = value_hi
else:
assert 0, "bogus kind"
rffi.cast(rffi.LONGP, tgt)[0] = value
@@ -1252,7 +1417,9 @@
#
if not we_are_translated():
assert bytecode[4] == 0xCC
- fail_index = rffi.cast(rffi.LONGP, bytecode)[0]
+ # fail_index = rffi.cast(rffi.LONGP, bytecode)[0]
+ fail_index = rffi.cast(rffi.INTP, bytecode)[0]
+ fail_index = rffi.cast(lltype.Signed, fail_index)
return fail_index
def setup_failure_recovery(self):
@@ -1264,7 +1431,7 @@
# value of XMM registers, and finally a reference to the
# recovery bytecode. See _build_failure_recovery() for details.
stack_at_ebp = registers[ebp.value]
- bytecode = rffi.cast(rffi.UCHARP, registers[8])
+ bytecode = rffi.cast(rffi.UCHARP, registers[self.cpu.NUM_REGS])
allregisters = rffi.ptradd(registers, -16)
return self.grab_frame_values(bytecode, stack_at_ebp, allregisters)
@@ -1283,18 +1450,18 @@
# Assume that we are called at the beginning, when there is no risk
# that 'mc' runs out of space. Checked by asserts in mc.write().
recovery_addr = mc.tell()
- mc.PUSH(edi)
- mc.PUSH(esi)
- mc.PUSH(ebp)
- mc.PUSH(esp) # <-- not really used, but needed to take up the space
- mc.PUSH(ebx)
- mc.PUSH(edx)
- mc.PUSH(ecx)
- mc.PUSH(eax)
- mc.MOV_rr(esi.value, esp.value)
+
+ # Push all general purpose registers
+ for gpr in reversed(range(self.cpu.NUM_REGS)):
+ mc.PUSH_r(gpr)
+
+ # ebx/rbx is callee-save in both i386 and x86-64
+ mc.MOV_rr(ebx.value, esp.value)
+
if withfloats:
- mc.SUB_ri(esp.value, 8*8)
- for i in range(8):
+ # Push all float registers
+ mc.SUB_ri(esp.value, self.cpu.NUM_REGS*8)
+ for i in range(self.cpu.NUM_REGS):
mc.MOVSD_sx(8*i, i)
# we call a provided function that will
@@ -1313,19 +1480,24 @@
# bytecode, pushed just before by the CALL instruction written by
# generate_quick_failure(). XXX misaligned stack in the call, but
# it's ok because failure_recovery_func is not calling anything more
- mc.PUSH(esi)
+
+ # XXX
+ if mc.WORD == 4:
+ mc.PUSH(ebx)
+ elif mc.WORD == 8:
+ mc.MOV(edi, ebx)
+ # XXX: Correct to only align the stack on 64-bit?
+ mc.AND_ri(esp.value, -16)
+ else:
+ raise AssertionError("Shouldn't happen")
+
mc.CALL_l(failure_recovery_func)
# returns in eax the fail_index
# now we return from the complete frame, which starts from
# _assemble_bootstrap_code(). The LEA below throws away most
# of the frame, including all the PUSHes that we did just above.
- mc.LEA_rb(esp.value, -3 * WORD)
- mc.POP(edi) # [ebp-12]
- mc.POP(esi) # [ebp-8]
- mc.POP(ebx) # [ebp-4]
- mc.POP(ebp) # [ebp]
- mc.RET()
+ self._call_footer(mc)
self.mc2.done()
self.failure_recovery_code[exc + 2 * withfloats] = recovery_addr
@@ -1334,15 +1506,15 @@
for i in range(len(locs)):
loc = locs[i]
if isinstance(loc, RegLoc):
- if loc.width == 8:
+ if loc.is_xmm:
adr = self.fail_boxes_float.get_addr_for_num(i)
- mc.MOVSD_jx(adr, loc.value)
+ mc.MOVSD(heap(adr), loc)
else:
if locs_are_ref[i]:
adr = self.fail_boxes_ptr.get_addr_for_num(i)
else:
adr = self.fail_boxes_int.get_addr_for_num(i)
- mc.MOV_jr(adr, loc.value)
+ mc.MOV(heap(adr), loc)
for i in range(len(locs)):
loc = locs[i]
if not isinstance(loc, RegLoc):
@@ -1350,14 +1522,14 @@
assert isinstance(loc, StackLoc)
mc.MOVSD_xb(xmm0.value, loc.value)
adr = self.fail_boxes_float.get_addr_for_num(i)
- mc.MOVSD_jx(adr, xmm0.value)
+ mc.MOVSD(heap(adr), xmm0)
else:
if locs_are_ref[i]:
adr = self.fail_boxes_ptr.get_addr_for_num(i)
else:
adr = self.fail_boxes_int.get_addr_for_num(i)
mc.MOV(eax, loc)
- mc.MOV_jr(adr, eax.value)
+ mc.MOV(heap(adr), eax)
# we call a provided function that will
# - call our on_leave_jitted_hook which will mark
@@ -1367,13 +1539,10 @@
addr = self.cpu.get_on_leave_jitted_int(save_exception=exc)
mc.CALL_l(addr)
- mc.LEA_rb(esp.value, -3 * WORD)
mc.MOV(eax, imm(fail_index))
- mc.POP(edi) # [ebp-12]
- mc.POP(esi) # [ebp-8]
- mc.POP(ebx) # [ebp-4]
- mc.POP(ebp) # [ebp]
- mc.RET()
+
+ # exit function
+ self._call_footer(mc)
# FIXME: I changed the third argument to this method, but I don't know
# what to do with @specialize
@@ -1401,7 +1570,7 @@
self._emit_call(x, arglocs, 2, tmp=tmp)
- if isinstance(resloc, StackLoc) and resloc.width == 8:
+ if isinstance(resloc, StackLoc) and resloc.width == 8 and WORD == 4:
self.mc.FSTP_b(resloc.value)
elif size == 1:
self.mc.AND(eax, imm(0xff))
@@ -1445,10 +1614,10 @@
assert 0 < offset <= 127
mc.overwrite(jmp_location - 1, [chr(offset)])
self._stop_block()
- if isinstance(result_loc, StackLoc) and result_loc.width == 8:
+ if WORD == 4 and isinstance(result_loc, StackLoc) and result_loc.type == FLOAT:
self.mc.FSTP_b(result_loc.value)
else:
- assert result_loc is eax or result_loc is None
+ assert result_loc is eax or result_loc is xmm0 or result_loc is None
self.mc.CMP_bi(FORCE_INDEX_OFS, 0)
return self.implement_guard(addr, 'L')
@@ -1469,6 +1638,11 @@
# we choose the most compact encoding over the most efficient one.
for i in range(len(arglocs)-1, -1, -1):
mc.PUSH(arglocs[i])
+
+ if WORD == 8:
+ mc.MOV_rs(edi.value, 0)
+ mc.MOV_rs(esi.value, 8)
+
# misaligned stack in the call, but it's ok because the write barrier
# is not going to call anything more. Also, this assumes that the
# write barrier does not touch the xmm registers.
Modified: pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/codebuf.py
==============================================================================
--- pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/codebuf.py (original)
+++ pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/codebuf.py Sat Jun 12 00:19:21 2010
@@ -2,13 +2,19 @@
import os, sys
from pypy.rpython.lltypesystem import lltype, rffi
from pypy.translator.tool.cbuild import ExternalCompilationInfo
-from pypy.jit.backend.x86.rx86 import X86_32_CodeBuilder
+from pypy.jit.backend.x86.rx86 import X86_32_CodeBuilder, X86_64_CodeBuilder
from pypy.jit.backend.x86.regloc import LocationCodeBuilder
from pypy.rlib.rmmap import PTR, alloc, free
from pypy.rlib.debug import make_sure_not_resized
+from pypy.jit.backend.x86.regalloc import WORD
+# XXX: Seems nasty to change the superclass of InMemoryCodeBuilder like this
+if WORD == 4:
+ codebuilder_cls = X86_32_CodeBuilder
+elif WORD == 8:
+ codebuilder_cls = X86_64_CodeBuilder
-class InMemoryCodeBuilder(X86_32_CodeBuilder, LocationCodeBuilder):
+class InMemoryCodeBuilder(codebuilder_cls, LocationCodeBuilder):
_last_dump_start = 0
def __init__(self, start, end):
@@ -53,11 +59,6 @@
self._pos = pos
self._last_dump_start = pos
- def execute(self, arg1, arg2):
- # XXX old testing stuff
- fnptr = rffi.cast(lltype.Ptr(BINARYFN), self._data)
- return fnptr(arg1, arg2)
-
def done(self):
# normally, no special action is needed here
if machine_code_dumper.enabled:
@@ -80,9 +81,6 @@
valgrind.discard_translations(self._data, self._size)
-BINARYFN = lltype.FuncType([lltype.Signed, lltype.Signed], lltype.Signed)
-
-
class MachineCodeDumper:
enabled = True
log_fd = -1
@@ -110,7 +108,10 @@
return False
# log the executable name
from pypy.jit.backend.hlinfo import highleveljitinfo
- os.write(self.log_fd, 'BACKEND i386\n')
+ if WORD == 4:
+ os.write(self.log_fd, 'BACKEND x86\n')
+ elif WORD == 8:
+ os.write(self.log_fd, 'BACKEND x86_64\n')
if highleveljitinfo.sys_executable:
os.write(self.log_fd, 'SYS_EXECUTABLE %s\n' % (
highleveljitinfo.sys_executable,))
Modified: pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/regalloc.py
==============================================================================
--- pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/regalloc.py (original)
+++ pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/regalloc.py Sat Jun 12 00:19:21 2010
@@ -17,15 +17,27 @@
from pypy.jit.backend.llsupport.regalloc import FrameManager, RegisterManager,\
TempBox
-WORD = 4
-FRAME_FIXED_SIZE = 5 # ebp + ebx + esi + edi + force_index = 5 words
-FORCE_INDEX_OFS = -4*WORD
-
-width_of_type = {
- INT : 1,
- REF : 1,
- FLOAT : 2,
+# XXX
+import sys
+if sys.maxint == (2**31 - 1):
+ WORD = 4
+ FRAME_FIXED_SIZE = 5 # ebp + ebx + esi + edi + force_index = 5 words
+ FORCE_INDEX_OFS = -4*WORD
+ width_of_type = {
+ INT : 1,
+ REF : 1,
+ FLOAT : 2,
}
+else:
+ WORD = 8
+ FRAME_FIXED_SIZE = 7
+ FORCE_INDEX_OFS = -6*WORD
+ width_of_type = {
+ INT : 1,
+ REF : 1,
+ FLOAT : 1,
+ }
+
class X86RegisterManager(RegisterManager):
@@ -51,6 +63,12 @@
print "convert_to_imm: got a %s" % c
raise AssertionError
+class X86_64_RegisterManager(X86RegisterManager):
+ # r11 omitted because it's used as scratch
+ all_regs = [eax, ecx, edx, ebx, esi, edi, r8, r9, r10, r12, r13, r14, r15]
+ no_lower_byte_regs = []
+ save_around_call_regs = [eax, ecx, edx, esi, edi, r8, r9, r10]
+
class FloatConstants(object):
BASE_CONSTANT_SIZE = 1000
@@ -102,6 +120,20 @@
# after the call
return self.frame_manager.loc(v)
+class X86_64_XMMRegisterManager(X86XMMRegisterManager):
+ # xmm15 reserved for scratch use
+ all_regs = [xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14]
+ save_around_call_regs = all_regs
+
+ def call_result_location(self, v):
+ return xmm0
+
+ def after_call(self, v):
+ # We use RegisterManager's implementation, since X86XMMRegisterManager
+ # places the result on the stack, which we don't need to do when the
+ # calling convention places the result in xmm0
+ return RegisterManager.after_call(self, v)
+
class X86FrameManager(FrameManager):
@staticmethod
@@ -134,11 +166,21 @@
# compute longevity of variables
longevity = self._compute_vars_longevity(inputargs, operations)
self.longevity = longevity
- self.rm = X86RegisterManager(longevity,
- frame_manager = self.fm,
- assembler = self.assembler)
- self.xrm = X86XMMRegisterManager(longevity, frame_manager = self.fm,
- assembler = self.assembler)
+ # XXX
+ if cpu.WORD == 4:
+ gpr_reg_mgr_cls = X86RegisterManager
+ xmm_reg_mgr_cls = X86XMMRegisterManager
+ elif cpu.WORD == 8:
+ gpr_reg_mgr_cls = X86_64_RegisterManager
+ xmm_reg_mgr_cls = X86_64_XMMRegisterManager
+ else:
+ raise AssertionError("Word size should be 4 or 8")
+
+ self.rm = gpr_reg_mgr_cls(longevity,
+ frame_manager = self.fm,
+ assembler = self.assembler)
+ self.xrm = xmm_reg_mgr_cls(longevity, frame_manager = self.fm,
+ assembler = self.assembler)
def prepare_loop(self, inputargs, operations, looptoken):
self._prepare(inputargs, operations)
@@ -263,11 +305,11 @@
else:
self.fm.frame_bindings[arg] = loc
self.rm.free_regs = []
- for reg in X86RegisterManager.all_regs:
+ for reg in self.rm.all_regs:
if reg not in used:
self.rm.free_regs.append(reg)
self.xrm.free_regs = []
- for reg in X86XMMRegisterManager.all_regs:
+ for reg in self.xrm.all_regs:
if reg not in used:
self.xrm.free_regs.append(reg)
# note: we need to make a copy of inputargs because possibly_free_vars
@@ -680,7 +722,7 @@
# function, a GC write barrier, is known not to touch them.
# See remember_young_pointer() in rpython/memory/gc/generation.py.
for v, reg in self.rm.reg_bindings.items():
- if ((reg is eax or reg is ecx or reg is edx)
+ if (reg in self.rm.save_around_call_regs
and self.rm.stays_alive(v)):
arglocs.append(reg)
self.PerformDiscard(op, arglocs)
Modified: pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/regloc.py
==============================================================================
--- pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/regloc.py (original)
+++ pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/regloc.py Sat Jun 12 00:19:21 2010
@@ -141,10 +141,15 @@
def value_m(self):
return self.loc_m
-REGLOCS = [RegLoc(i, is_xmm=False) for i in range(8)]
-XMMREGLOCS = [RegLoc(i, is_xmm=True) for i in range(8)]
-eax, ecx, edx, ebx, esp, ebp, esi, edi = REGLOCS
-xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 = XMMREGLOCS
+REGLOCS = [RegLoc(i, is_xmm=False) for i in range(16)]
+XMMREGLOCS = [RegLoc(i, is_xmm=True) for i in range(16)]
+eax, ecx, edx, ebx, esp, ebp, esi, edi, r8, r9, r10, r11, r12, r13, r14, r15 = REGLOCS
+xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15 = XMMREGLOCS
+
+X86_64_SCRATCH_REG = r11
+# XXX: a GPR scratch register is definitely needed, but we could probably do
+# without an xmm scratch reg.
+X86_64_XMM_SCRATCH_REG = xmm15
unrolling_location_codes = unrolling_iterable(list("rbsmajix"))
@@ -159,14 +164,23 @@
if code1 == possible_code1:
for possible_code2 in unrolling_location_codes:
if code2 == possible_code2:
- methname = name + "_" + possible_code1 + possible_code2
- if hasattr(rx86.AbstractX86CodeBuilder, methname):
- val1 = getattr(loc1, "value_" + possible_code1)()
- val2 = getattr(loc2, "value_" + possible_code2)()
- getattr(self, methname)(val1, val2)
- return
+ # FIXME: Not RPython anymore!
+ # Fake out certain operations for x86_64
+ val1 = getattr(loc1, "value_" + possible_code1)()
+ val2 = getattr(loc2, "value_" + possible_code2)()
+ # XXX: Could use RIP+disp32 in some cases
+ if self.WORD == 8 and possible_code1 == 'j':
+ self.MOV_ri(X86_64_SCRATCH_REG.value, val1)
+ getattr(self, name + "_" + "m" + possible_code2)((X86_64_SCRATCH_REG.value, 0), val2)
+ elif self.WORD == 8 and possible_code2 == 'j':
+ self.MOV_ri(X86_64_SCRATCH_REG.value, val2)
+ getattr(self, name + "_" + possible_code1 + "m")(val1, (X86_64_SCRATCH_REG.value, 0))
+ elif self.WORD == 8 and possible_code2 == 'i' and not rx86.fits_in_32bits(val2) and name != 'MOV':
+ self.MOV_ri(X86_64_SCRATCH_REG.value, val2)
+ getattr(self, name + "_" + possible_code1 + "r")(val1, X86_64_SCRATCH_REG.value)
else:
- raise AssertionError("Instruction not defined: " + methname)
+ methname = name + "_" + possible_code1 + possible_code2
+ getattr(self, methname)(val1, val2)
return INSN
@@ -176,7 +190,8 @@
for possible_code in unrolling_location_codes:
if code == possible_code:
methname = name + "_" + possible_code
- if hasattr(rx86.AbstractX86CodeBuilder, methname):
+ # if hasattr(rx86.AbstractX86CodeBuilder, methname):
+ if hasattr(self, methname):
val = getattr(loc, "value_" + possible_code)()
getattr(self, methname)(val)
return
@@ -217,9 +232,11 @@
MOV16 = _16_bit_binaryop('MOV')
MOVZX8 = _binaryop('MOVZX8')
MOVZX16 = _binaryop('MOVZX16')
+ MOV32 = _binaryop('MOV32')
+ XCHG = _binaryop('XCHG')
- PUSH = _unaryop("PUSH")
- POP = _unaryop("POP")
+ PUSH = _unaryop('PUSH')
+ POP = _unaryop('POP')
LEA = _binaryop('LEA')
@@ -232,6 +249,9 @@
CVTSI2SD = _binaryop('CVTSI2SD')
CVTTSD2SI = _binaryop('CVTTSD2SI')
+ ANDPD = _binaryop('ANDPD')
+ XORPD = _binaryop('XORPD')
+
CALL = _unaryop('CALL')
def imm(x):
Modified: pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/runner.py
==============================================================================
--- pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/runner.py (original)
+++ pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/runner.py Sat Jun 12 00:19:21 2010
@@ -6,6 +6,8 @@
from pypy.jit.backend.x86.regalloc import FORCE_INDEX_OFS
from pypy.jit.backend.x86.profagent import ProfileAgent
from pypy.jit.backend.llsupport.llmodel import AbstractLLCPU
+from pypy.jit.backend.x86 import regloc
+import sys
class AbstractX86CPU(AbstractLLCPU):
debug = True
@@ -130,13 +132,27 @@
return faildescr
class CPU386(AbstractX86CPU):
- pass
+ WORD = 4
+ NUM_REGS = 8
+ CALLEE_SAVE_REGISTERS = [regloc.ebx, regloc.esi, regloc.edi]
+ FRAME_FIXED_SIZE = len(CALLEE_SAVE_REGISTERS) + 2
+
+ def __init__(self, *args, **kwargs):
+ assert sys.maxint == (2**31 - 1)
+ super(CPU386, self).__init__(*args, **kwargs)
class CPU386_NO_SSE2(CPU386):
supports_floats = False
class CPU_X86_64(AbstractX86CPU):
- pass
+ WORD = 8
+ NUM_REGS = 16
+ CALLEE_SAVE_REGISTERS = [regloc.ebx, regloc.r12, regloc.r13, regloc.r14, regloc.r15]
+ FRAME_FIXED_SIZE = len(CALLEE_SAVE_REGISTERS) + 2
+
+ def __init__(self, *args, **kwargs):
+ assert sys.maxint == (2**63 - 1)
+ super(CPU_X86_64, self).__init__(*args, **kwargs)
CPU = CPU386
Modified: pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/rx86.py
==============================================================================
--- pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/rx86.py (original)
+++ pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/rx86.py Sat Jun 12 00:19:21 2010
@@ -32,10 +32,13 @@
xmmnames = ['xmm%d' % i for i in range(16)]
def low_byte(reg):
- assert 0 <= reg < 4
+ # XXX: On 32-bit, this only works for 0 <= reg < 4
+ # Maybe we should check this?
return reg | BYTE_REG_FLAG
def high_byte(reg):
+ # This probably shouldn't be called in 64-bit mode, since to use the
+ # high-byte registers you have to make sure that there is no REX-prefix
assert 0 <= reg < 4
return (reg + 4) | BYTE_REG_FLAG
@@ -83,13 +86,18 @@
return encode_register, argnum, factor, rex_register
@specialize.arg(2)
+def rex_byte_register(mc, reg, factor):
+ assert reg & BYTE_REG_FLAG
+ return rex_register(mc, reg & ~BYTE_REG_FLAG, factor)
+
+ at specialize.arg(2)
def encode_byte_register(mc, reg, factor, orbyte):
assert reg & BYTE_REG_FLAG
return encode_register(mc, reg & ~BYTE_REG_FLAG, factor, orbyte)
def byte_register(argnum, factor=1):
assert factor in (1, 8)
- return encode_byte_register, argnum, factor, rex_register
+ return encode_byte_register, argnum, factor, rex_byte_register
# ____________________________________________________________
@@ -310,6 +318,7 @@
arg = args[arg]
orbyte = encode_step(mc, arg, extra, orbyte)
assert orbyte == 0
+
#
encoding_steps = []
for step in encoding:
@@ -431,26 +440,29 @@
mem_reg_plus_scaled_reg_plus_const(1))
MOV_ai = insn(rex_w, '\xC7', orbyte(0<<3), mem_reg_plus_scaled_reg_plus_const(1), immediate(2))
- # "MOV reg1, [immediate2]" and the opposite direction
- MOV_rj = insn(rex_w, '\x8B', register(1,8), '\x05', immediate(2))
- MOV_jr = insn(rex_w, '\x89', register(2,8), '\x05', immediate(1))
- MOV_ji = insn(rex_w, '\xC7', '\x05', immediate(1), immediate(2))
MOV8_mr = insn(rex_w, '\x88', byte_register(2, 8), mem_reg_plus_const(1))
MOV8_ar = insn(rex_w, '\x88', byte_register(2, 8), mem_reg_plus_scaled_reg_plus_const(1))
MOV8_mi = insn(rex_w, '\xC6', orbyte(0<<3), mem_reg_plus_const(1), immediate(2, 'b'))
MOV8_ai = insn(rex_w, '\xC6', orbyte(0<<3), mem_reg_plus_scaled_reg_plus_const(1), immediate(2, 'b'))
- MOV8_ji = insn(rex_w, '\xC6', orbyte(0<<3), '\x05', immediate(1), immediate(2, 'b'))
- MOV8_jr = insn(rex_w, '\x88', byte_register(2, 8), '\x05', immediate(1))
MOVZX8_rr = insn(rex_w, '\x0F\xB6', register(1,8), byte_register(2), '\xC0')
MOVZX8_rm = insn(rex_w, '\x0F\xB6', register(1,8), mem_reg_plus_const(2))
MOVZX8_ra = insn(rex_w, '\x0F\xB6', register(1,8), mem_reg_plus_scaled_reg_plus_const(2))
- MOVZX8_rj = insn(rex_w, '\x0F\xB6', register(1,8), '\x05', immediate(2))
MOVZX16_rm = insn(rex_w, '\x0F\xB7', register(1,8), mem_reg_plus_const(2))
MOVZX16_ra = insn(rex_w, '\x0F\xB7', register(1,8), mem_reg_plus_scaled_reg_plus_const(2))
+ # FIXME: Only difference between MOV32 and MOV instructions is rex_nw instead of rex_w
+ MOV32_ra = insn(rex_nw, '\x8B', register(1,8),
+ mem_reg_plus_scaled_reg_plus_const(2))
+ MOV32_ar = insn(rex_nw, '\x89', register(2,8),
+ mem_reg_plus_scaled_reg_plus_const(1))
+ MOV32_rm = insn(rex_nw, '\x8B', register(1,8), mem_reg_plus_const(2))
+ MOV32_mr = insn(rex_nw, '\x89', register(2,8), mem_reg_plus_const(1))
+ MOV32_mi = insn(rex_nw, '\xC7', orbyte(0<<3), mem_reg_plus_const(1),
+ immediate(2, 'i'))
+
# ------------------------------ Arithmetic ------------------------------
ADD_ri, ADD_rr, ADD_rb, _, _ = common_modes(0)
@@ -464,11 +476,7 @@
CMP_mi32 = insn(rex_w, '\x81', orbyte(7<<3), mem_reg_plus_const(1), immediate(2))
CMP_mi = select_8_or_32_bit_immed(CMP_mi8, CMP_mi32)
- CMP_ji8 = insn(rex_w, '\x83', '\x3D', immediate(1), immediate(2, 'b'))
- CMP_ji32 = insn(rex_w, '\x81', '\x3D', immediate(1), immediate(2))
- CMP_ji = select_8_or_32_bit_immed(CMP_ji8, CMP_ji32)
-
- CMP_rj = insn(rex_w, '\x3B', register(1, 8), '\x05', immediate(2))
+ CMP_rm = insn(rex_w, '\x3B', register(1, 8), mem_reg_plus_const(2))
AND8_rr = insn(rex_w, '\x20', byte_register(1), byte_register(2,8), '\xC0')
@@ -515,12 +523,9 @@
CALL_l = insn('\xE8', relative(1))
CALL_r = insn(rex_nw, '\xFF', register(1), chr(0xC0 | (2<<3)))
CALL_b = insn('\xFF', orbyte(2<<3), stack_bp(1))
- # XXX: Bit of kludge, but works in 32-bit because the relative 32-bit
- # displacement is always enough to encode any address
- CALL_j = CALL_l
XCHG_rm = insn(rex_w, '\x87', register(1,8), mem_reg_plus_const(2))
- XCHG_rj = insn(rex_w, '\x87', register(1,8), '\x05', immediate(2))
+ XCHG_rr = insn(rex_w, '\x87', register(1), register(2,8), '\xC0')
JMP_l = insn('\xE9', relative(1))
# FIXME: J_il8 and JMP_l8 assume the caller will do the appropriate
@@ -530,7 +535,7 @@
J_il8 = insn(immediate(1, 'o'), '\x70', immediate(2, 'b'))
J_il = insn('\x0F', immediate(1,'o'), '\x80', relative(2))
- SET_ir = insn('\x0F', immediate(1,'o'),'\x90', byte_register(2), '\xC0')
+ SET_ir = insn(rex_w, '\x0F', immediate(1,'o'),'\x90', byte_register(2), '\xC0')
# The 64-bit version of this, CQO, is defined in X86_64_CodeBuilder
CDQ = insn(rex_nw, '\x99')
@@ -556,30 +561,28 @@
mem_reg_plus_const(1))
MOVSD_ax = xmminsn('\xF2', rex_nw, '\x0F\x11', register(2,8), mem_reg_plus_scaled_reg_plus_const(1))
- MOVSD_xj = xmminsn('\xF2', rex_nw, '\x0F\x10', register(1, 8), '\x05', immediate(2))
- MOVSD_jx = xmminsn('\xF2', rex_nw, '\x0F\x11', register(2, 8), '\x05', immediate(1))
# Arithmetic
ADDSD_xx = xmminsn('\xF2', rex_nw, '\x0F\x58', register(1, 8), register(2), '\xC0')
ADDSD_xb = xmminsn('\xF2', rex_nw, '\x0F\x58', register(1, 8), stack_bp(2))
- ADDSD_xj = xmminsn('\xF2', rex_nw, '\x0F\x58', register(1, 8), '\x05', immediate(2))
+ ADDSD_xm = xmminsn('\xF2', rex_nw, '\x0F\x58', register(1, 8), mem_reg_plus_const(2))
SUBSD_xx = xmminsn('\xF2', rex_nw, '\x0F\x5C', register(1, 8), register(2), '\xC0')
SUBSD_xb = xmminsn('\xF2', rex_nw, '\x0F\x5C', register(1, 8), stack_bp(2))
- SUBSD_xj = xmminsn('\xF2', rex_nw, '\x0F\x5C', register(1, 8), '\x05', immediate(2))
+ SUBSD_xm = xmminsn('\xF2', rex_nw, '\x0F\x5C', register(1, 8), mem_reg_plus_const(2))
MULSD_xx = xmminsn('\xF2', rex_nw, '\x0F\x59', register(1, 8), register(2), '\xC0')
MULSD_xb = xmminsn('\xF2', rex_nw, '\x0F\x59', register(1, 8), stack_bp(2))
- MULSD_xj = xmminsn('\xF2', rex_nw, '\x0F\x59', register(1, 8), '\x05', immediate(2))
+ MULSD_xm = xmminsn('\xF2', rex_nw, '\x0F\x59', register(1, 8), mem_reg_plus_const(2))
DIVSD_xx = xmminsn('\xF2', rex_nw, '\x0F\x5E', register(1, 8), register(2), '\xC0')
DIVSD_xb = xmminsn('\xF2', rex_nw, '\x0F\x5E', register(1, 8), stack_bp(2))
- DIVSD_xj = xmminsn('\xF2', rex_nw, '\x0F\x5E', register(1, 8), '\x05', immediate(2))
+ DIVSD_xm = xmminsn('\xF2', rex_nw, '\x0F\x5E', register(1, 8), mem_reg_plus_const(2))
    # Comparison
UCOMISD_xx = xmminsn('\x66', rex_nw, '\x0F\x2E', register(1, 8), register(2), '\xC0')
UCOMISD_xb = xmminsn('\x66', rex_nw, '\x0F\x2E', register(1, 8), stack_bp(2))
- UCOMISD_xj = xmminsn('\x66', rex_nw, '\x0F\x2E', register(1, 8), '\x05', immediate(2))
+ UCOMISD_xm = xmminsn('\x66', rex_nw, '\x0F\x2E', register(1, 8), mem_reg_plus_const(2))
# Conversion
CVTSI2SD_xr = xmminsn('\xF2', rex_w, '\x0F\x2A', register(1, 8), register(2), '\xC0')
@@ -589,10 +592,11 @@
CVTTSD2SI_rb = xmminsn('\xF2', rex_w, '\x0F\x2C', register(1, 8), stack_bp(2))
# Bitwise
- ANDPD_xj = xmminsn('\x66', rex_nw, '\x0F\x54', register(1, 8), '\x05', immediate(2))
XORPD_xx = xmminsn('\x66', rex_nw, '\x0F\x57', register(1, 8), register(2), '\xC0')
- XORPD_xj = xmminsn('\x66', rex_nw, '\x0F\x57', register(1, 8), '\x05', immediate(2))
+ XORPD_xm = xmminsn('\x66', rex_nw, '\x0F\x57', register(1, 8), mem_reg_plus_const(2))
+
+ ANDPD_xm = xmminsn('\x66', rex_nw, '\x0F\x54', register(1, 8), mem_reg_plus_const(2))
# ------------------------------------------------------------
@@ -619,6 +623,35 @@
class X86_32_CodeBuilder(AbstractX86CodeBuilder):
WORD = 4
+ # We can do direct memory references on 32-bit
+ MOV_rj = insn(rex_w, '\x8B', register(1,8), '\x05', immediate(2))
+ MOV_jr = insn(rex_w, '\x89', register(2,8), '\x05', immediate(1))
+ MOV_ji = insn(rex_w, '\xC7', '\x05', immediate(1), immediate(2))
+ MOV8_ji = insn(rex_w, '\xC6', orbyte(0<<3), '\x05', immediate(1), immediate(2, 'b'))
+ MOV8_jr = insn(rex_w, '\x88', byte_register(2, 8), '\x05', immediate(1))
+ MOVZX8_rj = insn(rex_w, '\x0F\xB6', register(1,8), '\x05', immediate(2))
+
+ CMP_ji8 = insn(rex_w, '\x83', '\x3D', immediate(1), immediate(2, 'b'))
+ CMP_ji32 = insn(rex_w, '\x81', '\x3D', immediate(1), immediate(2))
+ CMP_ji = select_8_or_32_bit_immed(CMP_ji8, CMP_ji32)
+ CMP_rj = insn(rex_w, '\x3B', register(1, 8), '\x05', immediate(2))
+
+ # XXX: Bit of kludge, but works in 32-bit because the relative 32-bit
+ # displacement is always enough to encode any address
+ CALL_j = AbstractX86CodeBuilder.CALL_l
+
+ XCHG_rj = insn(rex_w, '\x87', register(1,8), '\x05', immediate(2))
+
+ MOVSD_xj = xmminsn('\xF2', rex_nw, '\x0F\x10', register(1, 8), '\x05', immediate(2))
+ MOVSD_jx = xmminsn('\xF2', rex_nw, '\x0F\x11', register(2, 8), '\x05', immediate(1))
+ ADDSD_xj = xmminsn('\xF2', rex_nw, '\x0F\x58', register(1, 8), '\x05', immediate(2))
+ SUBSD_xj = xmminsn('\xF2', rex_nw, '\x0F\x5C', register(1, 8), '\x05', immediate(2))
+ MULSD_xj = xmminsn('\xF2', rex_nw, '\x0F\x59', register(1, 8), '\x05', immediate(2))
+ DIVSD_xj = xmminsn('\xF2', rex_nw, '\x0F\x5E', register(1, 8), '\x05', immediate(2))
+ UCOMISD_xj = xmminsn('\x66', rex_nw, '\x0F\x2E', register(1, 8), '\x05', immediate(2))
+ ANDPD_xj = xmminsn('\x66', rex_nw, '\x0F\x54', register(1, 8), '\x05', immediate(2))
+ XORPD_xj = xmminsn('\x66', rex_nw, '\x0F\x57', register(1, 8), '\x05', immediate(2))
+
class X86_64_CodeBuilder(AbstractX86CodeBuilder):
WORD = 8
@@ -655,8 +688,18 @@
AbstractX86CodeBuilder.MOV_ri(self, R.eax, target)
AbstractX86CodeBuilder.CALL_r(self, R.eax)
+ # XXX
+ CALL_j = CALL_l
+
# ____________________________________________________________
-all_instructions = [name for name in AbstractX86CodeBuilder.__dict__
- if name.split('_')[0].isupper()]
+# FIXME: What about 32-bit only or 64-bit only instructions?
+# This is used to build the MachineCodeBlockWrapper. Missing
+# some instructions could possibly lead to subtle bugs.
+
+# FIXME: hack hack hack
+all_instructions = ([name for name in AbstractX86CodeBuilder.__dict__
+ if name.split('_')[0].isupper()] +
+ [name for name in X86_64_CodeBuilder.__dict__
+ if name.split('_')[0].isupper()])
all_instructions.sort()
Modified: pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_assembler.py
==============================================================================
--- pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_assembler.py (original)
+++ pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_assembler.py Sat Jun 12 00:19:21 2010
@@ -4,11 +4,15 @@
from pypy.jit.metainterp.history import BoxInt, BoxPtr, BoxFloat, INT, REF, FLOAT
from pypy.rlib.rarithmetic import intmask
from pypy.rpython.lltypesystem import lltype, llmemory, rffi
+from pypy.jit.backend.x86.regalloc import WORD
+from pypy.jit.backend.detect_cpu import getcpuclass
+ACTUAL_CPU = getcpuclass()
class FakeCPU:
rtyper = None
supports_floats = True
+ NUM_REGS = ACTUAL_CPU.NUM_REGS
class FakeMC:
def __init__(self, base_address=0):
@@ -46,12 +50,12 @@
xmm2]
assert len(failargs) == len(locs)
assembler.write_failure_recovery_description(mc, failargs, locs)
- nums = [Assembler386.DESCR_INT + 4*(8+0),
- Assembler386.DESCR_REF + 4*(8+1),
- Assembler386.DESCR_FLOAT + 4*(8+10),
- Assembler386.DESCR_INT + 4*(8+100),
- Assembler386.DESCR_REF + 4*(8+101),
- Assembler386.DESCR_FLOAT + 4*(8+110),
+ nums = [Assembler386.DESCR_INT + 4*(16+0),
+ Assembler386.DESCR_REF + 4*(16+1),
+ Assembler386.DESCR_FLOAT + 4*(16+10),
+ Assembler386.DESCR_INT + 4*(16+100),
+ Assembler386.DESCR_REF + 4*(16+101),
+ Assembler386.DESCR_FLOAT + 4*(16+110),
Assembler386.CODE_HOLE,
Assembler386.CODE_HOLE,
Assembler386.DESCR_INT + 4*ebx.value,
@@ -169,8 +173,8 @@
assert loc >= 0
ofs = get_ebp_ofs(loc)
assert ofs < 0
- assert (ofs % 4) == 0
- stack[stacklen + ofs//4] = value
+ assert (ofs % WORD) == 0
+ stack[stacklen + ofs//WORD] = value
descr_bytecode = []
for i, (kind, loc) in enumerate(content):
@@ -207,7 +211,7 @@
if isinstance(loc, RegLoc):
num = kind + 4*loc.value
else:
- num = kind + 4*(8+loc)
+ num = kind + Assembler386.CODE_FROMSTACK + (4*loc)
while num >= 0x80:
descr_bytecode.append((num & 0x7F) | 0x80)
num >>= 7
Modified: pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_gc_integration.py
==============================================================================
--- pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_gc_integration.py (original)
+++ pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_gc_integration.py Sat Jun 12 00:19:21 2010
@@ -8,7 +8,7 @@
from pypy.jit.metainterp.resoperation import rop, ResOperation
from pypy.jit.backend.llsupport.descr import GcCache
from pypy.jit.backend.llsupport.gc import GcLLDescription
-from pypy.jit.backend.x86.runner import CPU
+from pypy.jit.backend.detect_cpu import getcpuclass
from pypy.jit.backend.x86.regalloc import RegAlloc, WORD, FRAME_FIXED_SIZE
from pypy.jit.metainterp.test.oparser import parse
from pypy.rpython.lltypesystem import lltype, llmemory, rffi
@@ -22,6 +22,8 @@
X86XMMRegisterManager
from pypy.rpython.annlowlevel import llhelper
+CPU = getcpuclass()
+
class MockGcRootMap(object):
def get_basic_shape(self):
return ['shape']
@@ -83,7 +85,7 @@
mark = regalloc.get_mark_gc_roots(cpu.gc_ll_descr.gcrootmap)
assert mark[0] == 'compressed'
base = -WORD * FRAME_FIXED_SIZE
- expected = ['ebx', 'esi', 'edi', base, base-4, base-8]
+ expected = ['ebx', 'esi', 'edi', base, base-WORD, base-WORD*2]
assert dict.fromkeys(mark[1:]) == dict.fromkeys(expected)
class TestRegallocGcIntegration(BaseTestRegalloc):
@@ -174,7 +176,7 @@
self.addrs[1] = self.addrs[0] + 64
# 64 bytes
def malloc_slowpath(size):
- assert size == 8
+ assert size == WORD*2
nadr = rffi.cast(lltype.Signed, self.nursery)
self.addrs[0] = nadr + size
return nadr
@@ -198,7 +200,7 @@
return rffi.cast(lltype.Signed, self.addrs)
def get_nursery_top_addr(self):
- return rffi.cast(lltype.Signed, self.addrs) + 4
+ return rffi.cast(lltype.Signed, self.addrs) + WORD
def get_malloc_fixedsize_slowpath_addr(self):
fptr = llhelper(lltype.Ptr(self.MALLOC_SLOWPATH), self.malloc_slowpath)
@@ -212,7 +214,7 @@
def setup_method(self, method):
cpu = CPU(None, None)
- cpu.vtable_offset = 4
+ cpu.vtable_offset = WORD
cpu.gc_ll_descr = GCDescrFastpathMalloc()
NODE = lltype.Struct('node', ('tid', lltype.Signed),
@@ -246,7 +248,7 @@
assert gc_ll_descr.nursery[0] == self.nodedescr.tid
assert gc_ll_descr.nursery[1] == 42
nurs_adr = rffi.cast(lltype.Signed, gc_ll_descr.nursery)
- assert gc_ll_descr.addrs[0] == nurs_adr + 8
+ assert gc_ll_descr.addrs[0] == nurs_adr + (WORD*2)
def test_malloc_slowpath(self):
ops = '''
@@ -266,7 +268,7 @@
# this should call slow path once
gc_ll_descr = self.cpu.gc_ll_descr
nadr = rffi.cast(lltype.Signed, gc_ll_descr.nursery)
- assert gc_ll_descr.addrs[0] == nadr + 8
+ assert gc_ll_descr.addrs[0] == nadr + (WORD*2)
def test_new_with_vtable(self):
ops = '''
@@ -281,4 +283,4 @@
assert gc_ll_descr.nursery[0] == self.descrsize.tid
assert gc_ll_descr.nursery[1] == self.vtable_int
nurs_adr = rffi.cast(lltype.Signed, gc_ll_descr.nursery)
- assert gc_ll_descr.addrs[0] == nurs_adr + 8
+ assert gc_ll_descr.addrs[0] == nurs_adr + (WORD*2)
Modified: pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_recompilation.py
==============================================================================
--- pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_recompilation.py (original)
+++ pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_recompilation.py Sat Jun 12 00:19:21 2010
@@ -1,6 +1,5 @@
-
-from pypy.jit.backend.x86.runner import CPU
from pypy.jit.backend.x86.test.test_regalloc import BaseTestRegalloc
+from pypy.jit.backend.x86.regalloc import WORD
class TestRecompilation(BaseTestRegalloc):
def test_compile_bridge_not_deeper(self):
@@ -51,7 +50,9 @@
descr = loop.operations[2].descr
new = descr._x86_bridge_frame_depth
assert descr._x86_bridge_param_depth == 0
- assert new > previous
+ # XXX: Maybe add enough ops to force stack on 64-bit as well?
+ if WORD == 4:
+ assert new > previous
self.cpu.set_future_value_int(0, 0)
fail = self.run(loop)
assert fail.identifier == 2
@@ -111,7 +112,9 @@
guard_op = loop.operations[5]
loop_frame_depth = loop.token._x86_frame_depth
assert loop.token._x86_param_depth == 0
- assert guard_op.descr._x86_bridge_frame_depth > loop_frame_depth
+ # XXX: Maybe add enough ops to force stack on 64-bit as well?
+ if WORD == 4:
+ assert guard_op.descr._x86_bridge_frame_depth > loop_frame_depth
assert guard_op.descr._x86_bridge_param_depth == 0
self.cpu.set_future_value_int(0, 0)
self.cpu.set_future_value_int(1, 0)
Modified: pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_regalloc.py
==============================================================================
--- pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_regalloc.py (original)
+++ pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_regalloc.py Sat Jun 12 00:19:21 2010
@@ -7,7 +7,7 @@
BoxPtr, ConstPtr, LoopToken, BasicFailDescr
from pypy.jit.metainterp.resoperation import rop, ResOperation
from pypy.jit.backend.llsupport.descr import GcCache
-from pypy.jit.backend.x86.runner import CPU
+from pypy.jit.backend.detect_cpu import getcpuclass
from pypy.jit.backend.x86.regalloc import RegAlloc, WORD, X86RegisterManager,\
FloatConstants
from pypy.jit.metainterp.test.oparser import parse
@@ -16,6 +16,7 @@
from pypy.rpython.lltypesystem import rclass, rstr
from pypy.jit.backend.x86.rx86 import *
+CPU = getcpuclass()
class MockGcDescr(GcCache):
def get_funcptr_for_new(self):
return 123
Modified: pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_regalloc2.py
==============================================================================
--- pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_regalloc2.py (original)
+++ pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_regalloc2.py Sat Jun 12 00:19:21 2010
@@ -2,7 +2,8 @@
from pypy.jit.metainterp.history import ResOperation, BoxInt, ConstInt,\
BoxPtr, ConstPtr, BasicFailDescr, LoopToken
from pypy.jit.metainterp.resoperation import rop
-from pypy.jit.backend.x86.runner import CPU
+from pypy.jit.backend.detect_cpu import getcpuclass
+CPU = getcpuclass()
def test_bug_rshift():
v1 = BoxInt()
Modified: pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_runner.py
==============================================================================
--- pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_runner.py (original)
+++ pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_runner.py Sat Jun 12 00:19:21 2010
@@ -3,7 +3,7 @@
from pypy.jit.metainterp.history import ResOperation, LoopToken
from pypy.jit.metainterp.history import (BoxInt, BoxPtr, ConstInt, ConstPtr,
Box, BasicFailDescr)
-from pypy.jit.backend.x86.runner import CPU
+from pypy.jit.backend.detect_cpu import getcpuclass
from pypy.jit.backend.x86.regalloc import WORD
from pypy.jit.backend.llsupport import symbolic
from pypy.jit.metainterp.resoperation import rop
@@ -12,6 +12,8 @@
import ctypes
import sys
+CPU = getcpuclass()
+
class FakeStats(object):
pass
@@ -56,7 +58,7 @@
assert u.chars[3] == u'd'
@staticmethod
- def _resbuf(res, item_tp=ctypes.c_int):
+ def _resbuf(res, item_tp=ctypes.c_long):
return ctypes.cast(res.value._obj.intval, ctypes.POINTER(item_tp))
def test_allocations(self):
Modified: pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py
==============================================================================
--- pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py (original)
+++ pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py Sat Jun 12 00:19:21 2010
@@ -226,6 +226,8 @@
'SUB_ri', 'XOR_ri'):
if args[0] == rx86.R.eax:
return [] # ADD EAX, constant: there is a special encoding
+ if methname == 'XCHG_rr' and rx86.R.eax in args:
+ return [] # special encoding
if methname == 'MOV_rj' and args[0] == rx86.R.eax:
return [] # MOV EAX, [immediate]: there is a special encoding
if methname == 'MOV_jr' and args[1] == rx86.R.eax:
@@ -262,9 +264,18 @@
print "Skipping %s" % methname
return
+ # XXX: ugly way to deal with the differences between 32 and 64 bit
+ if not hasattr(self.X86_CodeBuilder, methname):
+ return
+
+ # XXX: hack hack hack
+ if methname == 'WORD':
+ return
+
+
print "Testing %s with argmodes=%r" % (instrname, argmodes)
self.methname = methname
- self.is_xmm_insn = getattr(getattr(rx86.AbstractX86CodeBuilder,
+ self.is_xmm_insn = getattr(getattr(self.X86_CodeBuilder,
methname), 'is_xmm_insn', False)
ilist = self.make_all_tests(methname, argmodes)
oplist, as_code = self.run_test(methname, instrname, argmodes, ilist)
Modified: pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_symbolic_x86.py
==============================================================================
--- pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_symbolic_x86.py (original)
+++ pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_symbolic_x86.py Sat Jun 12 00:19:21 2010
@@ -1,6 +1,7 @@
import py
from pypy.jit.backend.llsupport.symbolic import *
from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.jit.backend.x86.regalloc import WORD
# This test file is here and not in llsupport/test/ because it checks
# that we get correct numbers for a 32-bit machine.
@@ -19,32 +20,32 @@
ofs_z, size_z = get_field_token(S, 'z', False)
# ofs_x might be 0 or not, depending on how we count the headers
# but the rest should be as expected for a 386 machine
- assert size_x == size_y == size_z == 4
+ assert size_x == size_y == size_z == WORD
assert ofs_x >= 0
- assert ofs_y == ofs_x + 4
- assert ofs_z == ofs_x + 8
+ assert ofs_y == ofs_x + WORD
+ assert ofs_z == ofs_x + (WORD*2)
def test_struct_size():
ofs_z, size_z = get_field_token(S, 'z', False)
totalsize = get_size(S, False)
- assert totalsize == ofs_z + 4
+ assert totalsize == ofs_z + WORD
def test_primitive_size():
- assert get_size(lltype.Signed, False) == 4
+ assert get_size(lltype.Signed, False) == WORD
assert get_size(lltype.Char, False) == 1
- assert get_size(lltype.Ptr(S), False) == 4
+ assert get_size(lltype.Ptr(S), False) == WORD
def test_array_token():
A = lltype.GcArray(lltype.Char)
basesize, itemsize, ofs_length = get_array_token(A, False)
- assert basesize >= 4 # at least the 'length', maybe some gc headers
+ assert basesize >= WORD # at least the 'length', maybe some gc headers
assert itemsize == 1
- assert ofs_length == basesize - 4
+ assert ofs_length == basesize - WORD
A = lltype.GcArray(lltype.Signed)
basesize, itemsize, ofs_length = get_array_token(A, False)
- assert basesize >= 4 # at least the 'length', maybe some gc headers
- assert itemsize == 4
- assert ofs_length == basesize - 4
+ assert basesize >= WORD # at least the 'length', maybe some gc headers
+ assert itemsize == WORD
+ assert ofs_length == basesize - WORD
def test_varsized_struct_size():
S1 = lltype.GcStruct('S1', ('parent', S),
@@ -54,9 +55,9 @@
ofs_extra, size_extra = get_field_token(S1, 'extra', False)
basesize, itemsize, ofs_length = get_array_token(S1, False)
assert size_parent == ofs_extra
- assert size_extra == 4
- assert ofs_length == ofs_extra + 4
- assert basesize == ofs_length + 4
+ assert size_extra == WORD
+ assert ofs_length == ofs_extra + WORD
+ assert basesize == ofs_length + WORD
assert itemsize == 1
def test_string():
Modified: pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_zll_random.py
==============================================================================
--- pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_zll_random.py (original)
+++ pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/test/test_zll_random.py Sat Jun 12 00:19:21 2010
@@ -1,9 +1,11 @@
from pypy.jit.backend.test.test_random import check_random_function, Random
from pypy.jit.backend.test.test_ll_random import LLtypeOperationBuilder
-from pypy.jit.backend.x86.runner import CPU386
+from pypy.jit.backend.detect_cpu import getcpuclass
+
+CPU = getcpuclass()
def test_stress():
- cpu = CPU386(None, None)
+ cpu = CPU(None, None)
r = Random()
for i in range(1000):
check_random_function(cpu, LLtypeOperationBuilder, r, i, 1000)
Modified: pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/tool/viewcode.py
==============================================================================
--- pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/tool/viewcode.py (original)
+++ pypy/branch/x86-64-jit-backend/pypy/jit/backend/x86/tool/viewcode.py Sat Jun 12 00:19:21 2010
@@ -31,16 +31,22 @@
if sys.platform == "win32":
XXX # lots more in Psyco
-def machine_code_dump(data, originaddr):
- # the disassembler to use. 'objdump' writes GNU-style instructions.
- # 'ndisasm' would use Intel syntax, but you need to fix the output parsing.
- objdump = ('objdump -M intel -b binary -m i386 '
+def machine_code_dump(data, originaddr, backend_name):
+ objdump_backend_option = {
+ 'x86': 'i386',
+ 'x86_64': 'x86-64',
+ }
+ objdump = ('objdump -M intel,%(backend)s -b binary -m i386 '
'--adjust-vma=%(origin)d -D %(file)s')
#
f = open(tmpfile, 'wb')
f.write(data)
f.close()
- g = os.popen(objdump % {'file': tmpfile, 'origin': originaddr}, 'r')
+ g = os.popen(objdump % {
+ 'file': tmpfile,
+ 'origin': originaddr,
+ 'backend': objdump_backend_option[backend_name],
+ }, 'r')
result = g.readlines()
g.close()
return result[6:] # drop some objdump cruft
@@ -126,7 +132,7 @@
def disassemble(self):
if not hasattr(self, 'text'):
- lines = machine_code_dump(self.data, self.addr)
+ lines = machine_code_dump(self.data, self.addr, self.world.backend_name)
# instead of adding symbol names in the dumps we could
# also make the 0xNNNNNNNN addresses be red and show the
# symbol name when the mouse is over them
@@ -171,10 +177,13 @@
self.jumps = {}
self.symbols = {}
self.logentries = {}
+ self.backend_name = None
def parse(self, f, textonly=True):
for line in f:
- if line.startswith('CODE_DUMP '):
+ if line.startswith('BACKEND '):
+ self.backend_name = line.split(' ')[1].strip()
+ elif line.startswith('CODE_DUMP '):
pieces = line.split()
assert pieces[1].startswith('@')
assert pieces[2].startswith('+')
More information about the Pypy-commit
mailing list