[pypy-svn] r70215 - in pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86: . test

arigo at codespeak.net arigo at codespeak.net
Sun Dec 20 11:56:16 CET 2009


Author: arigo
Date: Sun Dec 20 11:56:16 2009
New Revision: 70215

Added:
   pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86/regloc.py   (contents, props changed)
Modified:
   pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86/assembler.py
   pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86/jump.py
   pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86/regalloc.py
   pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86/rx86.py
   pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86/support.py
   pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86/test/test_jump.py
   pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86/test/test_rx86.py
   pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py
Log:
Simplify rx86 a bit, add SSE2 and CALL instructions.
Start to port the rest of the code.


Modified: pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86/assembler.py
==============================================================================
--- pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86/assembler.py	(original)
+++ pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86/assembler.py	Sun Dec 20 11:56:16 2009
@@ -11,9 +11,14 @@
 from pypy.jit.backend.x86.regalloc import RegAlloc, WORD, lower_byte,\
      X86RegisterManager, X86XMMRegisterManager, get_ebp_ofs, FRAME_FIXED_SIZE,\
      FORCE_INDEX_OFS
+from pypy.jit.backend.x86.regloc import (eax, ecx, edx, ebx,
+                                         esp, ebp, esi, edi,
+                                         xmm0, xmm1, xmm2, xmm3,
+                                         xmm4, xmm5, xmm6, xmm7,
+                                         RegLoc, StackLoc)
 from pypy.rlib.objectmodel import we_are_translated, specialize
+from pypy.jit.backend.x86 import rx86
 from pypy.jit.backend.x86 import codebuf
-from pypy.jit.backend.x86.ri386 import *
 from pypy.jit.metainterp.resoperation import rop
 from pypy.jit.backend.x86.support import values_array
 from pypy.rlib.debug import debug_print
@@ -67,9 +72,8 @@
     method.func_name = name
     return method
 
-for name in dir(codebuf.MachineCodeBlock):
-    if name.upper() == name:
-        setattr(MachineCodeBlockWrapper, name, _new_method(name))
+for _name in rx86.all_instructions:
+    setattr(MachineCodeBlockWrapper, _name, _new_method(_name))
 
 class Assembler386(object):
     mc = None
@@ -146,8 +150,8 @@
         addr[5] = 2147483647       # / for abs
         addr[6] = 0                #
         addr[7] = 0                #
-        self.loc_float_const_neg = heap64(float_constants)
-        self.loc_float_const_abs = heap64(float_constants + 16)
+        self.loc_float_const_neg = float_constants
+        self.loc_float_const_abs = float_constants + 16
 
     def assemble_loop(self, inputargs, operations, looptoken):
         """adds the following attributes to looptoken:
@@ -219,7 +223,7 @@
 
     def _patchable_stackadjust(self):
         # stack adjustment LEA
-        self.mc.LEA(esp, fixedsize_ebp_ofs(0))
+        self.mc.LEA32_rb(esp.value, 0)
         return self.mc.tell() - 4
 
     def _patch_stackadjust(self, adr_lea, reserved_depth):
@@ -230,51 +234,92 @@
         # Given that [EBP] is where we saved EBP, i.e. in the last word
         # of our fixed frame, then the 'words' value is:
         words = (FRAME_FIXED_SIZE - 1) + reserved_depth
-        mc.write(packimm32(-WORD * words))
+        mc.writeimm32(-WORD * words)
         mc.done()
 
     def _assemble_bootstrap_code(self, inputargs, arglocs):
         nonfloatlocs, floatlocs = arglocs
-        self.mc.PUSH(ebp)
-        self.mc.MOV(ebp, esp)
-        self.mc.PUSH(ebx)
-        self.mc.PUSH(esi)
-        self.mc.PUSH(edi)
+        self.mc.PUSH_r(ebp.value)
+        self.mc.MOV_rr(ebp.value, esp.value)
+        self.mc.PUSH_r(ebx.value)
+        self.mc.PUSH_r(esi.value)
+        self.mc.PUSH_r(edi.value)
         # NB. the shape of the frame is hard-coded in get_basic_shape() too.
         # Also, make sure this is consistent with FRAME_FIXED_SIZE.
         adr_stackadjust = self._patchable_stackadjust()
-        tmp = X86RegisterManager.all_regs[0]
-        xmmtmp = X86XMMRegisterManager.all_regs[0]
+        #
+        self._bootstrap_load_floats(floatlocs)
+        #
+        # A bit of messy code to ensure that we load the value of 'esi'
+        # last, to let it be available as a temporary register in the
+        # preceding parts.
         for i in range(len(nonfloatlocs)):
             loc = nonfloatlocs[i]
+            if loc is esi:
+                esi_type = inputargs[i].type
+                break
+        else:
+            i = 0
+            esi_type = INT
+        if esi_type == REF:
+            self._bootstrap_load(inputargs, nonfloatlocs,
+                                 INT, self.fail_boxes_int, 0)
+            self._bootstrap_load(inputargs, nonfloatlocs,
+                                 REF, self.fail_boxes_ptr, i)
+        else:
+            self._bootstrap_load(inputargs, nonfloatlocs,
+                                 REF, self.fail_boxes_ptr, 0)
+            self._bootstrap_load(inputargs, nonfloatlocs,
+                                 INT, self.fail_boxes_int, i)
+        #
+        return adr_stackadjust
+
+    def _bootstrap_load(self, inputargs, nonfloatlocs, type, fail_boxes, i):
+        # load all input arguments from 'nonfloatlocs' that are of the
+        # given type (INT or REF), loading the i'th argument last.
+        tmp = X86RegisterManager.all_regs[0]
+        adr = fail_boxes.get_base_addr()
+        self.mc.MOV_ri(tmp.value, adr)
+        for j in range(len(nonfloatlocs)):
+            i -= 1
+            if i < 0:
+                i = len(nonfloatlocs) - 1
+            loc = nonfloatlocs[i]
             if loc is None:
                 continue
-            if isinstance(loc, REG):
+            if inputargs[i].type != type:
+                continue
+            if isinstance(loc, RegLoc):
                 target = loc
             else:
-                target = tmp
-            if inputargs[i].type == REF:
+                target = esi
+            if type == REF:
                 # This uses XCHG to put zeroes in fail_boxes_ptr after
                 # reading them
-                self.mc.XOR(target, target)
-                adr = self.fail_boxes_ptr.get_addr_for_num(i)
-                self.mc.XCHG(target, heap(adr))
+                self.mc.XOR_rr(target.value, target.value)
+                self.mc.XCHG_rm(target.value, (tmp.value, WORD*i))
             else:
-                adr = self.fail_boxes_int.get_addr_for_num(i)
-                self.mc.MOV(target, heap(adr))
-            if target is not loc:
-                self.mc.MOV(loc, target)
+                self.mc.MOV_rm(target.value, (tmp.value, WORD*i))
+            if isinstance(loc, StackLoc):
+                self.mc.MOV_br(loc.value, target.value)
+
+    def _bootstrap_load_floats(self, floatlocs):
+        tmp = X86RegisterManager.all_regs[0]
+        loaded = False
         for i in range(len(floatlocs)):
             loc = floatlocs[i]
             if loc is None:
                 continue
-            adr = self.fail_boxes_float.get_addr_for_num(i)
-            if isinstance(loc, REG):
-                self.mc.MOVSD(loc, heap64(adr))
+            if not loaded:
+                adr = self.fail_boxes_float.get_base_addr()
+                self.mc.MOV_ri(tmp.value, adr)
+                loaded = True
+            if isinstance(loc, RegLoc):
+                self.mc.MOVSD_rm(loc.value, (tmp.value, 8*i))
             else:
-                self.mc.MOVSD(xmmtmp, heap64(adr))
-                self.mc.MOVSD(loc, xmmtmp)
-        return adr_stackadjust
+                xmmtmp = X86XMMRegisterManager.all_regs[0]
+                self.mc.MOVSD_rm(xmmtmp.value, (tmp.value, 8*i))
+                self.mc.MOVSD_sr(loc.value, xmmtmp.value)
 
     def dump(self, text):
         if not self.verbose:
@@ -359,7 +404,13 @@
 
     def _binaryop(asmop, can_swap=False):
         def genop_binary(self, op, arglocs, result_loc):
-            getattr(self.mc, asmop)(arglocs[0], arglocs[1])
+            loc0, loc1 = arglocs
+            if isinstance(loc1, RegLoc):
+                getattr(self.mc, asmop + '_rr')(loc0.value, loc1.value)
+            elif isinstance(loc1, StackLoc):
+                getattr(self.mc, asmop + '_rb')(loc0.value, loc1.value)
+            else:
+                getattr(self.mc, asmop + '_ri')(loc0.value, loc1.getint())
         return genop_binary
 
     def _cmpop(cond, rev_cond):
@@ -825,15 +876,15 @@
             if box is not None and box.type == FLOAT:
                 withfloats = True
                 break
-        mc.CALL(rel32(self.failure_recovery_code[exc + 2 * withfloats]))
+        mc.CALL_l(self.failure_recovery_code[exc + 2 * withfloats])
         # write tight data that describes the failure recovery
         faildescr._x86_failure_recovery_bytecode = mc.tell()
         self.write_failure_recovery_description(mc, failargs, fail_locs)
         # write the fail_index too
-        mc.write(packimm32(fail_index))
+        mc.writeimm32(fail_index)
         # for testing the decoding, write a final byte 0xCC
         if not we_are_translated():
-            mc.writechr(0xCC)
+            mc.writechar('\xCC')
             faildescr._x86_debug_faillocs = [loc for loc in fail_locs
                                                  if loc is not None]
         return addr
@@ -859,26 +910,19 @@
                 else:
                     raise AssertionError("bogus kind")
                 loc = locs[i]
-                if isinstance(loc, MODRM):
+                if isinstance(loc, StackLoc):
                     n = self.CODE_FROMSTACK//4 + loc.position
                 else:
-                    assert isinstance(loc, REG)
-                    n = loc.op
+                    assert isinstance(loc, RegLoc)
+                    n = loc.value
                 n = kind + 4*n
                 while n > 0x7F:
-                    mc.writechr((n & 0x7F) | 0x80)
+                    mc.writechar(chr((n & 0x7F) | 0x80))
                     n >>= 7
             else:
                 n = self.CODE_HOLE
-            mc.writechr(n)
-        mc.writechr(self.CODE_STOP)
-        # preallocate the fail_boxes
-        i = len(failargs) - 1
-        if i >= 0:
-            self.fail_boxes_int.get_addr_for_num(i)
-            self.fail_boxes_ptr.get_addr_for_num(i)
-            if self.cpu.supports_floats:
-                self.fail_boxes_float.get_addr_for_num(i)
+            mc.writechar(chr(n))
+        mc.writechar(chr(self.CODE_STOP))
 
     def rebuild_faillocs_from_descr(self, bytecode):
         from pypy.jit.backend.x86.regalloc import X86FrameManager
@@ -1044,19 +1088,19 @@
         # Assume that we are called at the beginning, when there is no risk
         # that 'mc' runs out of space.  Checked by asserts in mc.write().
         recovery_addr = mc.tell()
-        mc.PUSH(edi)
-        mc.PUSH(esi)
-        mc.PUSH(ebp)
-        mc.PUSH(esp)  # <-- not really used, but needed to take up the space
-        mc.PUSH(ebx)
-        mc.PUSH(edx)
-        mc.PUSH(ecx)
-        mc.PUSH(eax)
-        mc.MOV(esi, esp)
+        mc.PUSH_r(edi.value)
+        mc.PUSH_r(esi.value)
+        mc.PUSH_r(ebp.value)
+        mc.PUSH_r(esp.value)  # <-- not really used, but needed to take
+        mc.PUSH_r(ebx.value)                             # up the space
+        mc.PUSH_r(edx.value)
+        mc.PUSH_r(ecx.value)
+        mc.PUSH_r(eax.value)
+        mc.MOV_rr(esi.value, esp.value)
         if withfloats:
-            mc.SUB(esp, imm(8*8))
+            mc.SUB_ri(esp.value, 8*8)
             for i in range(8):
-                mc.MOVSD(mem64(esp, 8*i), xmm_registers[i])
+                mc.MOVSD_sr(8*i, i)
 
         # we call a provided function that will
         # - call our on_leave_jitted_hook which will mark
@@ -1064,7 +1108,7 @@
         #   avoid unwarranted freeing
         # - optionally save exception depending on the flag
         addr = self.cpu.get_on_leave_jitted_int(save_exception=exc)
-        mc.CALL(rel32(addr))
+        mc.CALL_l(addr)
 
         # the following call saves all values from the stack and from
         # registers to the right 'fail_boxes_<type>' location.
@@ -1073,66 +1117,63 @@
         # the XMM registers.  Moreover, esi[8] is a pointer to the recovery
         # bytecode, pushed just before by the CALL instruction written by
         # generate_quick_failure().
-        mc.PUSH(esi)
-        mc.CALL(rel32(failure_recovery_func))
+        mc.PUSH_r(esi.value)
+        mc.CALL_l(failure_recovery_func)
         # returns in eax the fail_index
 
         # now we return from the complete frame, which starts from
         # _assemble_bootstrap_code().  The LEA below throws away most
         # of the frame, including all the PUSHes that we did just above.
-        mc.LEA(esp, addr_add(ebp, imm(-3 * WORD)))
-        mc.POP(edi)    # [ebp-12]
-        mc.POP(esi)    # [ebp-8]
-        mc.POP(ebx)    # [ebp-4]
-        mc.POP(ebp)    # [ebp]
+        mc.LEA_rb(esp.value, -3 * WORD)
+        mc.POP_r(edi.value)    # [ebp-12]
+        mc.POP_r(esi.value)    # [ebp-8]
+        mc.POP_r(ebx.value)    # [ebp-4]
+        mc.POP_r(ebp.value)    # [ebp]
         mc.RET()
         self.mc2.done()
         self.failure_recovery_code[exc + 2 * withfloats] = recovery_addr
 
-    def generate_failure(self, mc, fail_index, locs, exc, locs_are_ref):
-        for i in range(len(locs)):
-            loc = locs[i]
-            if isinstance(loc, REG):
-                if loc.width == 8:
-                    adr = self.fail_boxes_float.get_addr_for_num(i)
-                    mc.MOVSD(heap64(adr), loc)
-                else:
-                    if locs_are_ref[i]:
-                        adr = self.fail_boxes_ptr.get_addr_for_num(i)
-                    else:
-                        adr = self.fail_boxes_int.get_addr_for_num(i)
-                    mc.MOV(heap(adr), loc)
-        for i in range(len(locs)):
-            loc = locs[i]
-            if not isinstance(loc, REG):
-                if loc.width == 8:
-                    mc.MOVSD(xmm0, loc)
-                    adr = self.fail_boxes_float.get_addr_for_num(i)
-                    mc.MOVSD(heap64(adr), xmm0)
-                else:
-                    if locs_are_ref[i]:
-                        adr = self.fail_boxes_ptr.get_addr_for_num(i)
-                    else:
-                        adr = self.fail_boxes_int.get_addr_for_num(i)
-                    mc.MOV(eax, loc)
-                    mc.MOV(heap(adr), eax)
+    def generate_failure_save(self, v, loc, i, tmpreg):
+        if v.type == FLOAT:
+            destadr = self.fail_boxes_float.get_addr_for_num(i)
+            self.mc.MOV_ri(tmpreg.value, destadr)
+            if isinstance(loc, RegLoc):
+                self.mc.MOVSD_mr((tmpreg.value, 0), loc.value)
+            else:
+                assert isinstance(loc, ConstInt)
+                value = rffi.cast(rffi.INTP, loc.value)[0]
+                self.mc.MOV_mi((tmpreg.value, 0), value)
+                value = rffi.cast(rffi.INTP, loc.value)[1]
+                self.mc.MOV_mi((tmpreg.value, 4), value)
+        else:
+            if v.type == REF:
+                destadr = self.fail_boxes_ref.get_addr_for_num(i)
+            else:
+                destadr = self.fail_boxes_int.get_addr_for_num(i)
+            self.mc.MOV_ri(tmpreg.value, destadr)
+            if isinstance(loc, RegLoc):
+                self.mc.MOV_mr((tmpreg.value, 0), loc.value)
+            else:
+                assert isinstance(loc, ConstInt)
+                self.mc.MOV_mi((tmpreg.value, 0), loc.value)
 
+    def generate_failure(self, fail_index, exc):
+        # avoid breaking the following code sequence, as we are almost done
+        mc = self.mc._mc
         # we call a provided function that will
         # - call our on_leave_jitted_hook which will mark
         #   the fail_boxes_ptr array as pointing to young objects to
         #   avoid unwarranted freeing
         # - optionally save exception depending on the flag
         addr = self.cpu.get_on_leave_jitted_int(save_exception=exc)
-        mc.CALL(rel32(addr))
-
-        # don't break the following code sequence!   xxx no reason any more?
-        mc = mc._mc
-        mc.LEA(esp, addr_add(ebp, imm(-3 * WORD)))
-        mc.MOV(eax, imm(fail_index))
-        mc.POP(edi)    # [ebp-12]
-        mc.POP(esi)    # [ebp-8]
-        mc.POP(ebx)    # [ebp-4]
-        mc.POP(ebp)    # [ebp]
+        mc.CALL_l(addr)
+        #
+        mc.LEA_rb(esp.value, -3 * WORD)
+        mc.MOV_ri(eax.value, fail_index)
+        mc.POP_r(edi.value)    # [ebp-12]
+        mc.POP_r(esi.value)    # [ebp-8]
+        mc.POP_r(ebx.value)    # [ebp-4]
+        mc.POP_r(ebp.value)    # [ebp]
         mc.RET()
 
     @specialize.arg(2)
@@ -1289,15 +1330,15 @@
                 return memsib(reg_or_imm1, reg_or_imm2, scale, offset)
     return addr_add
 
-addr8_add = new_addr_add(heap8, mem8, memSIB8)
-addr_add = new_addr_add(heap, mem, memSIB)
-addr64_add = new_addr_add(heap64, mem64, memSIB64)
-
-def addr_add_const(reg_or_imm1, offset):
-    if isinstance(reg_or_imm1, IMM32):
-        return heap(reg_or_imm1.value + offset)
-    else:
-        return mem(reg_or_imm1, offset)
+#addr8_add = new_addr_add(heap8, mem8, memSIB8)
+#addr_add = new_addr_add(heap, mem, memSIB)
+#addr64_add = new_addr_add(heap64, mem64, memSIB64)
+#
+#def addr_add_const(reg_or_imm1, offset):
+#    if isinstance(reg_or_imm1, IMM32):
+#        return heap(reg_or_imm1.value + offset)
+#    else:
+#        return mem(reg_or_imm1, offset)
 
 def round_up_to_4(size):
     if size < 4:

Modified: pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86/jump.py
==============================================================================
--- pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86/jump.py	(original)
+++ pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86/jump.py	Sun Dec 20 11:56:16 2009
@@ -1,6 +1,6 @@
 import sys
 from pypy.tool.pairtype import extendabletype
-from pypy.jit.backend.x86.regalloc import AssemblerLocation, StackLoc, RegLoc
+from pypy.jit.backend.x86.regloc import AssemblerLocation, StackLoc, RegLoc
 
 
 def remap_frame_layout(assembler, src_locations, dst_locations, tmpreg):

Modified: pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86/regalloc.py
==============================================================================
--- pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86/regalloc.py	(original)
+++ pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86/regalloc.py	Sun Dec 20 11:56:16 2009
@@ -5,14 +5,16 @@
 from pypy.jit.metainterp.history import (Box, Const, ConstInt, ConstPtr,
                                          ResOperation, ConstAddr, BoxPtr,
                                          LoopToken, INT, REF, FLOAT)
-from pypy.jit.backend.x86 import rx86
+from pypy.jit.backend.x86.regloc import (StackLoc, eax, ecx, edx, ebx,
+                                         esp, ebp, esi, edi,
+                                         xmm0, xmm1, xmm2, xmm3,
+                                         xmm4, xmm5, xmm6, xmm7)
 from pypy.rpython.lltypesystem import lltype, ll2ctypes, rffi, rstr
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.rlib.unroll import unrolling_iterable
 from pypy.rlib import rgc
 from pypy.jit.backend.llsupport import symbolic
 from pypy.jit.metainterp.resoperation import rop
-from pypy.jit.metainterp.history import AbstractValue
 from pypy.jit.backend.llsupport.descr import BaseFieldDescr, BaseArrayDescr
 from pypy.jit.backend.llsupport.descr import BaseCallDescr, BaseSizeDescr
 from pypy.jit.backend.llsupport.regalloc import FrameManager, RegisterManager,\
@@ -22,37 +24,6 @@
 FRAME_FIXED_SIZE = 5     # ebp + ebx + esi + edi + force_index = 5 words
 FORCE_INDEX_OFS = -4*WORD
 
-
-class AssemblerLocation(AbstractValue):
-    __slots__ = 'value'
-    def _getregkey(self):
-        return self.value
-
-class StackLoc(AssemblerLocation):
-    def __init__(self, position, ebp_offset):
-        assert ebp_offset < 0   # so no confusion with RegLoc._loc
-        self.position = position
-        self.value = ebp_offset
-    def __repr__(self):
-        return '%d(%%ebp)' % (self.value,)
-
-class RegLoc(AssemblerLocation):
-    def __init__(self, regnum):
-        assert regnum >= 0
-        self.value = regnum
-    def __repr__(self):
-        return rx86.R.names[self.value]
-
-class XmmRegLoc(RegLoc):
-    def __repr__(self):
-        return rx86.R.xmmnames[self.value]
-
-REGLOCS = [RegLoc(i) for i in range(8)]
-XMMREGLOCS = [XmmRegLoc(i) for i in range(8)]
-eax, ecx, edx, ebx, esp, ebp, esi, edi = REGLOCS
-xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 = XMMREGLOCS
-
-
 width_of_type = {
     INT : 1,
     REF : 1,
@@ -66,6 +37,9 @@
     no_lower_byte_regs = [esi, edi]
     save_around_call_regs = [eax, edx, ecx]
 
+    def convert_to_imm(self, c):
+        return c
+
     def call_result_location(self, v):
         return eax
 
@@ -87,23 +61,22 @@
     def __init__(self, longevity, frame_manager=None, assembler=None):
         RegisterManager.__init__(self, longevity, frame_manager=frame_manager,
                                  assembler=assembler)
-        self.constant_arrays = [self.new_const_array()]
-        self.constant_array_counter = 0
+        self.past_constant_arrays = []
+        self.cur_constant_array = None
+        self.constant_array_counter = BASE_CONSTANT_SIZE
+
+    def convert_to_imm(self, c):
+        if self.constant_array_counter >= BASE_CONSTANT_SIZE:
+            self.cur_constant_array = self.new_const_array()
+            self.past_constant_arrays.append(self.cur_constant_array)
+            self.constant_array_counter = 0
+        res = self.constant_array_counter
+        self.constant_array_counter += 1
+        arr = self.cur_constant_array
+        arr[res] = c.getfloat()
+        addr = rffi.cast(lltype.Signed, arr) + res * 8
+        return ConstInt(addr)
 
-##    def convert_to_imm(self, c):
-##        if self.constant_array_counter >= BASE_CONSTANT_SIZE:
-##            self.constant_arrays.append(self.new_const_array())
-##            self.constant_array_counter = 0
-##        res = self.constant_array_counter
-##        self.constant_array_counter += 1
-##        arr = self.constant_arrays[-1]
-##        arr[res] = c.getfloat()
-##        return self.get_addr_of_const_float(-1, res)
-
-    def get_addr_of_const_float(self, num_arr, num_pos):
-        arr = self.constant_arrays[num_arr]
-        return heap64(rffi.cast(lltype.Signed, arr) + num_pos * WORD * 2)
-        
     def after_call(self, v):
         # the result is stored in st0, but we don't have this around,
         # so genop_call will move it to some frame location immediately
@@ -167,8 +140,8 @@
     def _process_inputargs(self, inputargs):
         # XXX we can sort out here by longevity if we need something
         # more optimal
-        floatlocs = [-1] * len(inputargs)
-        nonfloatlocs = [-1] * len(inputargs)
+        floatlocs = [None] * len(inputargs)
+        nonfloatlocs = [None] * len(inputargs)
         # Don't use all_regs[0] for passing arguments around a loop.
         # Must be kept in sync with consider_jump().
         # XXX this should probably go to llsupport/regalloc.py
@@ -412,12 +385,17 @@
     consider_guard_isnull = _consider_guard
 
     def consider_finish(self, op, ignored):
-        locs = [self.loc(v) for v in op.args]
-        locs_are_ref = [v.type == REF for v in op.args]
+        # xxx assumes no duplicate Boxes in op.args
+        tmpvar = TempBox()
+        tmpreg = self.rm.force_allocate_reg(tmpvar)
+        for i in range(len(op.args)):
+            v = op.args[i]
+            loc = self.make_sure_var_in_reg(v)
+            self.assembler.generate_failure_save(v, loc, i, tmpreg)
+            self.possibly_free_var(v)
         fail_index = self.assembler.cpu.get_fail_descr_number(op.descr)
-        self.assembler.generate_failure(self.assembler.mc, fail_index, locs,
-                                        self.exc, locs_are_ref)
-        self.possibly_free_vars(op.args)
+        self.assembler.generate_failure(fail_index, self.exc)
+        self.rm.possibly_free_var(tmpvar)
 
     def consider_guard_no_exception(self, op, ignored):
         self.perform_guard(op, [], None)

Added: pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86/regloc.py
==============================================================================
--- (empty file)
+++ pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86/regloc.py	Sun Dec 20 11:56:16 2009
@@ -0,0 +1,35 @@
+from pypy.jit.metainterp.history import AbstractValue
+from pypy.jit.backend.x86 import rx86
+
+
+class AssemblerLocation(AbstractValue):
+    __slots__ = 'value'
+    _immutable_ = True
+    def _getregkey(self):
+        return self.value
+
+class StackLoc(AssemblerLocation):
+    _immutable_ = True
+    def __init__(self, position, ebp_offset):
+        assert ebp_offset < 0   # so no confusion with RegLoc.value
+        self.position = position
+        self.value = ebp_offset
+    def __repr__(self):
+        return '%d(%%ebp)' % (self.value,)
+
+class RegLoc(AssemblerLocation):
+    _immutable_ = True
+    def __init__(self, regnum, is_xmm):
+        assert regnum >= 0
+        self.value = regnum
+        self.is_xmm = is_xmm
+    def __repr__(self):
+        if self.is_xmm:
+            return rx86.R.xmmnames[self.value]
+        else:
+            return rx86.R.names[self.value]
+
+REGLOCS = [RegLoc(i, is_xmm=False) for i in range(8)]
+XMMREGLOCS = [RegLoc(i, is_xmm=True) for i in range(8)]
+eax, ecx, edx, ebx, esp, ebp, esi, edi = REGLOCS
+xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 = XMMREGLOCS

Modified: pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86/rx86.py
==============================================================================
--- pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86/rx86.py	(original)
+++ pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86/rx86.py	Sun Dec 20 11:56:16 2009
@@ -1,5 +1,5 @@
 import py
-from pypy.rlib.rarithmetic import intmask, r_ulonglong
+from pypy.rlib.rarithmetic import intmask
 from pypy.rlib.objectmodel import ComputedIntSymbolic, we_are_translated
 from pypy.rlib.objectmodel import specialize
 from pypy.rlib.unroll import unrolling_iterable
@@ -27,16 +27,6 @@
 def fits_in_32bits(value):
     return -2147483648 <= value <= 2147483647
 
-def intmask32(value):
-    # extract the 32 lower bits of 'value', returning a regular signed int
-    # (it is negative if 'value' is negative according to the 32-bits repr)
-    return intmask(rffi.cast(rffi.INT, value))
-
-def cast32to64(value):
-    # returns 'value' in the 32 lower bits of a 64-bit integer,
-    # with the remaining bits set to 0 (even if value is negative).
-    return r_ulonglong(rffi.cast(rffi.UINT, value))
-
 # ____________________________________________________________
 # Emit a single char
 
@@ -103,10 +93,22 @@
     return encode_immediate, argnum, width, None
 
 # ____________________________________________________________
+# Emit an immediate displacement (relative to the cur insn)
+
+def encode_relative(mc, target, _, orbyte):
+    assert orbyte == 0
+    offset = target - (mc.tell() + 4)
+    mc.writeimm32(offset)
+    return 0
+
+def relative(argnum):
+    return encode_relative, argnum, None, None
+
+# ____________________________________________________________
 # Emit a mod/rm referencing a stack location [EBP+offset]
 
 @specialize.arg(2)
-def encode_stack(mc, offset, force_32bits, orbyte):
+def encode_stack_bp(mc, offset, force_32bits, orbyte):
     if not force_32bits and single_byte(offset):
         mc.writechar(chr(0x40 | orbyte | R.ebp))
         mc.writeimm8(offset)
@@ -116,23 +118,39 @@
         mc.writeimm32(offset)
     return 0
 
-def stack(argnum, force_32bits=False):
-    return encode_stack, argnum, force_32bits, None
+def stack_bp(argnum, force_32bits=False):
+    return encode_stack_bp, argnum, force_32bits, None
+
+# ____________________________________________________________
+# Emit a mod/rm referencing a stack location [ESP+offset]
+
+def encode_stack_sp(mc, offset, _, orbyte):
+    SIB = chr((R.esp<<3) | R.esp)    #   use [esp+(no index)+offset]
+    if offset == 0:
+        mc.writechar(chr(0x04 | orbyte))
+        mc.writechar(SIB)
+    elif single_byte(offset):
+        mc.writechar(chr(0x44 | orbyte))
+        mc.writechar(SIB)
+        mc.writeimm8(offset)
+    else:
+        assert fits_in_32bits(offset)
+        mc.writechar(chr(0x84 | orbyte))
+        mc.writechar(SIB)
+        mc.writeimm32(offset)
+    return 0
+
+def stack_sp(argnum):
+    return encode_stack_sp, argnum, None, None
 
 # ____________________________________________________________
 # Emit a mod/rm referencing a memory location [reg1+offset]
 
-def reg_offset(reg, offset):
-    # returns a 64-bits integer encoding "reg1+offset".
-    # * 'offset' is stored as bytes 1-4 of the result;
-    # * 'reg1' is stored as byte 5 of the result.
+def encode_mem_reg_plus_const(mc, (reg, offset), _, orbyte):
     assert reg != R.esp and reg != R.ebp
     assert fits_in_32bits(offset)
-    return (r_ulonglong(reg) << 32) | cast32to64(offset)
-
-def encode_mem_reg_plus_const(mc, reg1_offset, _, orbyte):
-    reg1 = reg_number_3bits(mc, intmask(reg1_offset >> 32))
-    offset = intmask32(reg1_offset)
+    #
+    reg1 = reg_number_3bits(mc, reg)
     no_offset = offset == 0
     SIB = -1
     # 64-bits special cases for reg1 == r12 or r13
@@ -156,9 +174,8 @@
         mc.writeimm32(offset)
     return 0
 
-def rex_mem_reg_plus_const(mc, reg1_offset, _):
-    reg1 = intmask(reg1_offset >> 32)
-    if reg1 >= 8:
+def rex_mem_reg_plus_const(mc, (reg, offset), _):
+    if reg >= 8:
         return REX_B
     return 0
 
@@ -168,39 +185,22 @@
 # ____________________________________________________________
 # Emit a mod/rm referencing an array memory location [reg1+reg2*scale+offset]
 
-def reg_reg_scaleshift_offset(reg1, reg2, scaleshift, offset):
-    # returns a 64-bits integer encoding "reg1+reg2<<scaleshift+offset".
-    # * 'offset' is stored as bytes 1-4 of the result;
-    # * the SIB byte is computed and stored as byte 5 of the result;
-    # * for 64-bits mode, the optional REX.B and REX.X flags go to byte 6.
-    assert 0 <= reg1 < 16 and reg1 != R.ebp
-    assert 0 <= reg2 < 16 and reg2 != R.esp
+def encode_mem_reg_plus_scaled_reg_plus_const(mc,
+                                              (reg1, reg2, scaleshift, offset),
+                                              _, orbyte):
+    # emit "reg1 + (reg2 << scaleshift) + offset"
+    assert reg1 != R.ebp and reg2 != R.esp
     assert 0 <= scaleshift < 4
     assert fits_in_32bits(offset)
-    encoding = 0
-    if reg1 >= 8:
-        encoding |= REX_B << 8
-        reg1 &= 7
-    if reg2 >= 8:
-        encoding |= REX_X << 8
-        reg2 &= 7
-    encoding |= (scaleshift<<6) | (reg2<<3) | reg1
-    return (r_ulonglong(encoding) << 32) | cast32to64(offset)
-
-def encode_mem_reg_plus_scaled_reg_plus_const(mc, reg1_reg2_scaleshift_offset,
-                                              _, orbyte):
-    encoding = intmask(reg1_reg2_scaleshift_offset >> 32)
-    if mc.WORD == 4:
-        assert encoding <= 0xFF    # else registers r8..r15 have been used
-        SIB = chr(encoding)
-    else:
-        SIB = chr(encoding & 0xFF)
-    offset = intmask32(reg1_reg2_scaleshift_offset)
+    reg1 = reg_number_3bits(mc, reg1)
+    reg2 = reg_number_3bits(mc, reg2)
+    SIB = chr((scaleshift<<6) | (reg2<<3) | reg1)
+    #
     no_offset = offset == 0
     # 64-bits special case for reg1 == r13
     # (which look like ebp after being truncated to 3 bits)
     if mc.WORD == 8:
-        if (encoding & 7) == R.ebp:
+        if reg1 == R.ebp:
             no_offset = False
     # end of 64-bits special case
     if no_offset:
@@ -216,8 +216,13 @@
         mc.writeimm32(offset)
     return 0
 
-def rex_mem_reg_plus_scaled_reg_plus_const(mc, reg1_reg2_scaleshift_offset, _):
-    return intmask(reg1_reg2_scaleshift_offset >> (32+8))
+def rex_mem_reg_plus_scaled_reg_plus_const(mc,
+                                           (reg1, reg2, scaleshift, offset),
+                                           _):
+    rex = 0
+    if reg1 >= 8: rex |= REX_B
+    if reg2 >= 8: rex |= REX_X
+    return rex
 
 def mem_reg_plus_scaled_reg_plus_const(argnum):
     return (encode_mem_reg_plus_scaled_reg_plus_const, argnum, None,
@@ -232,34 +237,36 @@
 REX_B = 1
 
 @specialize.arg(2)
-def encode_rex(mc, _, basevalue, orbyte):
+def encode_rex(mc, rexbyte, basevalue, orbyte):
     if mc.WORD == 8:
-        assert 0 <= orbyte < 8
-        if basevalue != 0x40 or orbyte != 0:
-            mc.writechar(chr(basevalue | orbyte))
+        assert 0 <= rexbyte < 8
+        if basevalue != 0x40 or rexbyte != 0:
+            mc.writechar(chr(basevalue | rexbyte))
     else:
-        assert orbyte == 0
+        assert rexbyte == 0
     return 0
 
-rex_w  = encode_rex, None, (0x40 | REX_W), None
-rex_nw = encode_rex, None, 0x40, None
+rex_w  = encode_rex, 0, (0x40 | REX_W), None
+rex_nw = encode_rex, 0, 0x40, None
 
 # ____________________________________________________________
 
 def insn(*encoding):
     def encode(mc, *args):
-        orbyte = 0
+        rexbyte = 0
         if mc.WORD == 8:
             # compute the REX byte, if any
             for encode_step, arg, extra, rex_step in encoding_steps:
                 if rex_step:
                     if arg is not None:
                         arg = args[arg-1]
-                    orbyte |= rex_step(mc, arg, extra)
+                    rexbyte |= rex_step(mc, arg, extra)
+        args = (rexbyte,) + args
         # emit the bytes of the instruction
+        orbyte = 0
         for encode_step, arg, extra, rex_step in encoding_steps:
             if arg is not None:
-                arg = args[arg-1]
+                arg = args[arg]
             orbyte = encode_step(mc, arg, extra, orbyte)
         assert orbyte == 0
     #
@@ -274,22 +281,27 @@
     encoding_steps = unrolling_iterable(encoding_steps)
     return encode
 
+def xmminsn(*encoding):
+    encode = insn(*encoding)
+    encode.is_xmm_insn = True
+    return encode
+
 def common_modes(group):
     base = group * 8
-    INSN_ri8 = insn(rex_w, '\x83', orbyte(group<<3), register(1), '\xC0',
-                    immediate(2,'b'))
-    INSN_ri32 = insn(rex_w, '\x81', orbyte(group<<3), register(1), '\xC0',
-                     immediate(2))
+    char = chr(0xC0 | base)
+    INSN_ri8 = insn(rex_w, '\x83', register(1), char, immediate(2,'b'))
+    INSN_ri32 = insn(rex_w, '\x81', register(1), char, immediate(2))
     INSN_rr = insn(rex_w, chr(base+1), register(2,8), register(1,1), '\xC0')
-    INSN_rs = insn(rex_w, chr(base+3), register(1,8), stack(2))
+    INSN_rb = insn(rex_w, chr(base+3), register(1,8), stack_bp(2))
 
     def INSN_ri(mc, reg, immed):
         if single_byte(immed):
             INSN_ri8(mc, reg, immed)
         else:
             INSN_ri32(mc, reg, immed)
+    INSN_ri._always_inline_ = True      # try to constant-fold single_byte()
 
-    return INSN_ri, INSN_rr, INSN_rs
+    return INSN_ri, INSN_rr, INSN_rb
 
 # ____________________________________________________________
 
@@ -314,16 +326,18 @@
         self.writechar(chr((imm >> 16) & 0xFF))
         self.writechar(chr((imm >> 24) & 0xFF))
 
+    # ------------------------------ MOV ------------------------------
+
     MOV_ri = insn(rex_w, register(1), '\xB8', immediate(2, 'q'))
-    #MOV_si = insn(rex_w, '\xC7', orbyte(0<<3), stack(1), immediate(2))
     MOV_rr = insn(rex_w, '\x89', register(2,8), register(1), '\xC0')
-    MOV_sr = insn(rex_w, '\x89', register(2,8), stack(1))
-    MOV_rs = insn(rex_w, '\x8B', register(1,8), stack(2))
+    MOV_br = insn(rex_w, '\x89', register(2,8), stack_bp(1))
+    MOV_rb = insn(rex_w, '\x8B', register(1,8), stack_bp(2))
 
     # "MOV reg1, [reg2+offset]" and the opposite direction
     MOV_rm = insn(rex_w, '\x8B', register(1,8), mem_reg_plus_const(2))
     MOV_mr = insn(rex_w, '\x89', register(2,8), mem_reg_plus_const(1))
-    #MOV_mi = insn(rex_w, '\xC7', mem_reg_plus_const(1), immediate(2))
+    MOV_mi = insn(rex_w, '\xC7', orbyte(0<<3), mem_reg_plus_const(1),
+                                               immediate(2, 'i'))
 
     # "MOV reg1, [reg2+reg3*scale+offset]" and the opposite direction
     MOV_ra = insn(rex_w, '\x8B', register(1,8),
@@ -335,20 +349,46 @@
     MOV_rj = insn(rex_w, '\x8B', register(1,8), '\x05', immediate(2))
     MOV_jr = insn(rex_w, '\x89', register(2,8), '\x05', immediate(1))
 
-    ADD_ri, ADD_rr, ADD_rs = common_modes(0)
-    OR_ri,  OR_rr,  OR_rs  = common_modes(1)
-    AND_ri, AND_rr, AND_rs = common_modes(4)
-    SUB_ri, SUB_rr, SUB_rs = common_modes(5)
-    XOR_ri, XOR_rr, XOR_rs = common_modes(6)
-    CMP_ri, CMP_rr, CMP_rs = common_modes(7)
+    # ------------------------------ Arithmetic ------------------------------
+
+    ADD_ri, ADD_rr, ADD_rb = common_modes(0)
+    OR_ri,  OR_rr,  OR_rb  = common_modes(1)
+    AND_ri, AND_rr, AND_rb = common_modes(4)
+    SUB_ri, SUB_rr, SUB_rb = common_modes(5)
+    XOR_ri, XOR_rr, XOR_rb = common_modes(6)
+    CMP_ri, CMP_rr, CMP_rb = common_modes(7)
+
+    # ------------------------------ Misc stuff ------------------------------
 
     NOP = insn('\x90')
     RET = insn('\xC3')
 
     PUSH_r = insn(rex_nw, register(1), '\x50')
+    POP_r = insn(rex_nw, register(1), '\x58')
+
+    LEA_rb = insn(rex_w, '\x8D', register(1,8), stack_bp(2))
+    LEA32_rb = insn(rex_w, '\x8D', register(1,8),stack_bp(2,force_32bits=True))
+
+    CALL_l = insn('\xE8', relative(1))
+    CALL_r = insn(rex_nw, '\xFF', register(1), chr(0xC0 | (2<<3)))
+    CALL_b = insn('\xFF', orbyte(2<<3), stack_bp(1))
+
+    XCHG_rm = insn(rex_w, '\x87', register(1,8), mem_reg_plus_const(2))
+
+    # ------------------------------ SSE2 ------------------------------
+
+    MOVSD_rr = xmminsn('\xF2', rex_nw, '\x0F\x10', register(1,8), register(2),
+                                                              '\xC0')
+    MOVSD_rb = xmminsn('\xF2', rex_nw, '\x0F\x10', register(1,8), stack_bp(2))
+    MOVSD_br = xmminsn('\xF2', rex_nw, '\x0F\x11', register(2,8), stack_bp(1))
+    MOVSD_rs = xmminsn('\xF2', rex_nw, '\x0F\x10', register(1,8), stack_sp(2))
+    MOVSD_sr = xmminsn('\xF2', rex_nw, '\x0F\x11', register(2,8), stack_sp(1))
+    MOVSD_rm = xmminsn('\xF2', rex_nw, '\x0F\x10', register(1,8),
+                                                     mem_reg_plus_const(2))
+    MOVSD_mr = xmminsn('\xF2', rex_nw, '\x0F\x11', register(2,8),
+                                                     mem_reg_plus_const(1))
 
-    LEA_rs = insn(rex_w, '\x8D', register(1,8), stack(2))
-    LEA32_rs = insn(rex_w, '\x8D', register(1,8), stack(2, force_32bits=True))
+    # ------------------------------------------------------------
 
 
 class X86_32_CodeBuilder(AbstractX86CodeBuilder):
@@ -359,7 +399,8 @@
     WORD = 8
 
     def writeimm64(self, imm):
-        self.writeimm32(intmask32(imm))
+        imm32 = intmask(rffi.cast(rffi.INT, imm))
+        self.writeimm32(imm32)
         self.writeimm32(imm >> 32)
 
     # MOV_ri from the parent class is not wrong, but here is a better encoding
@@ -372,6 +413,16 @@
         else:
             AbstractX86CodeBuilder.MOV_ri(self, reg, immed)
 
+    # case of a 64-bit immediate: encode via RAX (assuming it's ok to
+    # randomly change this register at that point in time)
+    def CALL_l(self, target):
+        offset = target - (self.tell() + 5)
+        if fits_in_32bits(offset):
+            AbstractX86CodeBuilder.CALL_l(self, target)
+        else:
+            AbstractX86CodeBuilder.MOV_ri(self, R.eax, target)
+            AbstractX86CodeBuilder.CALL_r(self, R.eax)
+
     # unsupported -- must use e.g. MOV tmpreg, immed64; MOV reg, [tmpreg]
     def MOV_rj(self, reg, mem_immed):
         py.test.skip("MOV_rj unsupported")

Modified: pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86/support.py
==============================================================================
--- pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86/support.py	(original)
+++ pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86/support.py	Sun Dec 20 11:56:16 2009
@@ -1,4 +1,4 @@
-from pypy.rpython.lltypesystem import lltype, rffi, llmemory
+from pypy.rpython.lltypesystem import lltype, rffi
 
 def values_array(TP, size):
     ATP = lltype.GcArray(TP)
@@ -11,6 +11,9 @@
             return rffi.cast(lltype.Signed, lltype.direct_ptradd(
                 lltype.direct_arrayitems(self.ar), i))
 
+        def get_base_addr(self):
+            return rffi.cast(lltype.Signed, lltype.direct_arrayitems(self.ar))
+
         def setitem(self, i, v):
             self.ar[i] = v
 

Modified: pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86/test/test_jump.py
==============================================================================
--- pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86/test/test_jump.py	(original)
+++ pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86/test/test_jump.py	Sun Dec 20 11:56:16 2009
@@ -1,7 +1,7 @@
 from pypy.jit.metainterp.history import ConstInt
 from pypy.jit.backend.x86.regalloc import X86FrameManager
-from pypy.jit.backend.x86.regalloc import StackLoc, RegLoc
-from pypy.jit.backend.x86.regalloc import eax, ebx, ecx, edx, esi, edi
+from pypy.jit.backend.x86.regloc import StackLoc, RegLoc
+from pypy.jit.backend.x86.regloc import eax, ebx, ecx, edx, esi, edi
 from pypy.jit.backend.x86.jump import remap_frame_layout
 
 frame_pos = X86FrameManager.frame_pos

Modified: pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86/test/test_rx86.py
==============================================================================
--- pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86/test/test_rx86.py	(original)
+++ pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86/test/test_rx86.py	Sun Dec 20 11:56:16 2009
@@ -1,4 +1,4 @@
-import py
+import py, struct
 from pypy.jit.backend.x86.rx86 import *
 globals().update(R.__dict__)
 
@@ -7,11 +7,15 @@
         self.buffer = []
 
     def writechar(self, c):
+        assert isinstance(c, str) and len(c) == 1
         self.buffer.append(c)    # append a character
 
     def getvalue(self):
         return ''.join(self.buffer)
 
+    def tell(self):
+        return 0x76543210 + len(self.buffer)
+
 
 class CodeBuilder32(CodeBuilderMixin, X86_32_CodeBuilder):
     pass
@@ -26,44 +30,44 @@
     s.MOV_rr(ebx, ebp)
     assert s.getvalue() == '\x89\xEB'
 
-def test_mov_sr():
+def test_mov_br():
     s = CodeBuilder32()
-    s.MOV_sr(-36, edx)
+    s.MOV_br(-36, edx)
     assert s.getvalue() == '\x89\x55\xDC'
 
-def test_mov_rs():
+def test_mov_rb():
     s = CodeBuilder32()
-    s.MOV_rs(edx, -36)
+    s.MOV_rb(edx, -36)
     assert s.getvalue() == '\x8B\x55\xDC'
 
 def test_mov_rm():
     s = CodeBuilder32()
-    s.MOV_rm(edx, reg_offset(edi, 0))
-    s.MOV_rm(edx, reg_offset(edi, -128))
-    s.MOV_rm(edx, reg_offset(edi, 128))
+    s.MOV_rm(edx, (edi, 0))
+    s.MOV_rm(edx, (edi, -128))
+    s.MOV_rm(edx, (edi, 128))
     assert s.getvalue() == '\x8B\x17\x8B\x57\x80\x8B\x97\x80\x00\x00\x00'
 
 def test_mov_mr():
     s = CodeBuilder32()
-    s.MOV_mr(reg_offset(edi, 0), edx)
-    s.MOV_mr(reg_offset(edi, -128), edx)
-    s.MOV_mr(reg_offset(edi, 128), edx)
+    s.MOV_mr((edi, 0), edx)
+    s.MOV_mr((edi, -128), edx)
+    s.MOV_mr((edi, 128), edx)
     assert s.getvalue() == '\x89\x17\x89\x57\x80\x89\x97\x80\x00\x00\x00'
 
 def test_mov_ra():
     s = CodeBuilder32()
-    s.MOV_ra(edx, reg_reg_scaleshift_offset(esi, edi, 2, 0))
-    s.MOV_ra(edx, reg_reg_scaleshift_offset(esi, edi, 2, -128))
-    s.MOV_ra(edx, reg_reg_scaleshift_offset(esi, edi, 2, 128))
+    s.MOV_ra(edx, (esi, edi, 2, 0))
+    s.MOV_ra(edx, (esi, edi, 2, -128))
+    s.MOV_ra(edx, (esi, edi, 2, 128))
     assert s.getvalue() == ('\x8B\x14\xBE' +
                             '\x8B\x54\xBE\x80' +
                             '\x8B\x94\xBE\x80\x00\x00\x00')
 
 def test_mov_ar():
     s = CodeBuilder32()
-    s.MOV_ar(reg_reg_scaleshift_offset(esi, edi, 2, 0), edx)
-    s.MOV_ar(reg_reg_scaleshift_offset(esi, edi, 2, -128), edx)
-    s.MOV_ar(reg_reg_scaleshift_offset(esi, edi, 2, 128), edx)
+    s.MOV_ar((esi, edi, 2, 0), edx)
+    s.MOV_ar((esi, edi, 2, -128), edx)
+    s.MOV_ar((esi, edi, 2, 128), edx)
     assert s.getvalue() == ('\x89\x14\xBE' +
                             '\x89\x54\xBE\x80' +
                             '\x89\x94\xBE\x80\x00\x00\x00')
@@ -74,16 +78,22 @@
     s.ADD_rr(eax, eax)
     assert s.getvalue() == '\x90\x01\xC0'
 
-def test_lea_rs():
+def test_lea_rb():
     s = CodeBuilder32()
-    s.LEA_rs(ecx, -36)
+    s.LEA_rb(ecx, -36)
     assert s.getvalue() == '\x8D\x4D\xDC'
 
-def test_lea32_rs():
+def test_lea32_rb():
     s = CodeBuilder32()
-    s.LEA32_rs(ecx, -36)
+    s.LEA32_rb(ecx, -36)
     assert s.getvalue() == '\x8D\x8D\xDC\xFF\xFF\xFF'
 
+def test_call_l(s=None):
+    s = s or CodeBuilder32()
+    s.CALL_l(0x01234567)
+    ofs = 0x01234567 - (0x76543210+5)
+    assert s.getvalue() == '\xE8' + struct.pack("<i", ofs)
+
 
 class CodeBuilder64(CodeBuilderMixin, X86_64_CodeBuilder):
     pass
@@ -97,12 +107,21 @@
 
 def test_mov_rm_64():
     s = CodeBuilder64()
-    s.MOV_rm(edx, reg_offset(edi, 0))
-    s.MOV_rm(edx, reg_offset(r12, 0))
-    s.MOV_rm(edx, reg_offset(r13, 0))
+    s.MOV_rm(edx, (edi, 0))
+    s.MOV_rm(edx, (r12, 0))
+    s.MOV_rm(edx, (r13, 0))
     assert s.getvalue() == '\x48\x8B\x17\x49\x8b\x14\x24\x49\x8b\x55\x00'
 
 def test_mov_rm_negative_64():
     s = CodeBuilder64()
-    s.MOV_rm(edx, reg_offset(edi, -1))
+    s.MOV_rm(edx, (edi, -1))
     assert s.getvalue() == '\x48\x8B\x57\xFF'
+
+def test_call_l_64():
+    # first check that it works there too
+    test_call_l(CodeBuilder64())
+    # then check the other case
+    s = CodeBuilder64()
+    target = 0x0123456789ABCDEF
+    s.CALL_l(target)     # becomes indirect, via RAX
+    assert s.getvalue() == '\x48\xB8' + struct.pack("<q", target) + '\xFF\xD0'

Modified: pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py
==============================================================================
--- pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py	(original)
+++ pypy/branch/remove-ri386-multimethod-2/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py	Sun Dec 20 11:56:16 2009
@@ -43,30 +43,37 @@
     TESTDIR = 'rx86_32'
     X86_CodeBuilder = rx86.X86_32_CodeBuilder
     REGNAMES = ['%eax', '%ecx', '%edx', '%ebx', '%esp', '%ebp', '%esi', '%edi']
+    XMMREGNAMES = ['%%xmm%d' % i for i in range(16)]
     REGS = range(8)
     NONSPECREGS = [rx86.R.eax, rx86.R.ecx, rx86.R.edx, rx86.R.ebx,
                    rx86.R.esi, rx86.R.edi]
+    methname = '?'
 
     def reg_tests(self):
         return self.REGS
 
-    def stack_tests(self, count=COUNT1):
+    def stack_bp_tests(self, count=COUNT1):
         return ([0, 4, -4, 124, 128, -128, -132] +
                 [random.randrange(-0x20000000, 0x20000000) * 4
                  for i in range(count)])
 
+    def stack_sp_tests(self, count=COUNT1):
+        return ([0, 4, 124, 128] +
+                [random.randrange(0, 0x20000000) * 4
+                 for i in range(count)])
+
     def memory_tests(self):
-        return [rx86.reg_offset(reg, ofs)
+        return [(reg, ofs)
                     for reg in self.NONSPECREGS
-                    for ofs in self.stack_tests(5)
+                    for ofs in self.stack_bp_tests(5)
                 ]
 
     def array_tests(self):
-        return [rx86.reg_reg_scaleshift_offset(reg1, reg2, scaleshift, ofs)
+        return [(reg1, reg2, scaleshift, ofs)
                     for reg1 in self.NONSPECREGS
                     for reg2 in self.NONSPECREGS
                     for scaleshift in [0, 1, 2, 3]
-                    for ofs in self.stack_tests(1)
+                    for ofs in self.stack_bp_tests(1)
                 ]
 
     def imm8_tests(self):
@@ -81,41 +88,39 @@
              [random.randrange(128, 256) for i in range(COUNT1)])
         return self.imm8_tests() + v
 
+    def relative_tests(self):
+        py.test.skip("explicit test required for %r" % (self.methname,))
+
     def get_all_tests(self):
         return {
             'r': self.reg_tests,
-            's': self.stack_tests,
+            'b': self.stack_bp_tests,
+            's': self.stack_sp_tests,
             'm': self.memory_tests,
             'a': self.array_tests,
             'i': self.imm32_tests,
             'j': self.imm32_tests,
+            'l': self.relative_tests,
             }
 
     def assembler_operand_reg(self, regnum):
-        return self.REGNAMES[regnum]
+        if self.is_xmm_insn:
+            return self.XMMREGNAMES[regnum]
+        else:
+            return self.REGNAMES[regnum]
 
-    def assembler_operand_stack(self, position):
+    def assembler_operand_stack_bp(self, position):
         return '%d(%s)' % (position, self.REGNAMES[5])
 
-    def assembler_operand_memory(self, reg1_offset):
-        reg1 = intmask(reg1_offset >> 32)
-        offset = intmask(reg1_offset)
+    def assembler_operand_stack_sp(self, position):
+        return '%d(%s)' % (position, self.REGNAMES[4])
+
+    def assembler_operand_memory(self, (reg1, offset)):
         if not offset: offset = ''
         return '%s(%s)' % (offset, self.REGNAMES[reg1])
 
-    def assembler_operand_array(self, reg1_reg2_scaleshift_offset):
-        SIB = intmask(reg1_reg2_scaleshift_offset >> 32)
-        rex = SIB >> 8
-        SIB = SIB & 0xFF
-        offset = intmask(reg1_reg2_scaleshift_offset)
+    def assembler_operand_array(self, (reg1, reg2, scaleshift, offset)):
         if not offset: offset = ''
-        reg1 = SIB & 7
-        reg2 = (SIB >> 3) & 7
-        scaleshift = SIB >> 6
-        if rex & rx86.REX_B:
-            reg1 |= 8
-        if rex & rx86.REX_X:
-            reg2 |= 8
         return '%s(%s,%s,%d)' % (offset, self.REGNAMES[reg1],
                                  self.REGNAMES[reg2], 1<<scaleshift)
 
@@ -128,7 +133,8 @@
     def get_all_assembler_operands(self):
         return {
             'r': self.assembler_operand_reg,
-            's': self.assembler_operand_stack,
+            'b': self.assembler_operand_stack_bp,
+            's': self.assembler_operand_stack_sp,
             'm': self.assembler_operand_memory,
             'a': self.assembler_operand_array,
             'i': self.assembler_operand_imm,
@@ -150,7 +156,7 @@
     ##        for m, extra in args:
     ##            if m in (i386.MODRM, i386.MODRM8) or all:
     ##                suffix = suffixes[sizes[m]] + suffix
-            if argmodes:
+            if argmodes and not self.is_xmm_insn:
                 suffix = suffixes[self.WORD]
 
             following = ""
@@ -230,6 +236,9 @@
         else:
             instrname, argmodes = methname, ''
         print "Testing %s with argmodes=%r" % (instrname, argmodes)
+        self.methname = methname
+        self.is_xmm_insn = getattr(getattr(rx86.AbstractX86CodeBuilder,
+                                           methname), 'is_xmm_insn', False)
         ilist = self.make_all_tests(methname, argmodes)
         oplist, as_code = self.run_test(methname, instrname, argmodes, ilist)
         cc = self.get_code_checker_class()(as_code)



More information about the Pypy-commit mailing list