[pypy-svn] r76931 - in pypy/trunk/pypy: config jit/backend/llsupport jit/backend/llsupport/test jit/backend/x86 jit/backend/x86/test module/pypyjit/test rpython/memory/gctransform translator/c/gcc translator/c/gcc/test translator/c/gcc/test/elf64 translator/platform

arigo at codespeak.net arigo at codespeak.net
Wed Sep 8 10:06:18 CEST 2010


Author: arigo
Date: Wed Sep  8 10:06:15 2010
New Revision: 76931

Added:
   pypy/trunk/pypy/translator/c/gcc/test/elf64/
   pypy/trunk/pypy/translator/c/gcc/test/elf64/track_32bit_reg_zeroextend.s
   pypy/trunk/pypy/translator/c/gcc/test/elf64/track_basic_argument_registers.s
   pypy/trunk/pypy/translator/c/gcc/test/elf64/track_jumptable.c   (contents, props changed)
   pypy/trunk/pypy/translator/c/gcc/test/elf64/track_jumptable.s
   pypy/trunk/pypy/translator/c/gcc/test/elf64/track_negative_rsp_offset.s
   pypy/trunk/pypy/translator/c/gcc/test/elf64/track_varargs_function.s
Modified:
   pypy/trunk/pypy/config/translationoption.py
   pypy/trunk/pypy/jit/backend/llsupport/gc.py
   pypy/trunk/pypy/jit/backend/llsupport/test/test_gc.py
   pypy/trunk/pypy/jit/backend/x86/assembler.py
   pypy/trunk/pypy/jit/backend/x86/regalloc.py
   pypy/trunk/pypy/jit/backend/x86/regloc.py
   pypy/trunk/pypy/jit/backend/x86/rx86.py
   pypy/trunk/pypy/jit/backend/x86/test/test_gc_integration.py
   pypy/trunk/pypy/jit/backend/x86/test/test_zrpy_gc.py
   pypy/trunk/pypy/jit/backend/x86/test/test_ztranslation.py
   pypy/trunk/pypy/module/pypyjit/test/test_pypy_c.py
   pypy/trunk/pypy/rpython/memory/gctransform/asmgcroot.py
   pypy/trunk/pypy/translator/c/gcc/instruction.py
   pypy/trunk/pypy/translator/c/gcc/test/conftest.py
   pypy/trunk/pypy/translator/c/gcc/test/test_trackgcroot.py
   pypy/trunk/pypy/translator/c/gcc/trackgcroot.py
   pypy/trunk/pypy/translator/platform/linux.py
   pypy/trunk/pypy/translator/platform/posix.py
Log:
Merge branch/asmgcc-64.  I *think* that after a couple of minor changes
it does not break asmgcc on 32 bits, and it seems to work on 64 bits too.



Modified: pypy/trunk/pypy/config/translationoption.py
==============================================================================
--- pypy/trunk/pypy/config/translationoption.py	(original)
+++ pypy/trunk/pypy/config/translationoption.py	Wed Sep  8 10:06:15 2010
@@ -343,11 +343,7 @@
     }
 
 def final_check_config(config):
-    # For now, 64-bit JIT requires boehm.  You have to say it explicitly
-    # with --gc=boehm, so that you don't get boehm by mistake.
-    if IS_64_BITS:
-        if config.translation.jit and config.translation.gc != 'boehm':
-            raise ConfigError("for now, 64-bit JIT requires --gc=boehm")
+    pass
 
 def set_opt_level(config, level):
     """Apply optimization suggestions on the 'config'.

Modified: pypy/trunk/pypy/jit/backend/llsupport/gc.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/llsupport/gc.py	(original)
+++ pypy/trunk/pypy/jit/backend/llsupport/gc.py	Wed Sep  8 10:06:15 2010
@@ -251,13 +251,25 @@
         if oldgcmap:
             lltype.free(oldgcmap, flavor='raw')
 
-    def get_basic_shape(self):
-        return [chr(self.LOC_EBP_PLUS  | 4),    # return addr: at   4(%ebp)
-                chr(self.LOC_EBP_MINUS | 4),    # saved %ebx:  at  -4(%ebp)
-                chr(self.LOC_EBP_MINUS | 8),    # saved %esi:  at  -8(%ebp)
-                chr(self.LOC_EBP_MINUS | 12),   # saved %edi:  at -12(%ebp)
-                chr(self.LOC_EBP_PLUS  | 0),    # saved %ebp:  at    (%ebp)
-                chr(0)]
+    def get_basic_shape(self, is_64_bit=False):
+        # XXX: Should this code even really know about stack frame layout of
+        # the JIT?
+        if is_64_bit:
+            return [chr(self.LOC_EBP_PLUS  | 8),
+                    chr(self.LOC_EBP_MINUS | 8),
+                    chr(self.LOC_EBP_MINUS | 16),
+                    chr(self.LOC_EBP_MINUS | 24),
+                    chr(self.LOC_EBP_MINUS | 32),
+                    chr(self.LOC_EBP_MINUS | 40),
+                    chr(self.LOC_EBP_PLUS  | 0),
+                    chr(0)]
+        else:
+            return [chr(self.LOC_EBP_PLUS  | 4),    # return addr: at   4(%ebp)
+                    chr(self.LOC_EBP_MINUS | 4),    # saved %ebx:  at  -4(%ebp)
+                    chr(self.LOC_EBP_MINUS | 8),    # saved %esi:  at  -8(%ebp)
+                    chr(self.LOC_EBP_MINUS | 12),   # saved %edi:  at -12(%ebp)
+                    chr(self.LOC_EBP_PLUS  | 0),    # saved %ebp:  at    (%ebp)
+                    chr(0)]
 
     def _encode_num(self, shape, number):
         assert number >= 0
@@ -276,17 +288,9 @@
             num = self.LOC_EBP_MINUS | (-offset)
         self._encode_num(shape, num)
 
-    def add_ebx(self, shape):
-        shape.append(chr(self.LOC_REG | 4))
-
-    def add_esi(self, shape):
-        shape.append(chr(self.LOC_REG | 8))
-
-    def add_edi(self, shape):
-        shape.append(chr(self.LOC_REG | 12))
-
-    def add_ebp(self, shape):
-        shape.append(chr(self.LOC_REG | 16))
+    def add_callee_save_reg(self, shape, reg_index):
+        assert reg_index > 0
+        shape.append(chr(self.LOC_REG | (reg_index << 2)))
 
     def compress_callshape(self, shape):
         # Similar to compress_callshape() in trackgcroot.py.

Modified: pypy/trunk/pypy/jit/backend/llsupport/test/test_gc.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/llsupport/test/test_gc.py	(original)
+++ pypy/trunk/pypy/jit/backend/llsupport/test/test_gc.py	Wed Sep  8 10:06:15 2010
@@ -73,16 +73,16 @@
     gcrootmap.add_ebp_offset(shape, num1)
     gcrootmap.add_ebp_offset(shape, num2)
     assert shape == map(chr, [6, 7, 11, 15, 2, 0, num1a, num2b, num2a])
-    gcrootmap.add_ebx(shape)
+    gcrootmap.add_callee_save_reg(shape, 1)
     assert shape == map(chr, [6, 7, 11, 15, 2, 0, num1a, num2b, num2a,
                               4])
-    gcrootmap.add_esi(shape)
+    gcrootmap.add_callee_save_reg(shape, 2)
     assert shape == map(chr, [6, 7, 11, 15, 2, 0, num1a, num2b, num2a,
                               4, 8])
-    gcrootmap.add_edi(shape)
+    gcrootmap.add_callee_save_reg(shape, 3)
     assert shape == map(chr, [6, 7, 11, 15, 2, 0, num1a, num2b, num2a,
                               4, 8, 12])
-    gcrootmap.add_ebp(shape)
+    gcrootmap.add_callee_save_reg(shape, 4)
     assert shape == map(chr, [6, 7, 11, 15, 2, 0, num1a, num2b, num2a,
                               4, 8, 12, 16])
     #

Modified: pypy/trunk/pypy/jit/backend/x86/assembler.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/x86/assembler.py	(original)
+++ pypy/trunk/pypy/jit/backend/x86/assembler.py	Wed Sep  8 10:06:15 2010
@@ -273,7 +273,8 @@
         if IS_X86_32:
             self.mc.MOV_sr(WORD, edx.value)        # save it as the new argument
         elif IS_X86_64:
-            # FIXME: We can't just clobber rdi like this, can we?
+            # rdi can be clobbered: its content was forced to the stack
+            # by _fastpath_malloc(), like all other save_around_call_regs.
             self.mc.MOV_rr(edi.value, edx.value)
 
         addr = self.cpu.gc_ll_descr.get_malloc_fixedsize_slowpath_addr()
@@ -1256,8 +1257,12 @@
             sizeof_ti = rffi.sizeof(GCData.TYPE_INFO)
             type_info_group = llop.gc_get_type_info_group(llmemory.Address)
             type_info_group = rffi.cast(lltype.Signed, type_info_group)
-            expected_typeid = (classptr - sizeof_ti - type_info_group) >> 2
-            self.mc.CMP16(mem(locs[0], 0), ImmedLoc(expected_typeid))
+            expected_typeid = classptr - sizeof_ti - type_info_group
+            if IS_X86_32:
+                expected_typeid >>= 2
+                self.mc.CMP16(mem(locs[0], 0), ImmedLoc(expected_typeid))
+            elif IS_X86_64:
+                self.mc.CMP32_mi((locs[0].value, 0), expected_typeid)
 
     def genop_guard_guard_class(self, ign_1, guard_op, guard_token, locs, ign_2):
         self.mc.ensure_bytes_available(256)

Modified: pypy/trunk/pypy/jit/backend/x86/regalloc.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/x86/regalloc.py	(original)
+++ pypy/trunk/pypy/jit/backend/x86/regalloc.py	Wed Sep  8 10:06:15 2010
@@ -26,6 +26,12 @@
     no_lower_byte_regs = [esi, edi]
     save_around_call_regs = [eax, edx, ecx]
 
+    REGLOC_TO_GCROOTMAP_REG_INDEX = {
+        ebx: 1,
+        esi: 2,
+        edi: 3,
+    }
+
     def call_result_location(self, v):
         return eax
 
@@ -47,6 +53,13 @@
     no_lower_byte_regs = []
     save_around_call_regs = [eax, ecx, edx, esi, edi, r8, r9, r10]
 
+    REGLOC_TO_GCROOTMAP_REG_INDEX = {
+        ebx: 1,
+        r12: 2,
+        r13: 3,
+        r14: 4,
+        r15: 5,
+    }
 
 class FloatConstants(object):
     BASE_CONSTANT_SIZE = 1000
@@ -694,23 +707,18 @@
     def _fastpath_malloc(self, op, descr):
         assert isinstance(descr, BaseSizeDescr)
         gc_ll_descr = self.assembler.cpu.gc_ll_descr
-        tmp0 = TempBox()
         self.rm.force_allocate_reg(op.result, selected_reg=eax)
-        self.rm.force_allocate_reg(tmp0, selected_reg=edx)
-        # XXX about the next 10 lines: why not just say
-        #      force_allocate_reg(tmp1, selected_reg=ecx)?????
-        for v, reg in self.rm.reg_bindings.items():
-            if reg is ecx:
-                to_sync = v
-                break
-        else:
-            to_sync = None
-        if to_sync is not None:
-            self.rm._sync_var(to_sync)
-            del self.rm.reg_bindings[to_sync]
-            self.rm.free_regs.append(ecx)
-        # we need to do it here, so edx is not in reg_bindings
-        self.rm.possibly_free_var(tmp0)
+        # We need to force-allocate each of save_around_call_regs now.
+        # The alternative would be to save and restore them around the
+        # actual call to malloc(), in the rare case where we need to do
+        # it; however, mark_gc_roots() would need to be adapted to know
+        # where the variables end up being saved.  Messy.
+        for reg in self.rm.save_around_call_regs:
+            if reg is not eax:
+                tmp_box = TempBox()
+                self.rm.force_allocate_reg(tmp_box, selected_reg=reg)
+                self.rm.possibly_free_var(tmp_box)
+
         self.assembler.malloc_cond_fixedsize(
             gc_ll_descr.get_nursery_free_addr(),
             gc_ll_descr.get_nursery_top_addr(),
@@ -962,7 +970,7 @@
         pass
 
     def get_mark_gc_roots(self, gcrootmap):
-        shape = gcrootmap.get_basic_shape()
+        shape = gcrootmap.get_basic_shape(IS_X86_64)
         for v, val in self.fm.frame_bindings.items():
             if (isinstance(v, BoxPtr) and self.rm.stays_alive(v)):
                 assert isinstance(val, StackLoc)
@@ -971,15 +979,8 @@
             if reg is eax:
                 continue      # ok to ignore this one
             if (isinstance(v, BoxPtr) and self.rm.stays_alive(v)):
-                if reg is ebx:
-                    gcrootmap.add_ebx(shape)
-                elif reg is esi:
-                    gcrootmap.add_esi(shape)
-                elif reg is edi:
-                    gcrootmap.add_edi(shape)
-                else:
-                    print "[get_mark_gc_roots] bogus register", reg
-                    assert False
+                assert reg in self.rm.REGLOC_TO_GCROOTMAP_REG_INDEX
+                gcrootmap.add_callee_save_reg(shape, self.rm.REGLOC_TO_GCROOTMAP_REG_INDEX[reg])
         return gcrootmap.compress_callshape(shape)
 
     def consider_force_token(self, op):

Modified: pypy/trunk/pypy/jit/backend/x86/regloc.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/x86/regloc.py	(original)
+++ pypy/trunk/pypy/jit/backend/x86/regloc.py	Wed Sep  8 10:06:15 2010
@@ -1,7 +1,7 @@
 from pypy.jit.metainterp.history import AbstractValue, ConstInt
 from pypy.jit.backend.x86 import rx86
 from pypy.rlib.unroll import unrolling_iterable
-from pypy.jit.backend.x86.arch import WORD
+from pypy.jit.backend.x86.arch import WORD, IS_X86_32, IS_X86_64
 from pypy.tool.sourcetools import func_with_new_name
 from pypy.rlib.objectmodel import specialize
 from pypy.rlib.rarithmetic import intmask

Modified: pypy/trunk/pypy/jit/backend/x86/rx86.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/x86/rx86.py	(original)
+++ pypy/trunk/pypy/jit/backend/x86/rx86.py	Wed Sep  8 10:06:15 2010
@@ -462,6 +462,8 @@
     CMP_ji = select_8_or_32_bit_immed(CMP_ji8, CMP_ji32)
     CMP_rj = insn(rex_w, '\x3B', register(1, 8), '\x05', immediate(2))
 
+    CMP32_mi = insn(rex_nw, '\x81', orbyte(7<<3), mem_reg_plus_const(1), immediate(2))
+
     AND8_rr = insn(rex_w, '\x20', byte_register(1), byte_register(2,8), '\xC0')
 
     OR8_rr = insn(rex_w, '\x08', byte_register(1), byte_register(2,8), '\xC0')

Modified: pypy/trunk/pypy/jit/backend/x86/test/test_gc_integration.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/x86/test/test_gc_integration.py	(original)
+++ pypy/trunk/pypy/jit/backend/x86/test/test_gc_integration.py	Wed Sep  8 10:06:15 2010
@@ -26,16 +26,13 @@
 CPU = getcpuclass()
 
 class MockGcRootMap(object):
-    def get_basic_shape(self):
+    def get_basic_shape(self, is_64_bit):
         return ['shape']
     def add_ebp_offset(self, shape, offset):
         shape.append(offset)
-    def add_ebx(self, shape):
-        shape.append('ebx')
-    def add_esi(self, shape):
-        shape.append('esi')
-    def add_edi(self, shape):
-        shape.append('edi')
+    def add_callee_save_reg(self, shape, reg_index):
+        index_to_name = { 1: 'ebx', 2: 'esi', 3: 'edi' }
+        shape.append(index_to_name[reg_index])
     def compress_callshape(self, shape):
         assert shape[0] == 'shape'
         return ['compressed'] + shape[1:]

Modified: pypy/trunk/pypy/jit/backend/x86/test/test_zrpy_gc.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/x86/test/test_zrpy_gc.py	(original)
+++ pypy/trunk/pypy/jit/backend/x86/test/test_zrpy_gc.py	Wed Sep  8 10:06:15 2010
@@ -128,10 +128,6 @@
 
 class TestCompileHybrid(object):
     def setup_class(cls):
-        if IS_X86_64:
-            # No hybrid GC on 64-bit for the time being
-            py.test.skip()
-
         funcs = []
         name_to_func = {}
         for fullname in dir(cls):

Modified: pypy/trunk/pypy/jit/backend/x86/test/test_ztranslation.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/x86/test/test_ztranslation.py	(original)
+++ pypy/trunk/pypy/jit/backend/x86/test/test_ztranslation.py	Wed Sep  8 10:06:15 2010
@@ -125,10 +125,6 @@
         return t
 
     def test_external_exception_handling_translates(self):
-        # FIXME
-        if IS_X86_64:
-            import py.test; py.test.skip()
-
         jitdriver = JitDriver(greens = [], reds = ['n', 'total'])
 
         class ImDone(Exception):

Modified: pypy/trunk/pypy/module/pypyjit/test/test_pypy_c.py
==============================================================================
--- pypy/trunk/pypy/module/pypyjit/test/test_pypy_c.py	(original)
+++ pypy/trunk/pypy/module/pypyjit/test/test_pypy_c.py	Wed Sep  8 10:06:15 2010
@@ -189,7 +189,7 @@
                 return r
         ''', 28,
                    ([5], 120),
-                    ([20], 2432902008176640000L))
+                    ([25], 15511210043330985984000000L))
 
     def test_factorialrec(self):
         self.run_source('''
@@ -200,7 +200,7 @@
                     return 1
         ''', 0,
                    ([5], 120),
-                    ([20], 2432902008176640000L))
+                    ([25], 15511210043330985984000000L))
 
     def test_richards(self):
         self.run_source('''

Modified: pypy/trunk/pypy/rpython/memory/gctransform/asmgcroot.py
==============================================================================
--- pypy/trunk/pypy/rpython/memory/gctransform/asmgcroot.py	(original)
+++ pypy/trunk/pypy/rpython/memory/gctransform/asmgcroot.py	Wed Sep  8 10:06:15 2010
@@ -18,6 +18,7 @@
 #  The .s file produced by GCC is then parsed by trackgcroot.py.
 #
 
+IS_64_BITS = sys.maxint > 2147483647
 
 class AsmGcRootFrameworkGCTransformer(FrameworkGCTransformer):
     _asmgcc_save_restore_arguments = None
@@ -326,7 +327,7 @@
             ll_assert(reg < CALLEE_SAVED_REGS, "bad register location")
             return callee.regs_stored_at[reg]
         elif kind == LOC_ESP_PLUS:    # in the caller stack frame at N(%esp)
-            esp_in_caller = callee.frame_address + 4
+            esp_in_caller = callee.frame_address + sizeofaddr
             return esp_in_caller + offset
         elif kind == LOC_EBP_PLUS:    # in the caller stack frame at N(%ebp)
             ebp_in_caller = callee.regs_stored_at[INDEX_OF_EBP].address[0]
@@ -415,11 +416,12 @@
     key1 = addr1.address[0]
     key2 = addr2.address[0]
     if key1 < key2:
-        return -1
+        result = -1
     elif key1 == key2:
-        return 0
+        result = 0
     else:
-        return 1
+        result = 1
+    return rffi.cast(rffi.INT, result)
 
 # ____________________________________________________________
 
@@ -464,9 +466,15 @@
 #   - frame address (actually the addr of the retaddr of the current function;
 #                    that's the last word of the frame in memory)
 #
-CALLEE_SAVED_REGS = 4       # there are 4 callee-saved registers
-INDEX_OF_EBP      = 3
-FRAME_PTR         = CALLEE_SAVED_REGS    # the frame is at index 4 in the array
+
+if IS_64_BITS:
+    CALLEE_SAVED_REGS = 6
+    INDEX_OF_EBP      = 5
+    FRAME_PTR         = CALLEE_SAVED_REGS
+else:
+    CALLEE_SAVED_REGS = 4       # there are 4 callee-saved registers
+    INDEX_OF_EBP      = 3
+    FRAME_PTR         = CALLEE_SAVED_REGS    # the frame is at index 4 in the array
 
 ASM_CALLBACK_PTR = lltype.Ptr(lltype.FuncType([], lltype.Void))
 

Modified: pypy/trunk/pypy/translator/c/gcc/instruction.py
==============================================================================
--- pypy/trunk/pypy/translator/c/gcc/instruction.py	(original)
+++ pypy/trunk/pypy/translator/c/gcc/instruction.py	Wed Sep  8 10:06:15 2010
@@ -5,6 +5,14 @@
 LOC_MASK      = 0x03
 LOC_NOWHERE   = LOC_REG | 0
 
+# x86-32 registers sometimes used to pass arguments when gcc optimizes
+# a function's calling convention
+ARGUMENT_REGISTERS_32 = ('%eax', '%edx', '%ecx')
+
+# x86-64 registers used to pass arguments
+ARGUMENT_REGISTERS_64 = ('%rdi', '%rsi', '%rdx', '%rcx', '%r8', '%r9')
+
+
 def frameloc_esp(offset):
     assert offset >= 0
     assert offset % 4 == 0
@@ -19,7 +27,8 @@
 
 
 class SomeNewValue(object):
-    pass
+    def __repr__(self):
+        return 'somenewvalue'
 somenewvalue = SomeNewValue()
 
 class LocalVar(object):
@@ -42,7 +51,7 @@
         else:
             return 1
 
-    def getlocation(self, framesize, uses_frame_pointer):
+    def getlocation(self, framesize, uses_frame_pointer, wordsize):
         if (self.hint == 'esp' or not uses_frame_pointer
             or self.ofs_from_frame_end % 2 != 0):
             # try to use esp-relative addressing
@@ -52,7 +61,7 @@
             # we can get an odd value if the framesize is marked as bogus
             # by visit_andl()
         assert uses_frame_pointer
-        ofs_from_ebp = self.ofs_from_frame_end + 4
+        ofs_from_ebp = self.ofs_from_frame_end + wordsize
         return frameloc_ebp(ofs_from_ebp)
 
 
@@ -81,22 +90,28 @@
         self.previous_insns = []   # all insns that jump (or fallthrough) here
 
 class InsnFunctionStart(Insn):
+    _args_ = ['arguments']
     framesize = 0
     previous_insns = ()
-    def __init__(self, registers):
+    def __init__(self, registers, wordsize):
         self.arguments = {}
         for reg in registers:
             self.arguments[reg] = somenewvalue
+        self.wordsize = wordsize
 
     def source_of(self, localvar, tag):
         if localvar not in self.arguments:
-            if localvar in ('%eax', '%edx', '%ecx'):
+            if self.wordsize == 4 and localvar in ARGUMENT_REGISTERS_32:
                 # xxx this might show a bug in trackgcroot.py failing to
                 # figure out which instruction stored a value in these
                 # registers.  However, this case also occurs when the
                 # the function's calling convention was optimized by gcc:
                 # the 3 registers above are then used to pass arguments
                 pass
+            elif self.wordsize == 8 and localvar in ARGUMENT_REGISTERS_64:
+                # this is normal: these registers are always used to
+                # pass arguments
+                pass
             else:
                 assert (isinstance(localvar, LocalVar) and
                         localvar.ofs_from_frame_end > 0), (
@@ -218,15 +233,16 @@
         return {self.loc: None}
 
 class InsnPrologue(Insn):
+    def __init__(self, wordsize):
+        self.wordsize = wordsize
     def __setattr__(self, attr, value):
         if attr == 'framesize':
-            assert value == 4, ("unrecognized function prologue - "
-                                "only supports push %ebp; movl %esp, %ebp")
+            assert value == self.wordsize, (
+                "unrecognized function prologue - "
+                "only supports push %ebp; movl %esp, %ebp")
         Insn.__setattr__(self, attr, value)
 
 class InsnEpilogue(Insn):
     def __init__(self, framesize=None):
         if framesize is not None:
             self.framesize = framesize
-
-

Modified: pypy/trunk/pypy/translator/c/gcc/test/conftest.py
==============================================================================
--- pypy/trunk/pypy/translator/c/gcc/test/conftest.py	(original)
+++ pypy/trunk/pypy/translator/c/gcc/test/conftest.py	Wed Sep  8 10:06:15 2010
@@ -1,8 +1,6 @@
 import py
 from pypy.jit.backend import detect_cpu
-
 cpu = detect_cpu.autodetect()
 def pytest_runtest_setup(item):
-    if cpu != 'x86':
+    if cpu not in ('x86', 'x86_64'):
         py.test.skip("x86 directory skipped: cpu is %r" % (cpu,))
-    

Added: pypy/trunk/pypy/translator/c/gcc/test/elf64/track_32bit_reg_zeroextend.s
==============================================================================
--- (empty file)
+++ pypy/trunk/pypy/translator/c/gcc/test/elf64/track_32bit_reg_zeroextend.s	Wed Sep  8 10:06:15 2010
@@ -0,0 +1,15 @@
+	.type	foobar, @function
+foobar:
+	pushq %rbp
+	movq %rsp, %rbp
+	call some_function
+	;; expected {8(%rbp) | %rbx, %r12, %r13, %r14, %r15, (%rbp) | }
+	movl $const1, %edx
+	movl $const2, %r10d
+	xorl %r10d, %r11d
+	/* GCROOT %rdx */
+	/* GCROOT %r10 */
+	/* GCROOT %r11 */
+	leave
+	ret
+	.size	foobar, .-foobar

Added: pypy/trunk/pypy/translator/c/gcc/test/elf64/track_basic_argument_registers.s
==============================================================================
--- (empty file)
+++ pypy/trunk/pypy/translator/c/gcc/test/elf64/track_basic_argument_registers.s	Wed Sep  8 10:06:15 2010
@@ -0,0 +1,31 @@
+	.type	foobar, @function
+foobar:
+.LFB0:
+	.cfi_startproc
+	pushq	%rbp
+	.cfi_def_cfa_offset 16
+	movq	%rsp, %rbp
+	.cfi_offset 6, -16
+	.cfi_def_cfa_register 6
+	subq	$48, %rsp
+	movq	%rdi, -8(%rbp)
+	movq	%rsi, -16(%rbp)
+	movq	%rdx, -24(%rbp)
+	movq	%rcx, -32(%rbp)
+	movq	%r8, -40(%rbp)
+	movq	%r9, -48(%rbp)
+	movl	$0, %eax
+	call	some_function
+	;; expected {8(%rbp) | %rbx, %r12, %r13, %r14, %r15, (%rbp) | -8(%rbp), -16(%rbp), -24(%rbp), -32(%rbp), -40(%rbp), -48(%rbp)}
+	/* GCROOT -8(%rbp) */
+	/* GCROOT -16(%rbp) */
+	/* GCROOT -24(%rbp) */
+	/* GCROOT -32(%rbp) */
+	/* GCROOT -40(%rbp) */
+	/* GCROOT -48(%rbp) */
+	movq	-24(%rbp), %rax
+	leave
+	ret
+	.cfi_endproc
+.LFE0:
+	.size	foobar, .-foobar

Added: pypy/trunk/pypy/translator/c/gcc/test/elf64/track_jumptable.c
==============================================================================
--- (empty file)
+++ pypy/trunk/pypy/translator/c/gcc/test/elf64/track_jumptable.c	Wed Sep  8 10:06:15 2010
@@ -0,0 +1,18 @@
+#include <stdio.h>
+
+int foobar(int n) {
+	switch(n) {
+		case 0:
+			return 1;
+		case 1:
+			return 12;
+		case 2:
+			return 123;
+		case 3:
+			return 1234;
+		case 4:
+			return 12345;
+		default:
+			return 42;
+	}
+}

Added: pypy/trunk/pypy/translator/c/gcc/test/elf64/track_jumptable.s
==============================================================================
--- (empty file)
+++ pypy/trunk/pypy/translator/c/gcc/test/elf64/track_jumptable.s	Wed Sep  8 10:06:15 2010
@@ -0,0 +1,48 @@
+	.type	foobar, @function
+foobar:
+.LFB0:
+	.cfi_startproc
+	pushq	%rbp
+	.cfi_def_cfa_offset 16
+	movq	%rsp, %rbp
+	.cfi_offset 6, -16
+	.cfi_def_cfa_register 6
+	movl	%edi, -4(%rbp)
+	cmpl	$4, -4(%rbp)
+	ja	.L2
+	mov	-4(%rbp), %eax
+	movq	.L8(,%rax,8), %rax
+	jmp	*%rax
+	.section	.rodata
+	.align 8
+	.align 4
+.L8:
+	.quad	.L3
+	.quad	.L4
+	.quad	.L5
+	.quad	.L6
+	.quad	.L7
+	.text
+.L3:
+	movl	$1, %eax
+	jmp	.L9
+.L4:
+	movl	$12, %eax
+	jmp	.L9
+.L5:
+	movl	$123, %eax
+	jmp	.L9
+.L6:
+	movl	$1234, %eax
+	jmp	.L9
+.L7:
+	movl	$12345, %eax
+	jmp	.L9
+.L2:
+	movl	$42, %eax
+.L9:
+	leave
+	ret
+	.cfi_endproc
+.LFE0:
+	.size	foobar, .-foobar

Added: pypy/trunk/pypy/translator/c/gcc/test/elf64/track_negative_rsp_offset.s
==============================================================================
--- (empty file)
+++ pypy/trunk/pypy/translator/c/gcc/test/elf64/track_negative_rsp_offset.s	Wed Sep  8 10:06:15 2010
@@ -0,0 +1,17 @@
+	.type some_function, @function
+some_function:
+	;; Test using a negative offset from %rsp (gcc sometimes does this)
+	movq %rbx, -8(%rsp)
+	subq $8, %rsp
+
+	movq %rdi, %rbx
+
+	call some_other_function
+	;; expected {8(%rsp) | (%rsp), %r12, %r13, %r14, %r15, %rbp | %rbx}
+	/* GCROOT %rbx */
+
+	movq %rbx, %rax
+	;; Same as where %rbx was saved above
+	movq (%rsp), %rbx
+	ret
+	.size some_function, .-some_function

Added: pypy/trunk/pypy/translator/c/gcc/test/elf64/track_varargs_function.s
==============================================================================
--- (empty file)
+++ pypy/trunk/pypy/translator/c/gcc/test/elf64/track_varargs_function.s	Wed Sep  8 10:06:15 2010
@@ -0,0 +1,58 @@
+	.type	PyErr_Format, @function
+PyErr_Format:
+.LFB67:
+	.cfi_startproc
+	pushq	%rbp
+	.cfi_def_cfa_offset 16
+	movzbl	%al, %eax
+	pushq	%rbx
+	.cfi_def_cfa_offset 24
+	movq	%rdi, %rbx
+	.cfi_offset 3, -24
+	.cfi_offset 6, -16
+	movq	%rsi, %rdi
+	subq	$216, %rsp
+	.cfi_def_cfa_offset 240
+	movq	%rdx, 48(%rsp)
+	leaq	0(,%rax,4), %rdx
+	movl	$.L21, %eax
+	movq	%rcx, 56(%rsp)
+	movq	%r8, 64(%rsp)
+	movq	%rsp, %rsi
+	subq	%rdx, %rax
+	leaq	207(%rsp), %rdx
+	movq	%r9, 72(%rsp)
+	jmp	*%rax
+	movaps	%xmm7, -15(%rdx)
+	movaps	%xmm6, -31(%rdx)
+	movaps	%xmm5, -47(%rdx)
+	movaps	%xmm4, -63(%rdx)
+	movaps	%xmm3, -79(%rdx)
+	movaps	%xmm2, -95(%rdx)
+	movaps	%xmm1, -111(%rdx)
+	movaps	%xmm0, -127(%rdx)
+.L21:
+	leaq	240(%rsp), %rax
+	movl	$16, (%rsp)
+	movl	$48, 4(%rsp)
+	movq	%rax, 8(%rsp)
+	leaq	32(%rsp), %rax
+	movq	%rax, 16(%rsp)
+	call	PyString_FromFormatV
+	;; expected {232(%rsp) | 216(%rsp), %r12, %r13, %r14, %r15, 224(%rsp) | }
+	movq	%rbx, %rdi
+	movq	%rax, %rbp
+	movq	%rax, %rsi
+	call	PyErr_SetObject
+	;; expected {232(%rsp) | 216(%rsp), %r12, %r13, %r14, %r15, 224(%rsp) | }
+	movq	%rbp, %rdi
+	call	Py_DecRef
+	;; expected {232(%rsp) | 216(%rsp), %r12, %r13, %r14, %r15, 224(%rsp) | }
+	addq	$216, %rsp
+	xorl	%eax, %eax
+	popq	%rbx
+	popq	%rbp
+	ret
+	.cfi_endproc
+.LFE67:
+	.size	PyErr_Format, .-PyErr_Format

Modified: pypy/trunk/pypy/translator/c/gcc/test/test_trackgcroot.py
==============================================================================
--- pypy/trunk/pypy/translator/c/gcc/test/test_trackgcroot.py	(original)
+++ pypy/trunk/pypy/translator/c/gcc/test/test_trackgcroot.py	Wed Sep  8 10:06:15 2010
@@ -1,51 +1,52 @@
 import py
 import sys, re
-from pypy.translator.c.gcc.trackgcroot import format_location
-from pypy.translator.c.gcc.trackgcroot import format_callshape
 from pypy.translator.c.gcc.trackgcroot import LOC_NOWHERE, LOC_REG
 from pypy.translator.c.gcc.trackgcroot import LOC_EBP_PLUS, LOC_EBP_MINUS
 from pypy.translator.c.gcc.trackgcroot import LOC_ESP_PLUS
 from pypy.translator.c.gcc.trackgcroot import ElfAssemblerParser
 from pypy.translator.c.gcc.trackgcroot import DarwinAssemblerParser
-from pypy.translator.c.gcc.trackgcroot import compress_callshape
-from pypy.translator.c.gcc.trackgcroot import decompress_callshape
 from pypy.translator.c.gcc.trackgcroot import PARSERS
+from pypy.translator.c.gcc.trackgcroot import ElfFunctionGcRootTracker32
 from StringIO import StringIO
+import py.test
 
 this_dir = py.path.local(__file__).dirpath()
 
 
 def test_format_location():
-    assert format_location(LOC_NOWHERE) == '?'
-    assert format_location(LOC_REG | (1<<2)) == '%ebx'
-    assert format_location(LOC_REG | (2<<2)) == '%esi'
-    assert format_location(LOC_REG | (3<<2)) == '%edi'
-    assert format_location(LOC_REG | (4<<2)) == '%ebp'
-    assert format_location(LOC_EBP_PLUS + 0) == '(%ebp)'
-    assert format_location(LOC_EBP_PLUS + 4) == '4(%ebp)'
-    assert format_location(LOC_EBP_MINUS + 4) == '-4(%ebp)'
-    assert format_location(LOC_ESP_PLUS + 0) == '(%esp)'
-    assert format_location(LOC_ESP_PLUS + 4) == '4(%esp)'
+    cls = ElfFunctionGcRootTracker32
+    assert cls.format_location(LOC_NOWHERE) == '?'
+    assert cls.format_location(LOC_REG | (1<<2)) == '%ebx'
+    assert cls.format_location(LOC_REG | (2<<2)) == '%esi'
+    assert cls.format_location(LOC_REG | (3<<2)) == '%edi'
+    assert cls.format_location(LOC_REG | (4<<2)) == '%ebp'
+    assert cls.format_location(LOC_EBP_PLUS + 0) == '(%ebp)'
+    assert cls.format_location(LOC_EBP_PLUS + 4) == '4(%ebp)'
+    assert cls.format_location(LOC_EBP_MINUS + 4) == '-4(%ebp)'
+    assert cls.format_location(LOC_ESP_PLUS + 0) == '(%esp)'
+    assert cls.format_location(LOC_ESP_PLUS + 4) == '4(%esp)'
 
 def test_format_callshape():
+    cls = ElfFunctionGcRootTracker32
     expected = ('{4(%ebp) '               # position of the return address
                 '| 8(%ebp), 12(%ebp), 16(%ebp), 20(%ebp) '  # 4 saved regs
                 '| 24(%ebp), 28(%ebp)}')                    # GC roots
-    assert format_callshape((LOC_EBP_PLUS+4,
-                             LOC_EBP_PLUS+8,
-                             LOC_EBP_PLUS+12,
-                             LOC_EBP_PLUS+16,
-                             LOC_EBP_PLUS+20,
-                             LOC_EBP_PLUS+24,
-                             LOC_EBP_PLUS+28)) == expected
+    assert cls.format_callshape((LOC_EBP_PLUS+4,
+                                 LOC_EBP_PLUS+8,
+                                 LOC_EBP_PLUS+12,
+                                 LOC_EBP_PLUS+16,
+                                 LOC_EBP_PLUS+20,
+                                 LOC_EBP_PLUS+24,
+                                 LOC_EBP_PLUS+28)) == expected
 
 def test_compress_callshape():
+    cls = ElfFunctionGcRootTracker32
     shape = (1, 127, 0x1234, 0x5678, 0x234567,
              0x765432, 0x61626364, 0x41424344)
-    bytes = list(compress_callshape(shape))
+    bytes = list(cls.compress_callshape(shape))
     print bytes
     assert len(bytes) == 1+1+2+3+4+4+5+5+1
-    assert decompress_callshape(bytes) == list(shape)
+    assert cls.decompress_callshape(bytes) == list(shape)
 
 def test_find_functions_elf():
     source = """\
@@ -108,7 +109,7 @@
  
 def test_computegcmaptable():
     tests = []
-    for format in ('elf', 'darwin', 'msvc'):
+    for format in ('elf', 'darwin', 'msvc', 'elf64'):
         for path in this_dir.join(format).listdir("track*.s"):
             n = path.purebasename[5:]
             try:
@@ -138,7 +139,7 @@
     tabledict = {}
     seen = {}
     for entry in table:
-        print '%s: %s' % (entry[0], format_callshape(entry[1]))
+        print '%s: %s' % (entry[0], tracker.format_callshape(entry[1]))
         tabledict[entry[0]] = entry[1]
     # find the ";; expected" lines
     prevline = ""
@@ -151,7 +152,7 @@
             label = prevmatch.group(1)
             assert label in tabledict
             got = tabledict[label]
-            assert format_callshape(got) == expected
+            assert tracker.format_callshape(got) == expected
             seen[label] = True
             if format == 'msvc':
                 expectedlines.insert(i-2, 'PUBLIC\t%s\n' % (label,))

Modified: pypy/trunk/pypy/translator/c/gcc/trackgcroot.py
==============================================================================
--- pypy/trunk/pypy/translator/c/gcc/trackgcroot.py	(original)
+++ pypy/trunk/pypy/translator/c/gcc/trackgcroot.py	Wed Sep  8 10:06:15 2010
@@ -72,7 +72,7 @@
             if self.is_stack_bottom:
                 retaddr = LOC_NOWHERE     # end marker for asmgcroot.py
             elif self.uses_frame_pointer:
-                retaddr = frameloc_ebp(4)
+                retaddr = frameloc_ebp(self.WORD)
             else:
                 retaddr = frameloc_esp(insn.framesize)
             shape = [retaddr]
@@ -84,7 +84,8 @@
             for localvar, tag in insn.gcroots.items():
                 if isinstance(localvar, LocalVar):
                     loc = localvar.getlocation(insn.framesize,
-                                               self.uses_frame_pointer)
+                                               self.uses_frame_pointer,
+                                               self.WORD)
                 elif localvar in self.REG2LOC:
                     loc = self.REG2LOC[localvar]
                 else:
@@ -148,7 +149,7 @@
             lst.append(previnsn)
 
     def parse_instructions(self):
-        self.insns = [InsnFunctionStart(self.CALLEE_SAVE_REGISTERS)]
+        self.insns = [InsnFunctionStart(self.CALLEE_SAVE_REGISTERS, self.WORD)]
         ignore_insns = False
         for lineno, line in enumerate(self.lines):
             if lineno < self.skip:
@@ -263,7 +264,7 @@
                     ofs_from_ebp = int(match.group(1) or '0')
                     if self.format == 'msvc':
                         ofs_from_ebp += int(match.group(2) or '0')
-                    localvar = ofs_from_ebp - 4
+                    localvar = ofs_from_ebp - self.WORD
                     assert localvar != 0    # that's the return address
                     return LocalVar(localvar, hint='ebp')
             return localvar
@@ -357,6 +358,56 @@
                 self.lines.insert(call.lineno+1, '\t.globl\t%s\n' % (label,))
         call.global_label = label
 
+    @classmethod
+    def compress_callshape(cls, shape):
+        # For a single shape, this turns the list of integers into a list of
+        # bytes and reverses the order of the entries.  The length is
+        # encoded by inserting a 0 marker after the gc roots coming from
+        # shape[N:] and before the N values coming from shape[N-1] to
+        # shape[0] (for N == 5 on 32-bit or 7 on 64-bit platforms).
+        # In practice it seems that shapes contain many integers
+        # whose value is up to a few thousands, which the algorithm below
+        # compresses down to 2 bytes.  Very small values compress down to a
+        # single byte.
+
+        # Callee-save regs plus ret addr
+        min_size = len(cls.CALLEE_SAVE_REGISTERS) + 1
+
+        assert len(shape) >= min_size
+        shape = list(shape)
+        assert 0 not in shape[min_size:]
+        shape.insert(min_size, 0)
+        result = []
+        for loc in shape:
+            assert loc >= 0
+            flag = 0
+            while loc >= 0x80:
+                result.append(int(loc & 0x7F) | flag)
+                flag = 0x80
+                loc >>= 7
+            result.append(int(loc) | flag)
+        result.reverse()
+        return result
+
+    @classmethod
+    def decompress_callshape(cls, bytes):
+        # For tests.  This logic is copied in asmgcroot.py.
+        result, n = [], 0
+        while n < len(bytes):
+            value = 0
+            while True:
+                b = bytes[n]
+                n += 1
+                value += b
+                if b < 0x80:
+                    break
+                value = (value - 0x80) << 7
+            result.append(value)
+        result.reverse()
+        marker = len(cls.CALLEE_SAVE_REGISTERS) + 1  # where the 0 was put
+        assert result[marker] == 0
+        del result[marker]
+        return result
     # ____________________________________________________________
 
     CANNOT_COLLECT = {    # some of the most used functions that cannot collect
@@ -385,10 +436,9 @@
         'inc', 'dec', 'not', 'neg', 'or', 'and', 'sbb', 'adc',
         'shl', 'shr', 'sal', 'sar', 'rol', 'ror', 'mul', 'imul', 'div', 'idiv',
         'bswap', 'bt', 'rdtsc',
+        'punpck', 'pshufd', 
         # zero-extending moves should not produce GC pointers
         'movz',
-        # quadword operations
-        'movq',
         ])
 
     visit_movb = visit_nop
@@ -400,7 +450,7 @@
     visit_xorb = visit_nop
     visit_xorw = visit_nop
 
-    def visit_addl(self, line, sign=+1):
+    def _visit_add(self, line, sign=+1):
         match = self.r_binaryinsn.match(line)
         source = match.group("source")
         target = match.group("target")
@@ -415,8 +465,8 @@
         else:
             return []
 
-    def visit_subl(self, line):
-        return self.visit_addl(line, sign=-1)
+    def _visit_sub(self, line):
+        return self._visit_add(line, sign=-1)
 
     def unary_insn(self, line):
         match = self.r_unaryinsn.match(line)
@@ -439,8 +489,6 @@
         else:
             return []
 
-    visit_xorl = binary_insn   # used in "xor reg, reg" to create a NULL GC ptr
-    visit_orl = binary_insn
     # The various cmov* operations
     for name in '''
         e ne g ge l le a ae b be p np s ns o no
@@ -448,7 +496,7 @@
         locals()['visit_cmov' + name] = binary_insn
         locals()['visit_cmov' + name + 'l'] = binary_insn
 
-    def visit_andl(self, line):
+    def _visit_and(self, line):
         match = self.r_binaryinsn.match(line)
         target = match.group("target")
         if target == self.ESP:
@@ -460,9 +508,7 @@
         else:
             return self.binary_insn(line)
 
-    visit_and = visit_andl
-
-    def visit_leal(self, line):
+    def _visit_lea(self, line):
         match = self.r_binaryinsn.match(line)
         target = match.group("target")
         if target == self.ESP:
@@ -474,7 +520,7 @@
                     raise UnrecognizedOperation('epilogue without prologue')
                 ofs_from_ebp = int(match.group(1) or '0')
                 assert ofs_from_ebp <= 0
-                framesize = 4 - ofs_from_ebp
+                framesize = self.WORD - ofs_from_ebp
             else:
                 match = self.r_localvar_esp.match(source)
                 # leal 12(%esp), %esp
@@ -489,17 +535,23 @@
     def insns_for_copy(self, source, target):
         source = self.replace_symbols(source)
         target = self.replace_symbols(target)
-        if source == self.ESP or target == self.ESP:
+        if target == self.ESP:
             raise UnrecognizedOperation('%s -> %s' % (source, target))
         elif self.r_localvar.match(target):
             if self.r_localvar.match(source):
+                # eg, movl %eax, %ecx: possibly copies a GC root
                 return [InsnCopyLocal(source, target)]
             else:
+                # eg, movl (%eax), %edi or mov %esp, %edi: load a register
+                # from "outside".  If it contains a pointer to a GC root,
+                # it will be announced later with the GCROOT macro.
                 return [InsnSetLocal(target, [source])]
         else:
+            # eg, movl %ebx, (%edx) or mov %ebp, %esp: does not write into
+            # a general register
             return []
 
-    def visit_movl(self, line):
+    def _visit_mov(self, line):
         match = self.r_binaryinsn.match(line)
         source = match.group("source")
         target = match.group("target")
@@ -513,34 +565,24 @@
                           # gcc -fno-unit-at-a-time.
         return self.insns_for_copy(source, target)
 
-    visit_mov = visit_movl
-
-    def visit_pushl(self, line):
+    def _visit_push(self, line):
         match = self.r_unaryinsn.match(line)
         source = match.group(1)
-        return [InsnStackAdjust(-4)] + self.insns_for_copy(source, self.TOP_OF_STACK)
-
-    def visit_pushw(self, line):
-        return [InsnStackAdjust(-2)]   # rare but not impossible
+        return [InsnStackAdjust(-self.WORD)] + self.insns_for_copy(source, self.TOP_OF_STACK)
 
     def _visit_pop(self, target):
-        return self.insns_for_copy(self.TOP_OF_STACK, target) + [InsnStackAdjust(+4)]
-
-    def visit_popl(self, line):
-        match = self.r_unaryinsn.match(line)
-        target = match.group(1)
-        return self._visit_pop(target)
+        return self.insns_for_copy(self.TOP_OF_STACK, target) + [InsnStackAdjust(+self.WORD)]
 
     def _visit_prologue(self):
         # for the prologue of functions that use %ebp as frame pointer
         self.uses_frame_pointer = True
         self.r_localvar = self.r_localvarfp
-        return [InsnPrologue()]
+        return [InsnPrologue(self.WORD)]
 
     def _visit_epilogue(self):
         if not self.uses_frame_pointer:
             raise UnrecognizedOperation('epilogue without prologue')
-        return [InsnEpilogue(4)]
+        return [InsnEpilogue(self.WORD)]
 
     def visit_leave(self, line):
         return self._visit_epilogue() + self._visit_pop(self.EBP)
@@ -662,7 +704,7 @@
     visit_jc = conditional_jump
     visit_jnc = conditional_jump
 
-    def visit_xchgl(self, line):
+    def _visit_xchg(self, line):
         # only support the format used in VALGRIND_DISCARD_TRANSLATIONS
         # which is to use a marker no-op "xchgl %ebx, %ebx"
         match = self.r_binaryinsn.match(line)
@@ -741,8 +783,172 @@
                 insns.append(InsnStackAdjust(16))
         return insns
 
+    # __________ debugging output __________
+
+    @classmethod
+    def format_location(cls, loc):
+        # A 'location' is a single number describing where a value is stored
+        # across a call.  It can be in one of the CALLEE_SAVE_REGISTERS, or
+        # in the stack frame at an address relative to either %esp or %ebp.
+        # The last two bits of the location number are used to tell the cases
+        # apart; they are extracted with LOC_MASK below.
+        assert loc >= 0
+        kind = loc & LOC_MASK
+        if kind == LOC_REG:
+            if loc == LOC_NOWHERE:
+                return '?'
+            reg = (loc >> 2) - 1
+            return '%' + cls.CALLEE_SAVE_REGISTERS[reg].replace("%", "")
+        else:
+            offset = loc & ~ LOC_MASK
+            if kind == LOC_EBP_PLUS:
+                result = '(%' + cls.EBP.replace("%", "") + ')'
+            elif kind == LOC_EBP_MINUS:
+                result = '(%' + cls.EBP.replace("%", "") + ')'
+                offset = -offset
+            elif kind == LOC_ESP_PLUS:
+                result = '(%' + cls.ESP.replace("%", "") + ')'
+            else:
+                assert 0, kind
+            if offset != 0:
+                result = str(offset) + result
+            return result
+
+    @classmethod
+    def format_callshape(cls, shape):
+        # A 'call shape' is a tuple of locations in the sense of
+        # format_location().  They describe where in a function frame
+        # interesting values are stored, when this function executes a 'call'
+        # instruction.
+        #
+        #   shape[0]    is the location that stores the fn's own return
+        #               address (not the return address for the currently
+        #               executing 'call')
+        #
+        #   shape[1..N] is where the fn saved its own caller's value of a
+        #               certain callee save register. (where N is the number
+        #               of callee save registers.)
+        #
+        #   shape[>N]   are GC roots: where the fn has put its local GCPTR
+        #               vars
+        #
+        num_callee_save_regs = len(cls.CALLEE_SAVE_REGISTERS)
+        assert isinstance(shape, tuple)
+        # + 1 for the return address
+        assert len(shape) >= (num_callee_save_regs + 1)
+        result = [cls.format_location(loc) for loc in shape]
+        return '{%s | %s | %s}' % (result[0],
+                                   ', '.join(result[1:(num_callee_save_regs+1)]),
+                                   ', '.join(result[(num_callee_save_regs+1):]))
+
+
+class FunctionGcRootTracker32(FunctionGcRootTracker):
+    WORD = 4
+
+    visit_mov = FunctionGcRootTracker._visit_mov
+    visit_movl = FunctionGcRootTracker._visit_mov
+    visit_pushl = FunctionGcRootTracker._visit_push
+    visit_leal = FunctionGcRootTracker._visit_lea
+
+    visit_addl = FunctionGcRootTracker._visit_add
+    visit_subl = FunctionGcRootTracker._visit_sub
+    visit_andl = FunctionGcRootTracker._visit_and
+    visit_and = FunctionGcRootTracker._visit_and
+
+    visit_xchgl = FunctionGcRootTracker._visit_xchg
+
+    # used in "xor reg, reg" to create a NULL GC ptr
+    visit_xorl = FunctionGcRootTracker.binary_insn
+    visit_orl = FunctionGcRootTracker.binary_insn     # unsure about this one
+
+    # occasionally used on 32-bits to move floats around
+    visit_movq = FunctionGcRootTracker.visit_nop
+
+    def visit_pushw(self, line):
+        return [InsnStackAdjust(-2)]   # rare but not impossible
 
-class ElfFunctionGcRootTracker(FunctionGcRootTracker):
+    def visit_popl(self, line):
+        match = self.r_unaryinsn.match(line)
+        target = match.group(1)
+        return self._visit_pop(target)
+
+class FunctionGcRootTracker64(FunctionGcRootTracker):
+    WORD = 8
+
+    # Regex ignores destination
+    r_save_xmm_register = re.compile(r"\tmovaps\s+%xmm(\d+)")
+
+    def _maybe_32bit_dest(func):
+        def wrapper(self, line):
+            # Using a 32-bit reg as a destination in 64-bit mode zero-extends
+            # to 64-bits, so sometimes gcc uses a 32-bit operation to copy a
+            # statically known pointer to a register
+
+            # %eax -> %rax
+            new_line = re.sub(r"%e(ax|bx|cx|dx|di|si)$", r"%r\1", line)
+            # %r10d -> %r10
+            new_line = re.sub(r"%r(\d+)d$", r"%r\1", new_line)
+            return func(self, new_line)
+        return wrapper
+
+    visit_addl = FunctionGcRootTracker.visit_nop
+    visit_subl = FunctionGcRootTracker.visit_nop
+    visit_leal = FunctionGcRootTracker.visit_nop
+
+    visit_cltq = FunctionGcRootTracker.visit_nop
+
+    visit_movq = FunctionGcRootTracker._visit_mov
+    # just a special assembler mnemonic for mov
+    visit_movabsq = FunctionGcRootTracker._visit_mov
+    visit_mov = _maybe_32bit_dest(FunctionGcRootTracker._visit_mov)
+    visit_movl = visit_mov
+
+    visit_xorl = _maybe_32bit_dest(FunctionGcRootTracker.binary_insn)
+    
+    visit_pushq = FunctionGcRootTracker._visit_push
+
+    visit_addq = FunctionGcRootTracker._visit_add
+    visit_subq = FunctionGcRootTracker._visit_sub
+
+    visit_leaq = FunctionGcRootTracker._visit_lea
+
+    visit_xorq = FunctionGcRootTracker.binary_insn
+
+    # FIXME: similar to visit_popl for 32-bit
+    def visit_popq(self, line):
+        match = self.r_unaryinsn.match(line)
+        target = match.group(1)
+        return self._visit_pop(target)
+
+    def visit_jmp(self, line):
+        # On 64-bit, %al is used when calling varargs functions to specify an
+        # upper-bound on the number of xmm registers used in the call. gcc
+        # uses %al to compute an indirect jump that looks like:
+        #
+        #     jmp *[some register]
+        #     movaps %xmm7, [stack location]
+        #     movaps %xmm6, [stack location]
+        #     movaps %xmm5, [stack location]
+        #     movaps %xmm4, [stack location]
+        #     movaps %xmm3, [stack location]
+        #     movaps %xmm2, [stack location]
+        #     movaps %xmm1, [stack location]
+        #     movaps %xmm0, [stack location]
+        #
+        # The jmp is always to somewhere in the block of "movaps"
+        # instructions, according to how many xmm registers need to be saved
+        # to the stack. The point of all this is that we can safely ignore
+        # jmp instructions of that form.
+        if (self.currentlineno + 8) < len(self.lines) and self.r_unaryinsn_star.match(line):
+            matches = [self.r_save_xmm_register.match(self.lines[self.currentlineno + 1 + i]) for i in range(8)]
+            if all(m and int(m.group(1)) == (7 - i) for i, m in enumerate(matches)):
+                return []
+
+        return FunctionGcRootTracker.visit_jmp(self, line)
+
+
+
+class ElfFunctionGcRootTracker32(FunctionGcRootTracker32):
     format = 'elf'
 
     ESP     = '%esp'
@@ -791,7 +997,65 @@
         match = self.r_functionend.match(lines[-1])
         assert funcname == match.group(1)
         assert funcname == match.group(2)
-        super(ElfFunctionGcRootTracker, self).__init__(
+        super(ElfFunctionGcRootTracker32, self).__init__(
+            funcname, lines, filetag)
+
+    def extract_immediate(self, value):
+        if not value.startswith('$'):
+            return None
+        return int(value[1:])
+
+ElfFunctionGcRootTracker32.init_regexp()
+
+class ElfFunctionGcRootTracker64(FunctionGcRootTracker64):
+    format = 'elf64'
+    ESP = '%rsp'
+    EBP = '%rbp'
+    EAX = '%rax'
+    CALLEE_SAVE_REGISTERS = ['%rbx', '%r12', '%r13', '%r14', '%r15', '%rbp']
+    REG2LOC = dict((_reg, LOC_REG | ((_i+1)<<2))
+                   for _i, _reg in enumerate(CALLEE_SAVE_REGISTERS))
+    OPERAND = r'(?:[-\w$%+.:@"]+(?:[(][\w%,]+[)])?|[(][\w%,]+[)])'
+    LABEL   = r'([a-zA-Z_$.][a-zA-Z0-9_$@.]*)'
+    OFFSET_LABELS   = 2**30
+    TOP_OF_STACK = '0(%rsp)'
+
+    r_functionstart = re.compile(r"\t.type\s+"+LABEL+",\s*[@]function\s*$")
+    r_functionend   = re.compile(r"\t.size\s+"+LABEL+",\s*[.]-"+LABEL+"\s*$")
+    LOCALVAR = r"%rax|%rbx|%rcx|%rdx|%rdi|%rsi|%rbp|%r8|%r9|%r10|%r11|%r12|%r13|%r14|%r15|-?\d*[(]%rsp[)]"
+    LOCALVARFP = LOCALVAR + r"|-?\d*[(]%rbp[)]"
+    r_localvarnofp  = re.compile(LOCALVAR)
+    r_localvarfp    = re.compile(LOCALVARFP)
+    r_localvar_esp  = re.compile(r"(-?\d*)[(]%rsp[)]")
+    r_localvar_ebp  = re.compile(r"(-?\d*)[(]%rbp[)]")
+
+    r_rel_label      = re.compile(r"(\d+):\s*$")
+    r_jump_rel_label = re.compile(r"\tj\w+\s+"+"(\d+)f"+"\s*$")
+
+    r_unaryinsn_star= re.compile(r"\t[a-z]\w*\s+[*]("+OPERAND+")\s*$")
+    r_jmptable_item = re.compile(r"\t.quad\t"+LABEL+"(-\"[A-Za-z0-9$]+\")?\s*$")
+    r_jmptable_end  = re.compile(r"\t.text|\t.section\s+.text|\t\.align|"+LABEL)
+
+    r_gcroot_marker = re.compile(r"\t/[*] GCROOT ("+LOCALVARFP+") [*]/")
+    r_gcnocollect_marker = re.compile(r"\t/[*] GC_NOCOLLECT ("+OPERAND+") [*]/")
+    r_bottom_marker = re.compile(r"\t/[*] GC_STACK_BOTTOM [*]/")
+
+    FUNCTIONS_NOT_RETURNING = {
+        'abort': None,
+        '_exit': None,
+        '__assert_fail': None,
+        '___assert_rtn': None,
+        'L___assert_rtn$stub': None,
+        'L___eprintf$stub': None,
+        }
+
+    def __init__(self, lines, filetag=0):
+        match = self.r_functionstart.match(lines[0])
+        funcname = match.group(1)
+        match = self.r_functionend.match(lines[-1])
+        assert funcname == match.group(1)
+        assert funcname == match.group(2)
+        super(ElfFunctionGcRootTracker64, self).__init__(
             funcname, lines, filetag)
 
     def extract_immediate(self, value):
@@ -799,9 +1063,9 @@
             return None
         return int(value[1:])
 
-ElfFunctionGcRootTracker.init_regexp()
+ElfFunctionGcRootTracker64.init_regexp()
 
-class DarwinFunctionGcRootTracker(ElfFunctionGcRootTracker):
+class DarwinFunctionGcRootTracker(ElfFunctionGcRootTracker32):
     format = 'darwin'
 
     r_functionstart = re.compile(r"_(\w+):\s*$")
@@ -810,7 +1074,7 @@
     def __init__(self, lines, filetag=0):
         match = self.r_functionstart.match(lines[0])
         funcname = '_' + match.group(1)
-        FunctionGcRootTracker.__init__(self, funcname, lines, filetag)
+        FunctionGcRootTracker32.__init__(self, funcname, lines, filetag)
 
 class Mingw32FunctionGcRootTracker(DarwinFunctionGcRootTracker):
     format = 'mingw32'
@@ -821,7 +1085,7 @@
         '__assert': None,
         }
 
-class MsvcFunctionGcRootTracker(FunctionGcRootTracker):
+class MsvcFunctionGcRootTracker(FunctionGcRootTracker32):
     format = 'msvc'
     ESP = 'esp'
     EBP = 'ebp'
@@ -906,12 +1170,12 @@
         push pop mov lea
         xor sub add
         '''.split():
-        locals()['visit_' + name] = getattr(FunctionGcRootTracker,
+        locals()['visit_' + name] = getattr(FunctionGcRootTracker32,
                                             'visit_' + name + 'l')
 
-    visit_int = FunctionGcRootTracker.visit_nop
+    visit_int = FunctionGcRootTracker32.visit_nop
     # probably not GC pointers
-    visit_cdq  = FunctionGcRootTracker.visit_nop
+    visit_cdq  = FunctionGcRootTracker32.visit_nop
 
     def visit_npad(self, line):
         # MASM has a nasty bug: it implements "npad 5" with "add eax, 0"
@@ -1038,7 +1302,7 @@
         table = tracker.computegcmaptable(self.verbose)
         if self.verbose > 1:
             for label, state in table:
-                print >> sys.stderr, label, '\t', format_callshape(state)
+                print >> sys.stderr, label, '\t', tracker.format_callshape(state)
         table = compress_gcmaptable(table)
         if self.shuffle and random.random() < 0.5:
             self.gcmaptable[:0] = table
@@ -1049,7 +1313,7 @@
 
 class ElfAssemblerParser(AssemblerParser):
     format = "elf"
-    FunctionGcRootTracker = ElfFunctionGcRootTracker
+    FunctionGcRootTracker = ElfFunctionGcRootTracker32
 
     def find_functions(self, iterlines):
         functionlines = []
@@ -1072,6 +1336,10 @@
             "missed the end of the previous function")
         yield False, functionlines
 
+class ElfAssemblerParser64(ElfAssemblerParser):
+    format = "elf64"
+    FunctionGcRootTracker = ElfFunctionGcRootTracker64
+
 class DarwinAssemblerParser(AssemblerParser):
     format = "darwin"
     FunctionGcRootTracker = DarwinFunctionGcRootTracker
@@ -1241,6 +1509,7 @@
 
 PARSERS = {
     'elf': ElfAssemblerParser,
+    'elf64': ElfAssemblerParser64,
     'darwin': DarwinAssemblerParser,
     'mingw32': Mingw32AssemblerParser,
     'msvc': MsvcAssemblerParser,
@@ -1281,6 +1550,13 @@
             txt = kwargs[self.format]
             print >> output, "\t%s" % txt
 
+        if self.format == 'elf64':
+            word_decl = '.quad'
+        else:
+            word_decl = '.long'
+
+        tracker_cls = PARSERS[self.format].FunctionGcRootTracker
+
         # The pypy_asm_stackwalk() function
 
         if self.format == 'msvc':
@@ -1327,7 +1603,56 @@
                }
             }
             """
+        elif self.format == 'elf64':
+            print >> output, "\t.text"
+            print >> output, "\t.globl %s" % _globalname('pypy_asm_stackwalk')
+            print >> output, "\t.type pypy_asm_stackwalk, @function"
+            print >> output, "%s:" % _globalname('pypy_asm_stackwalk')
+
+            print >> output, """\
+            /* See description in asmgcroot.py */
+            movq\t%rdi, %rdx\t/* 1st argument, which is the callback */
+            movq\t%rsi, %rcx\t/* 2nd argument, which is gcrootanchor */
+            movq\t%rsp, %rax\t/* my frame top address */
+            pushq\t%rax\t\t/* ASM_FRAMEDATA[8] */
+            pushq\t%rbp\t\t/* ASM_FRAMEDATA[7] */
+            pushq\t%r15\t\t/* ASM_FRAMEDATA[6] */
+            pushq\t%r14\t\t/* ASM_FRAMEDATA[5] */
+            pushq\t%r13\t\t/* ASM_FRAMEDATA[4] */
+            pushq\t%r12\t\t/* ASM_FRAMEDATA[3] */
+            pushq\t%rbx\t\t/* ASM_FRAMEDATA[2] */
+
+            /* Add this ASM_FRAMEDATA to the front of the circular linked */
+            /* list.  Let's call it 'self'.                               */
 
+            movq\t8(%rcx), %rax\t/* next = gcrootanchor->next */
+            pushq\t%rax\t\t\t\t/* self->next = next */
+            pushq\t%rcx\t\t\t/* self->prev = gcrootanchor */
+            movq\t%rsp, 8(%rcx)\t/* gcrootanchor->next = self */
+            movq\t%rsp, 0(%rax)\t\t\t/* next->prev = self */
+
+            /* note: the Mac OS X 16 bytes aligment must be respected. */
+            call\t*%rdx\t\t/* invoke the callback */
+
+            /* Detach this ASM_FRAMEDATA from the circular linked list */
+            popq\t%rsi\t\t/* prev = self->prev */
+            popq\t%rdi\t\t/* next = self->next */
+            movq\t%rdi, 8(%rsi)\t/* prev->next = next */
+            movq\t%rsi, 0(%rdi)\t/* next->prev = prev */
+
+            popq\t%rbx\t\t/* restore from ASM_FRAMEDATA[2] */
+            popq\t%r12\t\t/* restore from ASM_FRAMEDATA[3] */
+            popq\t%r13\t\t/* restore from ASM_FRAMEDATA[4] */
+            popq\t%r14\t\t/* restore from ASM_FRAMEDATA[5] */
+            popq\t%r15\t\t/* restore from ASM_FRAMEDATA[6] */
+            popq\t%rbp\t\t/* restore from ASM_FRAMEDATA[7] */
+            popq\t%rcx\t\t/* ignored      ASM_FRAMEDATA[8] */
+
+            /* the return value is the one of the 'call' above, */
+            /* because %rax (and possibly %rdx) are unmodified  */
+            ret
+            .size pypy_asm_stackwalk, .-pypy_asm_stackwalk
+            """
         else:
             print >> output, "\t.text"
             print >> output, "\t.globl %s" % _globalname('pypy_asm_stackwalk')
@@ -1401,7 +1726,7 @@
                     n = shapes[state]
                 except KeyError:
                     n = shapes[state] = shapeofs
-                    bytes = [str(b) for b in compress_callshape(state)]
+                    bytes = [str(b) for b in tracker_cls.compress_callshape(state)]
                     shapelines.append('\t%s,\t/* %s */\n' % (
                             ', '.join(bytes),
                             shapeofs))
@@ -1433,17 +1758,18 @@
                     n = shapes[state]
                 except KeyError:
                     n = shapes[state] = shapeofs
-                    bytes = [str(b) for b in compress_callshape(state)]
+                    bytes = [str(b) for b in tracker_cls.compress_callshape(state)]
                     shapelines.append('\t/*%d*/\t.byte\t%s\n' % (
                         shapeofs,
                         ', '.join(bytes)))
                     shapeofs += len(bytes)
                 if is_range:
                     n = ~ n
-                print >> output, '\t.long\t%s-%d' % (
+                print >> output, '\t%s\t%s-%d' % (
+                    word_decl,
                     label,
-                    PARSERS[self.format].FunctionGcRootTracker.OFFSET_LABELS)
-                print >> output, '\t.long\t%d' % (n,)
+                    tracker_cls.OFFSET_LABELS)
+                print >> output, '\t%s\t%d' % (word_decl, n)
 
             print >> output, """\
             .globl __gcmapend
@@ -1451,6 +1777,7 @@
             """.replace("__gcmapend", _globalname("__gcmapend"))
 
             _variant(elf='.section\t.rodata',
+                     elf64='.section\t.rodata',
                      darwin='.const',
                      mingw32='')
 
@@ -1483,56 +1810,6 @@
     pass
 
 
-# __________ debugging output __________
-
-def format_location(loc):
-    # A 'location' is a single number describing where a value is stored
-    # across a call.  It can be in one of the CALLEE_SAVE_REGISTERS, or
-    # in the stack frame at an address relative to either %esp or %ebp.
-    # The last two bits of the location number are used to tell the cases
-    # apart; see format_location().
-    assert loc >= 0
-    kind = loc & LOC_MASK
-    if kind == LOC_REG:
-        if loc == LOC_NOWHERE:
-            return '?'
-        reg = (loc >> 2) - 1
-        return ElfFunctionGcRootTracker.CALLEE_SAVE_REGISTERS[reg]
-    else:
-        offset = loc & ~ LOC_MASK
-        if kind == LOC_EBP_PLUS:
-            result = '(%ebp)'
-        elif kind == LOC_EBP_MINUS:
-            result = '(%ebp)'
-            offset = -offset
-        elif kind == LOC_ESP_PLUS:
-            result = '(%esp)'
-        else:
-            assert 0, kind
-        if offset != 0:
-            result = str(offset) + result
-        return result
-
-def format_callshape(shape):
-    # A 'call shape' is a tuple of locations in the sense of format_location().
-    # They describe where in a function frame interesting values are stored,
-    # when this function executes a 'call' instruction.
-    #
-    #   shape[0] is the location that stores the fn's own return address
-    #            (not the return address for the currently executing 'call')
-    #   shape[1] is where the fn saved its own caller's %ebx value
-    #   shape[2] is where the fn saved its own caller's %esi value
-    #   shape[3] is where the fn saved its own caller's %edi value
-    #   shape[4] is where the fn saved its own caller's %ebp value
-    #   shape[>=5] are GC roots: where the fn has put its local GCPTR vars
-    #
-    assert isinstance(shape, tuple)
-    assert len(shape) >= 5
-    result = [format_location(loc) for loc in shape]
-    return '{%s | %s | %s}' % (result[0],
-                               ', '.join(result[1:5]),
-                               ', '.join(result[5:]))
-
 # __________ table compression __________
 
 def compress_gcmaptable(table):
@@ -1559,49 +1836,6 @@
         yield (label1, state, is_range)
         i = j
 
-def compress_callshape(shape):
-    # For a single shape, this turns the list of integers into a list of
-    # bytes and reverses the order of the entries.  The length is
-    # encoded by inserting a 0 marker after the gc roots coming from
-    # shape[5:] and before the 5 values coming from shape[4] to
-    # shape[0].  In practice it seems that shapes contain many integers
-    # whose value is up to a few thousands, which the algorithm below
-    # compresses down to 2 bytes.  Very small values compress down to a
-    # single byte.
-    assert len(shape) >= 5
-    shape = list(shape)
-    assert 0 not in shape[5:]
-    shape.insert(5, 0)
-    result = []
-    for loc in shape:
-        assert loc >= 0
-        flag = 0
-        while loc >= 0x80:
-            result.append(int(loc & 0x7F) | flag)
-            flag = 0x80
-            loc >>= 7
-        result.append(int(loc) | flag)
-    result.reverse()
-    return result
-
-def decompress_callshape(bytes):
-    # For tests.  This logic is copied in asmgcroot.py.
-    result = []
-    n = 0
-    while n < len(bytes):
-        value = 0
-        while True:
-            b = bytes[n]
-            n += 1
-            value += b
-            if b < 0x80:
-                break
-            value = (value - 0x80) << 7
-        result.append(value)
-    result.reverse()
-    assert result[5] == 0
-    del result[5]
-    return result
 
 def getidentifier(s):
     def mapchar(c):
@@ -1626,7 +1860,10 @@
     elif sys.platform == 'win32':
         format = 'mingw32'
     else:
-        format = 'elf'
+        if sys.maxint > 2147483647:
+            format = 'elf64'
+        else:
+            format = 'elf'
     entrypoint = 'main'
     while len(sys.argv) > 1:
         if sys.argv[1] == '-v':

Modified: pypy/trunk/pypy/translator/platform/linux.py
==============================================================================
--- pypy/trunk/pypy/translator/platform/linux.py	(original)
+++ pypy/trunk/pypy/translator/platform/linux.py	Wed Sep  8 10:06:15 2010
@@ -3,7 +3,7 @@
 from pypy.translator.platform import _run_subprocess
 from pypy.translator.platform.posix import BasePosix
 
-class Linux(BasePosix):
+class BaseLinux(BasePosix):
     name = "linux"
     
     link_flags = ('-pthread', '-lrt')
@@ -25,10 +25,12 @@
         return self._pkg_config("libffi", "--libs-only-L",
                                 ['/usr/lib/libffi'])
 
+
+class Linux(BaseLinux):
     def library_dirs_for_libffi_a(self):
         # places where we need to look for libffi.a
         return self.library_dirs_for_libffi() + ['/usr/lib']
 
 
-class Linux64(Linux):
-    shared_only = ('-fPIC',)
+class Linux64(BaseLinux):
+    pass

Modified: pypy/trunk/pypy/translator/platform/posix.py
==============================================================================
--- pypy/trunk/pypy/translator/platform/posix.py	(original)
+++ pypy/trunk/pypy/translator/platform/posix.py	Wed Sep  8 10:06:15 2010
@@ -104,6 +104,11 @@
         else:
             target_name = exe_name.basename
 
+        if shared:
+            cflags = self.cflags + self.shared_only
+        else:
+            cflags = self.cflags + self.standalone_only
+
         m = GnuMakefile(path)
         m.exe_name = exe_name
         m.eci = eci
@@ -132,7 +137,7 @@
             ('LIBS', self._libs(eci.libraries)),
             ('LIBDIRS', self._libdirs(eci.library_dirs)),
             ('INCLUDEDIRS', self._includedirs(rel_includedirs)),
-            ('CFLAGS', self.cflags),
+            ('CFLAGS', cflags),
             ('CFLAGSEXTRA', list(eci.compile_extra)),
             ('LDFLAGS', linkflags),
             ('LDFLAGSEXTRA', list(eci.link_extra)),



More information about the Pypy-commit mailing list