[pypy-svn] r35482 - in pypy/branch/jit-real-world/pypy: interpreter jit jit/codegen/i386 module/pypyjit

Fri Dec 8 14:51:23 CET 2006

Author: arigo
Date: Fri Dec  8 14:51:20 2006
New Revision: 35482

Modified:
   pypy/branch/jit-real-world/pypy/interpreter/pyopcode.py
   pypy/branch/jit-real-world/pypy/jit/TODO.txt
   pypy/branch/jit-real-world/pypy/jit/codegen/i386/codebuf.py
   pypy/branch/jit-real-world/pypy/jit/codegen/i386/viewcode.py
   pypy/branch/jit-real-world/pypy/module/pypyjit/interp_jit.py
Log:
(pedronis, arigo)

A bug fix in our hacked interpreter main loop.
More JIT debugging support.


Modified: pypy/branch/jit-real-world/pypy/interpreter/pyopcode.py
==============================================================================

--- pypy/branch/jit-real-world/pypy/interpreter/pyopcode.py	(original)
+++ pypy/branch/jit-real-world/pypy/interpreter/pyopcode.py	Fri Dec  8 14:51:20 2006
@@ -109,8 +109,8 @@
         space = self.space
         while True:
             hint(None, global_merge_point=True)
+            self.last_instr = intmask(next_instr)
             if not we_are_translated():   # JJJ
-                self.last_instr = intmask(next_instr)
                 ec.bytecode_trace(self)
                 # For the sequel, force 'next_instr' to be unsigned for performance
                 next_instr = r_uint(self.last_instr)
@@ -152,6 +152,7 @@
                     continue    # now inside a 'finally' block
 
             if opcode == opcodedesc.YIELD_VALUE.index:
+                #self.last_instr = intmask(next_instr - 1) XXX clean up!
                 w_yieldvalue = self.valuestack.pop()
                 return w_yieldvalue
 

Modified: pypy/branch/jit-real-world/pypy/jit/TODO.txt
==============================================================================
--- pypy/branch/jit-real-world/pypy/jit/TODO.txt	(original)
+++ pypy/branch/jit-real-world/pypy/jit/TODO.txt	Fri Dec  8 14:51:20 2006
@@ -1,4 +1,4 @@
-- review codegen (e.g. getfield/setfield for size != word)
+- fix the XXXs in codegen
 
 - [] (getitem) used inside complex try/except ends up using the bound
   checking version: transformation after flowgraphing that marks op
@@ -36,3 +36,8 @@
 
 - reduce the size of the timeshifted code: no dispatch for no
   split/merge functions ...
+
+- insert hints to forget uninteresting constants, e.g. the
+  initial 0 of RPython list iterators
+
+- pyopcode.compare_dispatch_table => switch statement

Modified: pypy/branch/jit-real-world/pypy/jit/codegen/i386/codebuf.py
==============================================================================
--- pypy/branch/jit-real-world/pypy/jit/codegen/i386/codebuf.py	(original)
+++ pypy/branch/jit-real-world/pypy/jit/codegen/i386/codebuf.py	Fri Dec  8 14:51:20 2006
@@ -61,6 +61,7 @@
 class MachineCodeDumper:
     enabled = True
     log_fd = -1
+    sys_executable = None
 
     def open(self):
         if self.log_fd < 0:
@@ -77,6 +78,12 @@
                 os.write(2, "could not create log file\n")
                 self.enabled = False
                 return False
+            # log the executable name
+            from pypy.jit.codegen.hlinfo import highleveljitinfo
+            os.write(self.log_fd, 'BACKEND i386\n')
+            if highleveljitinfo.sys_executable:
+                os.write(self.log_fd, 'SYS_EXECUTABLE %s\n' % (
+                    highleveljitinfo.sys_executable,))
         return True
 
     def dump(self, cb, tag, pos, msg):

Modified: pypy/branch/jit-real-world/pypy/jit/codegen/i386/viewcode.py
==============================================================================
--- pypy/branch/jit-real-world/pypy/jit/codegen/i386/viewcode.py	(original)
+++ pypy/branch/jit-real-world/pypy/jit/codegen/i386/viewcode.py	Fri Dec  8 14:51:20 2006
@@ -20,24 +20,42 @@
 # Some support code from Psyco.  There is more over there,
 # I am porting it in a lazy fashion...  See py-utils/xam.py
 
-# the disassembler to use. 'objdump' writes GNU-style instructions.
-# 'ndisasm' uses Intel syntax.  XXX ndisasm output parsing is missing...
-
-objdump = 'objdump -b binary -m i386 --adjust-vma=%(origin)d -D %(file)s'
-#objdump = 'ndisasm -o %(origin)d -u %(file)s'
 if sys.platform == "win32":
     XXX   # lots more in Psyco
 
 def machine_code_dump(data, originaddr):
+    # the disassembler to use. 'objdump' writes GNU-style instructions.
+    # 'ndisasm' would use Intel syntax, but you need to fix the output parsing.
+    objdump = 'objdump -b binary -m i386 --adjust-vma=%(origin)d -D %(file)s'
+    #
     f = open(tmpfile, 'wb')
     f.write(data)
     f.close()
     g = os.popen(objdump % {'file': tmpfile, 'origin': originaddr}, 'r')
     result = g.readlines()
     g.close()
-    return result
+    return result[6:]   # drop some objdump cruft
+
+def load_symbols(filename):
+    # the program that lists symbols, and the output it gives
+    symbollister = 'nm %s'
+    re_symbolentry = re.compile(r'([0-9a-fA-F]+)\s\w\s(.*)')
+    #
+    symbols = {}
+    g = os.popen(symbollister % filename, "r")
+    for line in g:
+        match = re_symbolentry.match(line)
+        if match:
+            addr = long(match.group(1), 16)
+            name = match.group(2)
+            if name.startswith('pypy_g_'):
+                name = '\xb7' + name[7:]
+            symbols[addr] = name
+    g.close()
+    return symbols
 
-re_addr = re.compile(r'[\s,$]0x([0-9a-fA-F]+)')
+re_addr = re.compile(r'[\s,$]0x([0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]+)')
+re_lineaddr = re.compile(r'\s*0?x?([0-9a-fA-F]+)')
 
 def lineaddresses(line):
     result = []
@@ -56,7 +74,8 @@
 class CodeRange(object):
     fallthrough = False
 
-    def __init__(self, addr, data):
+    def __init__(self, world, addr, data):
+        self.world = world
         self.addr = addr
         self.data = data
 
@@ -87,7 +106,23 @@
     def disassemble(self):
         if not hasattr(self, 'text'):
             lines = machine_code_dump(self.data, self.addr)
-            self.text = ''.join(lines[6:])   # drop some objdump cruft
+            # instead of adding symbol names in the dumps we could
+            # also make the 0xNNNNNNNN addresses be red and show the
+            # symbol name when the mouse is over them
+            logentries = self.world.logentries
+            symbols = self.world.symbols
+            for i, line in enumerate(lines):
+                match = re_lineaddr.match(line)
+                if match:
+                    addr = long(match.group(1), 16)
+                    logentry = logentries.get(addr)
+                    if logentry:
+                        lines[i] = '\n%s\n%s' % (logentry, lines[i])
+                for addr in lineaddresses(line):
+                    sym = symbols.get(addr)
+                    if sym:
+                        lines[i] = '%s\t%s\n' % (lines[i].rstrip(), sym)
+            self.text = ''.join(lines)
         return self.text
 
     def findjumps(self):
@@ -112,6 +147,8 @@
         self.ranges = []
         self.labeltargets = {}
         self.jumps = {}
+        self.symbols = {}
+        self.logentries = {}
 
     def parse(self, f):
         for line in f:
@@ -123,7 +160,7 @@
                 offset = int(pieces[2][1:])
                 addr = baseaddr + offset
                 data = pieces[3].replace(':', '').decode('hex')
-                coderange = CodeRange(addr, data)
+                coderange = CodeRange(self, addr, data)
                 # XXX sloooooooow!
                 for r in self.ranges:
                     if addr < r.addr+len(r.data) and r.addr < addr+len(data):
@@ -131,6 +168,17 @@
                         break
                 else:
                     self.ranges.append(coderange)
+            elif line.startswith('LOG '):
+                pieces = line.split(None, 3)
+                assert pieces[1].startswith('@')
+                assert pieces[2].startswith('+')
+                baseaddr = long(pieces[1][1:], 16) & 0xFFFFFFFFL
+                offset = int(pieces[2][1:])
+                addr = baseaddr + offset
+                self.logentries[addr] = pieces[3]
+            elif line.startswith('SYS_EXECUTABLE '):
+                filename = line[len('SYS_EXECUTABLE '):].strip()
+                self.symbols.update(load_symbols(filename))
         # find cross-references between blocks
         for r in self.ranges:
             for lineno, targetaddr, _ in r.findjumps():
@@ -145,7 +193,7 @@
                 if i in t:
                     print i
                     ofs = i - r.addr
-                    self.ranges.append(CodeRange(i, r.data[ofs:]))
+                    self.ranges.append(CodeRange(self, i, r.data[ofs:]))
                     r.data = r.data[:ofs]
                     r.fallthrough = True
                     try:
@@ -161,7 +209,7 @@
             text, width = tab2columns(r.disassemble())
             text = '0x%x\n\n%s' % (r.addr, text)
             g1.emit_node('N_%x' % r.addr, shape="box", label=text,
-                         width=str(width*0.125))
+                         width=str(width*0.1125))
             for lineno, targetaddr, final in r.findjumps():
                 if final:
                     color = "black"
@@ -175,7 +223,7 @@
     lines = text.split('\n')
     columnwidth = []
     for line in lines:
-        columns = line.split('\t')
+        columns = line.split('\t')[:-1]
         while len(columnwidth) < len(columns):
             columnwidth.append(0)
         for i, s in enumerate(columns):
@@ -183,6 +231,7 @@
             if not s.endswith(':'):
                 width += 2
             columnwidth[i] = max(columnwidth[i], width)
+    columnwidth.append(1)
     result = []
     for line in lines:
         columns = line.split('\t')
@@ -190,10 +239,9 @@
         for width, s in zip(columnwidth, columns):
             text.append(s.strip().ljust(width))
         result.append(' '.join(text))
-    if result:
-        totalwidth = len(result[0])
-    else:
-        totalwidth = 1
+    lengths = [len(line) for line in result]
+    lengths.append(1)
+    totalwidth = max(lengths)
     return '\\l'.join(result), totalwidth
 
 # ____________________________________________________________

Modified: pypy/branch/jit-real-world/pypy/module/pypyjit/interp_jit.py
==============================================================================
--- pypy/branch/jit-real-world/pypy/module/pypyjit/interp_jit.py	(original)
+++ pypy/branch/jit-real-world/pypy/module/pypyjit/interp_jit.py	Fri Dec  8 14:51:20 2006
@@ -73,5 +73,14 @@
 # Public interface
 
 def enable(space, w_code, w_enabled=True):
+    # save the app-level sys.executable in JITInfo, where the machine
+    # code backend can fish for it - XXX the following import will look
+    # less obscure once codebuf.py is moved to a general
+    # processor-independent place
+    from pypy.jit.codegen.hlinfo import highleveljitinfo
+    if highleveljitinfo.sys_executable is None:
+        highleveljitinfo.sys_executable = space.str_w(
+            space.sys.get('executable'))
+
     code = space.interp_w(PyCode, w_code)
     code.jit_enable = space.is_true(w_enabled)