[pypy-svn] r33193 - pypy/dist/pypy/jit/codegen/i386

arigo at codespeak.net arigo at codespeak.net
Wed Oct 11 22:45:19 CEST 2006


Author: arigo
Date: Wed Oct 11 22:45:16 2006
New Revision: 33193

Added:
   pypy/dist/pypy/jit/codegen/i386/viewcode.py   (contents, props changed)
Modified:
   pypy/dist/pypy/jit/codegen/i386/codebuf.py
   pypy/dist/pypy/jit/codegen/i386/rgenop.py
Log:
- The start of a graphviewer-based inspector for the generated machine
  code.  Needs more work...

- reindent the write() method.



Modified: pypy/dist/pypy/jit/codegen/i386/codebuf.py
==============================================================================
--- pypy/dist/pypy/jit/codegen/i386/codebuf.py	(original)
+++ pypy/dist/pypy/jit/codegen/i386/codebuf.py	Wed Oct 11 22:45:16 2006
@@ -2,6 +2,11 @@
 from ctypes import POINTER, cast, c_char, c_void_p, CFUNCTYPE, c_int
 from ri386 import I386CodeBuilder
 
+# Set this to enable/disable the CODE_DUMP stdout lines
+CODE_DUMP = True
+
+# ____________________________________________________________
+
 
 modname = 'pypy.jit.codegen.i386.codebuf_' + os.name
 memhandler = __import__(modname, globals(), locals(), ['__doc__'])
@@ -13,6 +18,7 @@
     pass
 
 class InMemoryCodeBuilder(I386CodeBuilder):
+    _last_dump_start = 0
 
     def __init__(self, start, end):
         map_size = end - start
@@ -26,22 +32,41 @@
         self._pos = 0
 
     def write(self, data):
-         p = self._pos
-         if p + len(data) > self._size:
-             raise CodeBlockOverflow
-         for c in data:
-             self._data.contents[p] = c
-             p += 1
-         self._pos = p
+        p = self._pos
+        if p + len(data) > self._size:
+            raise CodeBlockOverflow
+        for c in data:
+            self._data.contents[p] = c
+            p += 1
+        self._pos = p
 
     def tell(self):
         baseaddr = cast(self._data, c_void_p).value
         return baseaddr + self._pos
 
     def execute(self, arg1, arg2):
+        # XXX old testing stuff
         fnptr = cast(self._data, binaryfn)
         return fnptr(arg1, arg2)
 
+    def done(self):
+        # normally, no special action is needed here
+        if CODE_DUMP:
+            self.dump_range(self._last_dump_start, self._pos)
+            self._last_dump_start = self._pos
+
+    def dump_range(self, start, end):
+        HEX = '0123456789ABCDEF'
+        dump = []
+        for p in range(start, end):
+            o = ord(self._data.contents[p])
+            dump.append(HEX[o >> 4])
+            dump.append(HEX[o & 15])
+            if (p & 3) == 3:
+                dump.append(':')
+        os.write(2, 'CODE_DUMP @%x +%d  %s\n' % (self.tell() - self._pos,
+                                                 start, ''.join(dump)))
+
 
 class MachineCodeBlock(InMemoryCodeBuilder):
 

Modified: pypy/dist/pypy/jit/codegen/i386/rgenop.py
==============================================================================
--- pypy/dist/pypy/jit/codegen/i386/rgenop.py	(original)
+++ pypy/dist/pypy/jit/codegen/i386/rgenop.py	Wed Oct 11 22:45:16 2006
@@ -175,6 +175,7 @@
             mc.JMP(rel32(self.default_case_addr))
         else:
             mc.UD2()
+        mc.done()
         self.nextfreepos = pos
         return targetbuilder
 
@@ -186,6 +187,7 @@
         end   = self.endfreepos
         mc = InMemoryCodeBuilder(start, end)
         mc.JMP(rel32(self.default_case_addr))
+        mc.done()
         return targetbuilder
 
 
@@ -202,6 +204,7 @@
         return [Var(pos) for pos in range(numargs-1, -1, -1)]
 
     def _close(self):
+        self.mc.done()
         self.rgenop.close_mc(self.mc)
         self.mc = None
 

Added: pypy/dist/pypy/jit/codegen/i386/viewcode.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/jit/codegen/i386/viewcode.py	Wed Oct 11 22:45:16 2006
@@ -0,0 +1,227 @@
+#! /usr/bin/env python
+"""
+Viewer for the CODE_DUMP output of compiled programs generating code.
+
+Try:
+    ./viewcode.py dumpfile.txt
+or
+    /tmp/usession-xxx/testing_1/testing_1 -var 4  2>&1  |  ./viewcode.py
+"""
+
+import operator, sys, os, re, py
+
+# don't use pypy.tool.udir here to avoid removing old usessions which
+# might still contain interesting executables
+udir = py.path.local.make_numbered_dir(prefix='viewcode-', keep=2)
+tmpfile = str(udir.join('dump.tmp'))
+
+# ____________________________________________________________
+# Some support code from Psyco.  There is more over there,
+# I am porting it in a lazy fashion...  See py-utils/xam.py
+
+# the disassembler to use. 'objdump' writes GNU-style instructions.
+# 'ndisasm' uses Intel syntax.  XXX ndisasm output parsing is missing...
+
+objdump = 'objdump -b binary -m i386 --adjust-vma=%(origin)d -D %(file)s'
+#objdump = 'ndisasm -o %(origin)d -u %(file)s'
+if sys.platform == "win32":
+    XXX   # lots more in Psyco
+
+def machine_code_dump(data, originaddr):
+    f = open(tmpfile, 'wb')
+    f.write(data)
+    f.close()
+    g = os.popen(objdump % {'file': tmpfile, 'origin': originaddr}, 'r')
+    result = g.readlines()
+    g.close()
+    return result
+
+re_addr = re.compile(r'[\s,$]0x([0-9a-fA-F]+)')
+
+def lineaddresses(line):
+    result = []
+    i = 0
+    while 1:
+        match = re_addr.search(line, i)
+        if not match:
+            break
+        i = match.end()
+        addr = long(match.group(1), 16)
+        result.append(addr)
+    return result
+
+# ____________________________________________________________
+
+class CodeRange(object):
+
+    def __init__(self, addr, data):
+        self.addr = addr
+        self.data = data
+
+    def update(self, other):
+        if other.addr < self.addr:
+            delta = self.addr - other.addr
+            self.addr -= delta
+            self.offset += delta
+            self.data = '\x00'*delta + self.data
+        ofs1 = other.addr - self.addr
+        ofs2 = ofs1 + len(other.data)
+        self.data = self.data[:ofs1] + other.data + self.data[ofs2:]
+
+    def cmpop(op):
+        def _cmp(self, other):
+            if not isinstance(other, CodeRange):
+                return NotImplemented
+            return op((self.addr, self.data), (other.addr, other.data))
+        return _cmp
+    __lt__ = cmpop(operator.lt)
+    __le__ = cmpop(operator.le)
+    __eq__ = cmpop(operator.eq)
+    __ne__ = cmpop(operator.ne)
+    __gt__ = cmpop(operator.gt)
+    __ge__ = cmpop(operator.ge)
+    del cmpop
+
+    def disassemble(self):
+        if not hasattr(self, 'text'):
+            lines = machine_code_dump(self.data, self.addr)
+            self.text = ''.join(lines[6:])   # drop some objdump cruft
+        return self.text
+
+    def findjumps(self):
+        text = self.disassemble()
+        for i, line in enumerate(text.splitlines()):
+            if '\tj' not in line: # poor heuristic to recognize lines that
+                continue          # could be jump instructions
+            addrs = list(lineaddresses(line))
+            if not addrs:
+                continue
+            addr = addrs[-1]
+            yield i, addr
+
+
+class World(object):
+
+    def __init__(self):
+        self.ranges = []
+        self.labeltargets = {}
+        self.jumps = {}
+
+    def parse(self, f):
+        for line in f:
+            if line.startswith('CODE_DUMP '):
+                pieces = line.split()
+                assert pieces[1].startswith('@')
+                assert pieces[2].startswith('+')
+                baseaddr = long(pieces[1][1:], 16) & 0xFFFFFFFFL
+                offset = int(pieces[2][1:])
+                addr = baseaddr + offset
+                data = pieces[3].replace(':', '').decode('hex')
+                coderange = CodeRange(addr, data)
+                # XXX sloooooooow!
+                for r in self.ranges:
+                    if addr < r.addr+len(r.data) and r.addr < addr+len(data):
+                        r.update(coderange)
+                        break
+                else:
+                    self.ranges.append(coderange)
+##        # find cross-references between blocks
+##        for r in self.ranges:
+##            for lineno, targetaddr in r.findjumps():
+##                self.labeltargets[targetaddr] = True
+##        # split blocks at labeltargets
+##        # XXX slooooow!
+##        t = self.labeltargets
+##        print t
+##        for r in self.ranges:
+##            print r.addr, r.addr + len(r.data)
+##            for i in range(r.addr + 1, r.addr + len(r.data)):
+##                if i in t:
+##                    print i
+##                    ofs = i - r.addr
+##                    self.ranges.append(CodeRange(i, r.data[ofs:]))
+##                    r.data = r.data[:ofs]
+##                    del r.text
+##                    break
+##        # hack hack hacked
+
+    def show(self):
+        g1 = Graph('codedump')
+        for r in self.ranges:
+            text = r.disassemble()
+            text = '0x%x\n\n%s' % (r.addr, text)
+            g1.emit_node('N_%x' % r.addr, shape="box", label=text)
+            for lineno, targetaddr in r.findjumps():
+                g1.emit_edge('N_%x' % r.addr, 'N_%x' % targetaddr)
+        g1.display()
+
+
+# ____________________________________________________________
+# XXX pasted from
+# http://codespeak.net/svn/user/arigo/hack/misc/graphlib.py
+# but needs to be a bit more subtle later
+
+from pypy.translator.tool.make_dot import DotGen
+from pypy.translator.tool.pygame.graphclient import display_layout
+
+class Graph(DotGen):
+
+    def highlight(self, word, text, linked_to=None):
+        if not hasattr(self, '_links'):
+            self._links = {}
+            self._links_to = {}
+        self._links[word] = text
+        if linked_to:
+            self._links_to[word] = linked_to
+
+    def display(self):
+        "Display a graph page locally."
+        display_layout(_Page(self))
+
+
+class NoGraph(Exception):
+    pass
+
+class _Page:
+    def __init__(self, graph_builder):
+        if callable(graph_builder):
+            graph = graph_builder()
+        else:
+            graph = graph_builder
+        if graph is None:
+            raise NoGraph
+        self.graph_builder = graph_builder
+
+    def content(self):
+        return _PageContent(self.graph_builder)
+
+class _PageContent:
+    def __init__(self, graph_builder):
+        if callable(graph_builder):
+            graph = graph_builder()
+        else:
+            graph = graph_builder
+        assert graph is not None
+        self.graph_builder = graph_builder
+        self.graph = graph
+        self.links = getattr(graph, '_links', {})
+        if not hasattr(graph, '_source'):
+            graph._source = graph.generate(target=None)
+        self.source = graph._source
+
+    def followlink(self, link):
+        try:
+            return _Page(self.graph._links_to[link])
+        except NoGraph:
+            return _Page(self.graph_builder)
+
+# ____________________________________________________________
+
+if __name__ == '__main__':
+    if len(sys.argv) == 1:
+        f = sys.stdin
+    else:
+        f = open(sys.argv[1], 'r')
+    world = World()
+    world.parse(f)
+    world.show()



More information about the Pypy-commit mailing list