[pypy-commit] pypy reflex-support: merge default into branch

wlav noreply at buildbot.pypy.org
Thu Aug 11 02:31:50 CEST 2011


Author: Wim Lavrijsen <WLavrijsen at lbl.gov>
Branch: reflex-support
Changeset: r46431:3df02e2fcf1e
Date: 2011-08-08 15:53 -0700
http://bitbucket.org/pypy/pypy/changeset/3df02e2fcf1e/

Log:	merge default into branch

diff --git a/pypy/doc/release-1.6.0.rst b/pypy/doc/release-1.6.0.rst
--- a/pypy/doc/release-1.6.0.rst
+++ b/pypy/doc/release-1.6.0.rst
@@ -2,4 +2,84 @@
 PyPy 1.6 - faster than ever
 ===========================
 
-We're pleased to announce 1.6 release of PyPy.
+We're pleased to announce the 1.6 release of PyPy. This release brings a lot
+of bugfixes and performance improvements over 1.5, and improves support for
+Windows 32bit and OS X 64bit. This version fully implements Python 2.7.1 and
+has beta level support for loading CPython C extensions.  You can download it
+here:
+
+    http://pypy.org/download.html
+
+What is PyPy?
+=============
+
+PyPy is a very compliant Python interpreter, almost a drop-in replacement for
+CPython 2.7.1. It's fast (`pypy 1.5 and cpython 2.6.2`_ performance comparison)
+due to its integrated tracing JIT compiler. XXX: compare to 2.7.1
+
+This release supports x86 machines running Linux 32/64 or Mac OS X.  Windows 32
+is beta (it roughly works but a lot of small issues have not been fixed so
+far).  Windows 64 is not yet supported.
+
+The main topics of this release are speed and stability: on average, PyPy 1.6
+is between 20% and 30% faster than PyPy 1.5, and overall it's 4.3 times faster
+than CPython when running our set of benchmarks.
+
+The speed improvements have been made possible by optimizing many of the
+layers which compose PyPy.  In particular, we improved: the Garbage Collector,
+the JIT warmup time, the optimizations performed by the JIT, the quality of
+the generated machine code and the implementation of our Python interpreter.
+
+
+Highlights
+==========
+
+* Numerous performance improvements, overall giving considerable speedups:
+
+  - better GC behavior when dealing with very large objects and arrays
+
+  - `fast ctypes`_: now calls to ctypes functions are seen and optimized
+    by the JIT, and they are up to 60 times faster than PyPy 1.5 and 10 times
+    faster than CPython
+
+  - improved generators(1): simple generators now are inlined into the caller
+    loop, making performance up to 3.5 times faster than PyPy 1.5.
+
+  - improved generators(2): thanks to other optimizations, even generators
+    that are not inlined are between 10% and 20% faster than PyPy 1.5.
+
+  - faster warmup time for the JIT
+
+  - JIT support for single floats (e.g., for ``array('f')``)
+
+  - optimized dictionaries: the internal representation of dictionaries is now
+    dynamically selected depending on the type of stored objects, resulting in
+    faster code and smaller memory footprint.  For example, dictionaries whose
+    keys are all strings, or all integers.
+
+* JitViewer: this is the first official release which includes the JitViewer,
+  a web-based tool which helps you to see which parts of your Python code have
+  been compiled by the JIT, down until the assembler. XXX: publish a public
+  demo?
+
+- The CPython extension module API has been improved and now supports many
+  more extensions. For information on which one are supported, please refer to
+  our `compatibility wiki`_.
+
+* Multibyte encoding support: this was of of the last areas in which we were
+  still behind CPython, but now we fully support them. (XXX: is that true?)
+
+* Preliminary support for NumPy: this release includes a preview of a very
+  fast NumPy module integrated with the PyPy JIT.  Unfortunately, this does
+  not mean that you can expect to take an existing NumPy program and run it on
+  PyPy, because the module is still unfinished and supports only some of the
+  numpy API.  However, what works is blazingly fast :-)
+
+* Bugfixes: since the 1.5 release we fixed 53 bugs in our `bug tracker`_, not
+  counting the numerous bugs that were found and reported through other
+  channels than the bug tracker.
+
+Cheers,
+
+Carl Friedrich Bolz, Laura Creighton, Antonio Cuni, Maciej Fijalkowski,
+Amaury Forgeot d'Arc, Alex Gaynor, Armin Rigo and the PyPy team
diff --git a/pypy/jit/codewriter/jtransform.py b/pypy/jit/codewriter/jtransform.py
--- a/pypy/jit/codewriter/jtransform.py
+++ b/pypy/jit/codewriter/jtransform.py
@@ -198,7 +198,6 @@
             self.vable_array_vars[op.result]= self.vable_array_vars[op.args[0]]
 
     rewrite_op_cast_pointer = rewrite_op_same_as
-    rewrite_op_cast_opaque_ptr = rewrite_op_same_as   # rlib.rerased
     def rewrite_op_cast_bool_to_int(self, op): pass
     def rewrite_op_cast_bool_to_uint(self, op): pass
     def rewrite_op_cast_char_to_int(self, op): pass
diff --git a/pypy/jit/metainterp/blackhole.py b/pypy/jit/metainterp/blackhole.py
--- a/pypy/jit/metainterp/blackhole.py
+++ b/pypy/jit/metainterp/blackhole.py
@@ -500,6 +500,9 @@
     @arguments("r", returns="i")
     def bhimpl_ptr_nonzero(a):
         return bool(a)
+    @arguments("r", returns="r")
+    def bhimpl_cast_opaque_ptr(a):
+        return a
 
     @arguments("i", returns="i")
     def bhimpl_int_copy(a):
diff --git a/pypy/jit/metainterp/optimizeopt/heap.py b/pypy/jit/metainterp/optimizeopt/heap.py
--- a/pypy/jit/metainterp/optimizeopt/heap.py
+++ b/pypy/jit/metainterp/optimizeopt/heap.py
@@ -239,13 +239,14 @@
             return
         cf.force_lazy_setfield(self, can_cache)
 
-    def force_lazy_setarrayitem(self, arraydescr, can_cache=True):
+    def force_lazy_setarrayitem(self, arraydescr, indexvalue=None, can_cache=True):
         try:
             submap = self.cached_arrayitems[arraydescr]
         except KeyError:
             return
-        for cf in submap.values():
-            cf.force_lazy_setfield(self, can_cache)
+        for idx, cf in submap.iteritems():
+            if indexvalue is None or indexvalue.intbound.contains(idx):
+                cf.force_lazy_setfield(self, can_cache)
 
     def fixup_guard_situation(self):
         # hackish: reverse the order of the last two operations if it makes
@@ -357,7 +358,7 @@
                 return
         else:
             # variable index, so make sure the lazy setarrayitems are done
-            self.force_lazy_setarrayitem(op.getdescr())
+            self.force_lazy_setarrayitem(op.getdescr(), indexvalue=indexvalue)
         # default case: produce the operation
         arrayvalue.ensure_nonnull()
         self.emit_operation(op)
@@ -381,7 +382,7 @@
             cf.do_setfield(self, op)
         else:
             # variable index, so make sure the lazy setarrayitems are done
-            self.force_lazy_setarrayitem(op.getdescr(), can_cache=False)
+            self.force_lazy_setarrayitem(op.getdescr(), indexvalue=indexvalue, can_cache=False)
             # and then emit the operation
             self.emit_operation(op)
 
diff --git a/pypy/jit/metainterp/optimizeopt/intbounds.py b/pypy/jit/metainterp/optimizeopt/intbounds.py
--- a/pypy/jit/metainterp/optimizeopt/intbounds.py
+++ b/pypy/jit/metainterp/optimizeopt/intbounds.py
@@ -125,6 +125,17 @@
         r = self.getvalue(op.result)
         r.intbound.intersect(v1.intbound.div_bound(v2.intbound))
 
+    def optimize_INT_MOD(self, op):
+        self.emit_operation(op)
+        v2 = self.getvalue(op.getarg(1))
+        if v2.is_constant():
+            val = v2.box.getint()
+            r = self.getvalue(op.result)
+            if val < 0:
+                val = -val
+            r.intbound.make_gt(IntBound(-val, -val))
+            r.intbound.make_lt(IntBound(val, val))
+
     def optimize_INT_LSHIFT(self, op):
         v1 = self.getvalue(op.getarg(0))
         v2 = self.getvalue(op.getarg(1))
diff --git a/pypy/jit/metainterp/optimizeopt/optimizer.py b/pypy/jit/metainterp/optimizeopt/optimizer.py
--- a/pypy/jit/metainterp/optimizeopt/optimizer.py
+++ b/pypy/jit/metainterp/optimizeopt/optimizer.py
@@ -264,6 +264,7 @@
         self.posponedop = None
         self.exception_might_have_happened = False
         self.quasi_immutable_deps = None
+        self.opaque_pointers = {}
         self.newoperations = []
         if loop is not None:
             self.call_pure_results = loop.call_pure_results
@@ -555,6 +556,11 @@
     def optimize_DEBUG_MERGE_POINT(self, op):
         self.emit_operation(op)
 
+    def optimize_CAST_OPAQUE_PTR(self, op):
+        value = self.getvalue(op.getarg(0))
+        self.opaque_pointers[value] = True
+        self.make_equal_to(op.result, value)
+
 dispatch_opt = make_dispatcher_method(Optimizer, 'optimize_',
         default=Optimizer.optimize_default)
 
diff --git a/pypy/jit/metainterp/optimizeopt/simplify.py b/pypy/jit/metainterp/optimizeopt/simplify.py
--- a/pypy/jit/metainterp/optimizeopt/simplify.py
+++ b/pypy/jit/metainterp/optimizeopt/simplify.py
@@ -25,6 +25,8 @@
         #     but it's a bit hard to implement robustly if heap.py is also run
         pass
 
+    optimize_CAST_OPAQUE_PTR = optimize_VIRTUAL_REF
+
 
 dispatch_opt = make_dispatcher_method(OptSimplify, 'optimize_',
         default=OptSimplify.emit_operation)
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
@@ -4621,6 +4621,96 @@
         """
         self.optimize_strunicode_loop(ops, expected)
 
+    def test_intmod_bounds(self):
+        ops = """
+        [i0, i1]
+        i2 = int_mod(i0, 12)
+        i3 = int_gt(i2, 12)
+        guard_false(i3) []
+        i4 = int_lt(i2, -12)
+        guard_false(i4) []
+        i5 = int_mod(i1, -12)
+        i6 = int_lt(i5, -12)
+        guard_false(i6) []
+        i7 = int_gt(i5, 12)
+        guard_false(i7) []
+        jump(i2, i5)
+        """
+        expected = """
+        [i0, i1]
+        i2 = int_mod(i0, 12)
+        i5 = int_mod(i1, -12)
+        jump(i2, i5)
+        """
+        self.optimize_loop(ops, expected)
+
+        # This the sequence of resoperations that is generated for a Python
+        # app-level int % int.  When the modulus is constant and when i0
+        # is known non-negative it should be optimized to a single int_mod.
+        ops = """
+        [i0]
+        i5 = int_ge(i0, 0)
+        guard_true(i5) []
+        i1 = int_mod(i0, 42)
+        i2 = int_rshift(i1, 63)
+        i3 = int_and(42, i2)
+        i4 = int_add(i1, i3)
+        finish(i4)
+        """
+        expected = """
+        [i0]
+        i5 = int_ge(i0, 0)
+        guard_true(i5) []
+        i1 = int_mod(i0, 42)
+        finish(i1)
+        """
+        py.test.skip("in-progress")
+        self.optimize_loop(ops, expected)
+
+        # Also, 'n % power-of-two' can be turned into int_and(),
+        # but that's a bit harder to detect here because it turns into
+        # several operations, and of course it is wrong to just turn
+        # int_mod(i0, 16) into int_and(i0, 15).
+        ops = """
+        [i0]
+        i1 = int_mod(i0, 16)
+        i2 = int_rshift(i1, 63)
+        i3 = int_and(16, i2)
+        i4 = int_add(i1, i3)
+        finish(i4)
+        """
+        expected = """
+        [i0]
+        i4 = int_and(i0, 15)
+        finish(i4)
+        """
+        py.test.skip("harder")
+        self.optimize_loop(ops, expected)
+
+    def test_bounded_lazy_setfield(self):
+        ops = """
+        [p0, i0]
+        i1 = int_gt(i0, 2)
+        guard_true(i1) []
+        setarrayitem_gc(p0, 0, 3)
+        setarrayitem_gc(p0, 2, 4)
+        setarrayitem_gc(p0, i0, 15)
+        i2 = getarrayitem_gc(p0, 2)
+        jump(p0, i2)
+        """
+        # Remove the getarrayitem_gc, because we know that p[i0] does not alias
+        # p0[2]
+        expected = """
+        [p0, i0]
+        i1 = int_gt(i0, 2)
+        guard_true(i1) []
+        setarrayitem_gc(p0, i0, 15)
+        setarrayitem_gc(p0, 0, 3)
+        setarrayitem_gc(p0, 2, 4)
+        jump(p0, 4)
+        """
+        self.optimize_loop(ops, expected)
+
 
 class TestLLtype(BaseTestOptimizeBasic, LLtypeMixin):
     pass
diff --git a/pypy/jit/metainterp/optimizeopt/unroll.py b/pypy/jit/metainterp/optimizeopt/unroll.py
--- a/pypy/jit/metainterp/optimizeopt/unroll.py
+++ b/pypy/jit/metainterp/optimizeopt/unroll.py
@@ -513,6 +513,9 @@
     def safe_to_move(self, op):
         opnum = op.getopnum()
         descr = op.getdescr()
+        for box in op.getarglist():
+            if self.optimizer.getvalue(box) in self.optimizer.opaque_pointers:
+                return False
         if op.is_always_pure() or op.is_foldable_guard():
             return True
         elif opnum == rop.JUMP:
diff --git a/pypy/jit/metainterp/pyjitpl.py b/pypy/jit/metainterp/pyjitpl.py
--- a/pypy/jit/metainterp/pyjitpl.py
+++ b/pypy/jit/metainterp/pyjitpl.py
@@ -233,6 +233,10 @@
         return self.execute(rop.PTR_EQ, box, history.CONST_NULL)
 
     @arguments("box")
+    def opimpl_cast_opaque_ptr(self, box):
+        return self.execute(rop.CAST_OPAQUE_PTR, box)
+
+    @arguments("box")
     def _opimpl_any_return(self, box):
         self.metainterp.finishframe(box)
 
diff --git a/pypy/jit/metainterp/resoperation.py b/pypy/jit/metainterp/resoperation.py
--- a/pypy/jit/metainterp/resoperation.py
+++ b/pypy/jit/metainterp/resoperation.py
@@ -437,6 +437,7 @@
     #
     'PTR_EQ/2b',
     'PTR_NE/2b',
+    'CAST_OPAQUE_PTR/1b',
     #
     'ARRAYLEN_GC/1d',
     'STRLEN/1',
diff --git a/pypy/jit/metainterp/test/test_loop.py b/pypy/jit/metainterp/test/test_loop.py
--- a/pypy/jit/metainterp/test/test_loop.py
+++ b/pypy/jit/metainterp/test/test_loop.py
@@ -800,6 +800,69 @@
 
         res = self.meta_interp(f, [200])
 
+    def test_regular_pointers_in_short_preamble(self):
+        # XXX do we really care about this case?  If not, we should
+        # at least detect it and complain during codewriter/jtransform
+        from pypy.rpython.lltypesystem import lltype
+        BASE = lltype.GcStruct('BASE')
+        A = lltype.GcStruct('A', ('parent', BASE), ('val', lltype.Signed))
+        B = lltype.GcStruct('B', ('parent', BASE), ('charval', lltype.Char))
+        myjitdriver = JitDriver(greens = [], reds = ['n', 'm', 'i', 'j', 'sa', 'p'])
+        def f(n, m, j):
+            i = sa = 0
+            pa = lltype.malloc(A)
+            pa.val = 7
+            p = pa.parent
+            while i < n:
+                myjitdriver.jit_merge_point(n=n, m=m, i=i, j=j, sa=sa, p=p)
+                if i < m:
+                    pa = lltype.cast_pointer(lltype.Ptr(A), p)
+                    sa += pa.val
+                elif i == m:
+                    pb = lltype.malloc(B)
+                    pb.charval = 'y'
+                    p = pb.parent
+                else:
+                    pb = lltype.cast_pointer(lltype.Ptr(B), p)
+                    sa += ord(pb.charval)
+                sa += 100
+                assert n>0 and m>0
+                i += j
+            return sa
+        expected = f(20, 10, 1)
+        res = self.meta_interp(f, [20, 10, 1])
+        assert res == expected
+
+    def test_unerased_pointers_in_short_preamble(self):
+        from pypy.rlib.rerased import new_erasing_pair
+        from pypy.rpython.lltypesystem import lltype
+        class A(object):
+            def __init__(self, val):
+                self.val = val
+        erase_A, unerase_A = new_erasing_pair('A')
+        erase_TP, unerase_TP = new_erasing_pair('TP')
+        TP = lltype.GcArray(lltype.Signed)
+        myjitdriver = JitDriver(greens = [], reds = ['n', 'm', 'i', 'j', 'sa', 'p'])
+        def f(n, m, j):
+            i = sa = 0
+            p = erase_A(A(7))
+            while i < n:
+                myjitdriver.jit_merge_point(n=n, m=m, i=i, j=j, sa=sa, p=p)
+                if i < m:
+                    sa += unerase_A(p).val
+                elif i == m:
+                    a = lltype.malloc(TP, 5)
+                    a[0] = 42
+                    p = erase_TP(a)
+                else:
+                    sa += unerase_TP(p)[0]
+                sa += A(i).val
+                assert n>0 and m>0
+                i += j
+            return sa
+        res = self.meta_interp(f, [20, 10, 1])
+        assert res == f(20, 10, 1)
+
 class TestOOtype(LoopTest, OOJitMixin):
     pass
 
diff --git a/pypy/jit/tool/oparser.py b/pypy/jit/tool/oparser.py
--- a/pypy/jit/tool/oparser.py
+++ b/pypy/jit/tool/oparser.py
@@ -53,7 +53,7 @@
 class OpParser(object):
 
     use_mock_model = False
-    
+
     def __init__(self, input, cpu, namespace, type_system, boxkinds,
                  invent_fail_descr=default_fail_descr,
                  nonstrict=False):
@@ -187,7 +187,7 @@
             poss_descr = allargs[-1].strip()
             if poss_descr.startswith('descr='):
                 descr = self.get_descr(poss_descr[len('descr='):])
-                allargs = allargs[:-1]        
+                allargs = allargs[:-1]
             for arg in allargs:
                 arg = arg.strip()
                 try:
@@ -240,7 +240,7 @@
             fail_args = None
             if opnum == rop.FINISH:
                 if descr is None and self.invent_fail_descr:
-                    descr = self.invent_fail_descr(self.model)
+                    descr = self.invent_fail_descr(self.model, fail_args)
             elif opnum == rop.JUMP:
                 if descr is None and self.invent_fail_descr:
                     descr = self.looptoken
diff --git a/pypy/rlib/jit.py b/pypy/rlib/jit.py
--- a/pypy/rlib/jit.py
+++ b/pypy/rlib/jit.py
@@ -115,7 +115,7 @@
         s_x = annmodel.not_const(s_x)
         access_directly = 's_access_directly' in kwds_s
         fresh_virtualizable = 's_fresh_virtualizable' in kwds_s
-        if  access_directly or fresh_virtualizable:
+        if access_directly or fresh_virtualizable:
             assert access_directly, "lone fresh_virtualizable hint"
             if isinstance(s_x, annmodel.SomeInstance):
                 from pypy.objspace.flow.model import Constant
@@ -280,7 +280,7 @@
 
     def specialize_call(self, hop):
         pass
-    
+
 vref_None = non_virtual_ref(None)
 
 # ____________________________________________________________
@@ -290,7 +290,7 @@
     """Inconsistency in the JIT hints."""
 
 PARAMETERS = {'threshold': 1032, # just above 1024
-              'function_threshold': 1617, # slightly more than one above 
+              'function_threshold': 1617, # slightly more than one above
               'trace_eagerness': 200,
               'trace_limit': 12000,
               'inlining': 1,
@@ -400,7 +400,7 @@
                             raise
     set_user_param._annspecialcase_ = 'specialize:arg(0)'
 
-    
+
     def on_compile(self, logger, looptoken, operations, type, *greenargs):
         """ A hook called when loop is compiled. Overwrite
         for your own jitdriver if you want to do something special, like
@@ -571,7 +571,7 @@
                 c_llname = hop.inputconst(lltype.Void, mangled_name)
                 getfield_op = self.get_getfield_op(hop.rtyper)
                 v_green = hop.genop(getfield_op, [v_red, c_llname],
-                                    resulttype = r_field)
+                                    resulttype=r_field)
                 s_green = s_red.classdef.about_attribute(fieldname)
                 assert s_green is not None
                 hop.rtyper.annotator.setbinding(v_green, s_green)
diff --git a/pypy/tool/gdb_pypy.py b/pypy/tool/gdb_pypy.py
--- a/pypy/tool/gdb_pypy.py
+++ b/pypy/tool/gdb_pypy.py
@@ -1,30 +1,60 @@
 """
-Some convenience macros for gdb.  Load them by putting this file somewhere in
-the path and then, from gdb:
+Some convenience macros for gdb.  If you have pypy in your path, you can simply do:
 
-(gdb) python import pypy_gdb
+(gdb) python import pypy.tool.gdb_pypy
 
 Or, alternatively:
 
-(gdb) python execfile('/path/to/pypy_gdb.py')
+(gdb) python execfile('/path/to/gdb_pypy.py')
 """
 
+from __future__ import with_statement
+
+import re
 import sys
-import gdb
+import os.path
 
-def load_typeids():
+try:
+    # when running inside gdb
+    from gdb import Command
+except ImportError:
+    # whenn running outside gdb: mock class for testing
+    class Command(object):
+        def __init__(self, name, command_class):
+            pass
+
+
+def find_field_with_suffix(val, suffix):
     """
-    Returns a mapping offset --> description
+    Return ``val[field]``, where ``field`` is the only one whose name ends
+    with ``suffix``.  If there is no such field, or more than one, raise KeyError.
     """
-    typeids = {}
-    for line in open('typeids.txt'):
-        member, descr = map(str.strip, line.split(None, 1))
-        expr = "((char*)(&pypy_g_typeinfo.%s)) - (char*)&pypy_g_typeinfo" % member
-        offset = int(gdb.parse_and_eval(expr))
-        typeids[offset] = descr
-    return typeids
+    names = []
+    for field in val.type.fields():
+        if field.name.endswith(suffix):
+            names.append(field.name)
+    #
+    if len(names) == 1:
+        return val[names[0]]
+    elif len(names) == 0:
+        raise KeyError, "cannot find field *%s" % suffix
+    else:
+        raise KeyError, "too many matching fields: %s" % ', '.join(names)
 
-class RPyType (gdb.Command):
+def lookup(val, suffix):
+    """
+    Lookup a field which ends with ``suffix`` following the rpython struct
+    inheritance hierarchy (i.e., looking both at ``val`` and
+    ``val['*_super']``, recursively.
+    """
+    try:
+        return find_field_with_suffix(val, suffix)
+    except KeyError:
+        baseobj = find_field_with_suffix(val, '_super')
+        return lookup(baseobj, suffix)
+
+
+class RPyType(Command):
     """
     Prints the RPython type of the expression (remember to dereference it!)
     It assumes to find ``typeids.txt`` in the current directory.
@@ -33,46 +63,138 @@
     (gdb) rpy_type *l_v123
     GcStruct pypy.foo.Bar { super, inst_xxx, inst_yyy }
     """
+
+    prog2typeids = {}
  
-    def __init__(self):
-        gdb.Command.__init__(self, "rpy_type", gdb.COMMAND_NONE)
-
-    ## # some magic code to automatically reload the python file while developing
-    ## def invoke(self, arg, from_tty):
-    ##     sys.path.insert(0, '')
-    ##     import gdb_pypy
-    ##     reload(gdb_pypy)
-    ##     self.__class__ = gdb_pypy.RPyType
-    ##     self.do_invoke(arg, from_tty)
+    def __init__(self, gdb=None):
+        # dependency injection, for tests
+        if gdb is None:
+            import gdb
+        self.gdb = gdb
+        Command.__init__(self, "rpy_type", self.gdb.COMMAND_NONE)
 
     def invoke(self, arg, from_tty):
-        obj = gdb.parse_and_eval(arg)
-        hdr = self.get_gc_header(obj)
+        # some magic code to automatically reload the python file while developing
+        ## from pypy.tool import gdb_pypy
+        ## reload(gdb_pypy)
+        ## gdb_pypy.RPyType.prog2typeids = self.prog2typeids # persist the cache
+        ## self.__class__ = gdb_pypy.RPyType
+        print self.do_invoke(arg, from_tty)
+
+    def do_invoke(self, arg, from_tty):
+        obj = self.gdb.parse_and_eval(arg)
+        hdr = lookup(obj, '_gcheader')
         tid = hdr['h_tid']
         offset = tid & 0xFFFFFFFF # 64bit only
         offset = int(offset) # convert from gdb.Value to python int
-        typeids = load_typeids()
+        typeids = self.get_typeids()
         if offset in typeids:
-            print typeids[offset]
+            return typeids[offset]
         else:
-            print 'Cannot find the type with offset %d' % offset
+            return 'Cannot find the type with offset %d' % offset
 
-    def get_first_field_if(self, obj, suffix):
-        ctype = obj.type
-        field = ctype.fields()[0]
-        if field.name.endswith(suffix):
-            return obj[field.name]
+    def get_typeids(self):
+        progspace = self.gdb.current_progspace()
+        try:
+            return self.prog2typeids[progspace]
+        except KeyError:
+            typeids = self.load_typeids(progspace)
+            self.prog2typeids[progspace] = typeids
+            return typeids
+
+    def load_typeids(self, progspace):
+        """
+        Returns a mapping offset --> description
+        """
+        exename = progspace.filename
+        root = os.path.dirname(exename)
+        typeids_txt = os.path.join(root, 'typeids.txt')
+        if not os.path.exists(typeids_txt):
+            newroot = os.path.dirname(root)
+            typeids_txt = os.path.join(newroot, 'typeids.txt')
+        print 'loading', typeids_txt
+        typeids = {}
+        with open(typeids_txt) as f:
+            for line in f:
+                member, descr = map(str.strip, line.split(None, 1))
+                expr = "((char*)(&pypy_g_typeinfo.%s)) - (char*)&pypy_g_typeinfo" % member
+                offset = int(self.gdb.parse_and_eval(expr))
+                typeids[offset] = descr
+        return typeids
+
+
+def is_ptr(type, gdb):
+    if gdb is None:
+        import gdb # so we can pass a fake one from the tests
+    return type.code == gdb.TYPE_CODE_PTR
+
+
+class RPyStringPrinter(object):
+    """
+    Pretty printer for rpython strings.
+
+    Note that this pretty prints *pointers* to strings: this way you can do "p
+    val" and see the nice string, and "p *val" to see the underyling struct
+    fields
+    """
+    
+    def __init__(self, val):
+        self.val = val
+
+    @classmethod
+    def lookup(cls, val, gdb=None):
+        t = val.type
+        if is_ptr(t, gdb) and t.target().tag == 'pypy_rpy_string0':
+            return cls(val)
         return None
 
-    def get_super(self, obj):
-        return self.get_first_field_if(obj, '_super')
+    def to_string(self):
+        chars = self.val['rs_chars']
+        length = int(chars['length'])
+        items = chars['items']
+        res = [chr(items[i]) for i in range(length)]
+        string = ''.join(res)
+        return 'r' + repr(string)
 
-    def get_gc_header(self, obj):
-        while True:
-            sup = self.get_super(obj)
-            if sup is None:
-                break
-            obj = sup
-        return self.get_first_field_if(obj, '_gcheader')
 
-RPyType() # side effects
+class RPyListPrinter(object):
+    """
+    Pretty printer for rpython lists
+
+    Note that this pretty prints *pointers* to lists: this way you can do "p
+    val" and see the nice repr, and "p *val" to see the underyling struct
+    fields
+    """
+
+    def __init__(self, val):
+        self.val = val
+
+    @classmethod
+    def lookup(cls, val, gdb=None):
+        t = val.type
+        if is_ptr(t, gdb) and re.match(r'pypy_list\d*', t.target().tag):
+            return cls(val)
+        return None
+
+    def to_string(self):
+        length = int(self.val['l_length'])
+        array = self.val['l_items']
+        allocated = int(array['length'])
+        items = array['items']
+        itemlist = []
+        for i in range(length):
+            item = items[i]
+            itemlist.append(str(item))
+        str_items = ', '.join(itemlist)
+        return 'r[%s] (len=%d, alloc=%d)' % (str_items, length, allocated)
+
+
+try:
+    import gdb
+    RPyType() # side effects
+    gdb.pretty_printers += [
+        RPyStringPrinter.lookup,
+        RPyListPrinter.lookup
+        ]
+except ImportError:
+    pass
diff --git a/pypy/tool/release/force-builds.py b/pypy/tool/release/force-builds.py
--- a/pypy/tool/release/force-builds.py
+++ b/pypy/tool/release/force-builds.py
@@ -22,7 +22,7 @@
 #    'own-macosx-x86-32',
 #    'pypy-c-app-level-linux-x86-32',
 #    'pypy-c-app-level-linux-x86-64',
-    'pypy-c-stackless-app-level-linux-x86-32',
+#    'pypy-c-stackless-app-level-linux-x86-32',
     'pypy-c-app-level-win-x86-32',
     'pypy-c-jit-linux-x86-32',
     'pypy-c-jit-linux-x86-64',
diff --git a/pypy/tool/test/test_gdb_pypy.py b/pypy/tool/test/test_gdb_pypy.py
new file mode 100644
--- /dev/null
+++ b/pypy/tool/test/test_gdb_pypy.py
@@ -0,0 +1,180 @@
+import py
+from pypy.tool import gdb_pypy
+
+class FakeGdb(object):
+
+    COMMAND_NONE = -1
+    #
+    TYPE_CODE_PTR = 1
+    TYPE_CODE_ARRAY = 2
+    TYPE_CODE_STRUCT = 3
+
+    def __init__(self, exprs, progspace=None):
+        self.exprs = exprs
+        self.progspace = progspace
+
+    def parse_and_eval(self, expr):
+        return self.exprs[expr]
+
+    def current_progspace(self):
+        return self.progspace
+
+
+class Mock(object):
+    def __init__(self, **attrs):
+        self.__dict__.update(attrs)
+
+class Field(Mock):
+    pass
+
+class Struct(object):
+    code = FakeGdb.TYPE_CODE_STRUCT
+    
+    def __init__(self, fieldnames, tag):
+        self._fields = [Field(name=name) for name in fieldnames]
+        self.tag = tag
+
+    def fields(self):
+        return self._fields[:]
+
+class Pointer(object):
+    code = FakeGdb.TYPE_CODE_PTR
+
+    def __init__(self, target):
+        self._target = target
+
+    def target(self):
+        return self._target
+
+class Value(dict):
+    def __init__(self, *args, **kwds):
+        type_tag = kwds.pop('type_tag', None)
+        dict.__init__(self, *args, **kwds)
+        self.type = Struct(self.keys(), type_tag)
+        for key, val in self.iteritems():
+            if isinstance(val, dict):
+                self[key] = Value(val)
+
+class PtrValue(Value):
+    def __init__(self, *args, **kwds):
+        # in python gdb, we can use [] to access fields either if we have an
+        # actual struct or a pointer to it, so we just reuse Value here
+        Value.__init__(self, *args, **kwds)
+        self.type = Pointer(self.type)
+
+def test_mock_objects():
+    d = {'a': 1,
+         'b': 2,
+         'super': {
+            'c': 3,
+            }
+         }
+    val = Value(d)
+    assert val['a'] == 1
+    assert val['b'] == 2
+    assert isinstance(val['super'], Value)
+    assert val['super']['c'] == 3
+    fields = val.type.fields()
+    names = [f.name for f in fields]
+    assert sorted(names) == ['a', 'b', 'super']
+
+def test_find_field_with_suffix():
+    obj = Value(x_foo = 1,
+                y_bar = 2,
+                z_foobar = 3)
+    assert gdb_pypy.find_field_with_suffix(obj, 'foo') == 1
+    assert gdb_pypy.find_field_with_suffix(obj, 'foobar') == 3
+    py.test.raises(KeyError, "gdb_pypy.find_field_with_suffix(obj, 'bar')")
+    py.test.raises(KeyError, "gdb_pypy.find_field_with_suffix(obj, 'xxx')")
+
+def test_lookup():
+    d = {'r_super': {
+            '_gcheader': {
+                'h_tid': 123,
+                }
+            },
+         'r_foo': 42,
+         }
+    obj = Value(d)
+    assert gdb_pypy.lookup(obj, 'foo') == 42
+    hdr = gdb_pypy.lookup(obj, 'gcheader')
+    assert hdr['h_tid'] == 123
+
+def test_load_typeids(tmpdir):
+    exe = tmpdir.join('testing_1').join('pypy-c')
+    typeids = tmpdir.join('typeids.txt')
+    typeids.write("""
+member0    GcStruct xxx {}
+""".strip())
+    progspace = Mock(filename=str(exe))
+    exprs = {
+        '((char*)(&pypy_g_typeinfo.member0)) - (char*)&pypy_g_typeinfo': 0,
+        }
+    gdb = FakeGdb(exprs, progspace)
+    cmd = gdb_pypy.RPyType(gdb)
+    typeids = cmd.load_typeids(progspace)
+    assert typeids[0] == 'GcStruct xxx {}'
+
+def test_RPyType(tmpdir):
+    exe = tmpdir.join('pypy-c')
+    typeids = tmpdir.join('typeids.txt')
+    typeids.write("""
+member0    GcStruct xxx {}
+member1    GcStruct yyy {}
+member2    GcStruct zzz {}
+""".strip())
+    #
+    progspace = Mock(filename=str(exe))
+    d = {'r_super': {
+            '_gcheader': {
+                'h_tid': 123,
+                }
+            },
+         'r_foo': 42,
+         }
+    myvar = Value(d)
+    exprs = {
+        '*myvar': myvar,
+        '((char*)(&pypy_g_typeinfo.member0)) - (char*)&pypy_g_typeinfo': 0,
+        '((char*)(&pypy_g_typeinfo.member1)) - (char*)&pypy_g_typeinfo': 123,
+        '((char*)(&pypy_g_typeinfo.member2)) - (char*)&pypy_g_typeinfo': 456,
+        }
+    gdb = FakeGdb(exprs, progspace)
+    cmd = gdb_pypy.RPyType(gdb)
+    assert cmd.do_invoke('*myvar', True) == 'GcStruct yyy {}'
+
+def test_pprint_string():
+    d = {'_gcheader': {
+            'h_tid': 123
+            },
+         'rs_hash': 456,
+         'rs_chars': {
+            'length': 6,
+            'items': map(ord, 'foobar'),
+            }
+         }
+    p_string = PtrValue(d, type_tag='pypy_rpy_string0')
+    printer = gdb_pypy.RPyStringPrinter.lookup(p_string, FakeGdb)
+    assert printer.to_string() == "r'foobar'"
+
+def test_pprint_list():
+    d = {'_gcheader': {
+            'h_tid': 123
+            },
+         'l_length': 3, # the lenght of the rpython list
+         'l_items':
+             # this is the array which contains the items
+             {'_gcheader': {
+                'h_tid': 456
+                },
+              'length': 5, # the lenght of the underlying array
+              'items': [40, 41, 42, -1, -2],
+              }
+         }
+    mylist = PtrValue(d, type_tag='pypy_list0')
+    printer = gdb_pypy.RPyListPrinter.lookup(mylist, FakeGdb)
+    assert printer.to_string() == 'r[40, 41, 42] (len=3, alloc=5)'
+    #
+    mylist.type.target().tag = 'pypy_list1234'
+    printer = gdb_pypy.RPyListPrinter.lookup(mylist, FakeGdb)
+    assert printer.to_string() == 'r[40, 41, 42] (len=3, alloc=5)'
diff --git a/pypy/translator/translator.py b/pypy/translator/translator.py
--- a/pypy/translator/translator.py
+++ b/pypy/translator/translator.py
@@ -16,7 +16,7 @@
 import py
 log = py.log.Producer("flowgraph")
 py.log.setconsumer("flowgraph", ansi_log)
- 
+
 class TranslationContext(object):
     FLOWING_FLAGS = {
         'verbose': False,
@@ -105,7 +105,7 @@
             raise ValueError("we already have an rtyper")
         from pypy.rpython.rtyper import RPythonTyper
         self.rtyper = RPythonTyper(self.annotator,
-                                   type_system = type_system)
+                                   type_system=type_system)
         return self.rtyper
 
     def getexceptiontransformer(self):


More information about the pypy-commit mailing list