[Python-checkins] r80324 - in python/branches/py3k: Lib/test/gdb_sample.py Lib/test/test_gdb.py Tools/gdb/libpython.py

martin.v.loewis python-checkins at python.org
Thu Apr 22 00:38:42 CEST 2010


Author: martin.v.loewis
Date: Thu Apr 22 00:38:42 2010
New Revision: 80324

Log:
Issue #8380: Port gdb/libpython to 3.x.


Modified:
   python/branches/py3k/Lib/test/gdb_sample.py
   python/branches/py3k/Lib/test/test_gdb.py
   python/branches/py3k/Tools/gdb/libpython.py

Modified: python/branches/py3k/Lib/test/gdb_sample.py
==============================================================================
--- python/branches/py3k/Lib/test/gdb_sample.py	(original)
+++ python/branches/py3k/Lib/test/gdb_sample.py	Thu Apr 22 00:38:42 2010
@@ -7,6 +7,6 @@
     baz(a, b, c)
 
 def baz(*args):
-    print(42)
+    id(42)
 
 foo(1, 2, 3)

Modified: python/branches/py3k/Lib/test/test_gdb.py
==============================================================================
--- python/branches/py3k/Lib/test/test_gdb.py	(original)
+++ python/branches/py3k/Lib/test/test_gdb.py	Thu Apr 22 00:38:42 2010
@@ -45,6 +45,8 @@
 
 HAS_PYUP_PYDOWN = gdb_has_frame_select()
 
+BREAKPOINT_FN='builtin_id'
+
 class DebuggerTests(unittest.TestCase):
 
     """Test that the debugger can debug Python."""
@@ -57,10 +59,10 @@
         out, err = subprocess.Popen(
             args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
             ).communicate()
-        return out.decode('iso-8859-1'), err.decode('iso-8859-1')
+        return out.decode('utf-8'), err.decode('utf-8')
 
     def get_stack_trace(self, source=None, script=None,
-                        breakpoint='textiowrapper_write',
+                        breakpoint=BREAKPOINT_FN,
                         cmds_after_breakpoint=None,
                         import_site=False):
         '''
@@ -129,19 +131,21 @@
                      cmds_after_breakpoint=None,
                      import_site=False):
         # Given an input python source representation of data,
-        # run "python -c'print DATA'" under gdb with a breakpoint on
-        # textiowrapper_write and scrape out gdb's representation of the "op"
+        # run "python -c'id(DATA)'" under gdb with a breakpoint on
+        # builtin_id and scrape out gdb's representation of the "op"
         # parameter, and verify that the gdb displays the same string
         #
+        # Verify that the gdb displays the expected string
+        #
         # For a nested structure, the first time we hit the breakpoint will
         # give us the top-level structure
-        gdb_output = self.get_stack_trace(source, breakpoint='textiowrapper_write',
+        gdb_output = self.get_stack_trace(source, breakpoint=BREAKPOINT_FN,
                                           cmds_after_breakpoint=cmds_after_breakpoint,
                                           import_site=import_site)
         # gdb can insert additional '\n' and space characters in various places
         # in its output, depending on the width of the terminal it's connected
         # to (using its "wrap_here" function)
-        m = re.match('.*#0\s+textiowrapper_write\s+\(\s*op\=\s*(.*?),\s+fp=.*\).*',
+        m = re.match('.*#0\s+builtin_id\s+\(self\=.*,\s+v=\s*(.*?)\)\s+at\s+Python/bltinmodule.c.*',
                      gdb_output, re.DOTALL)
         if not m:
             self.fail('Unexpected gdb output: %r\n%s' % (gdb_output, gdb_output))
@@ -154,37 +158,34 @@
 
     def assertMultilineMatches(self, actual, pattern):
         m = re.match(pattern, actual, re.DOTALL)
-        self.assert_(m,
-                     msg='%r did not match %r' % (actual, pattern))
+        if not m:
+            self.fail(msg='%r did not match %r' % (actual, pattern))
 
     def get_sample_script(self):
         return findfile('gdb_sample.py')
 
 class PrettyPrintTests(DebuggerTests):
     def test_getting_backtrace(self):
-        gdb_output = self.get_stack_trace('print(42)')
-        self.assertTrue('textiowrapper_write' in gdb_output)
+        gdb_output = self.get_stack_trace('id(42)')
+        self.assertTrue(BREAKPOINT_FN in gdb_output)
 
-    def assertGdbRepr(self, val, cmds_after_breakpoint=None):
+    def assertGdbRepr(self, val, exp_repr=None, cmds_after_breakpoint=None):
         # Ensure that gdb's rendering of the value in a debugged process
         # matches repr(value) in this process:
-        gdb_repr, gdb_output = self.get_gdb_repr('print(' + repr(val) + ')',
+        gdb_repr, gdb_output = self.get_gdb_repr('id(' + repr(val) + ')',
                                                  cmds_after_breakpoint)
-        self.assertEquals(gdb_repr, repr(val), gdb_output)
+        if not exp_repr:
+            exp_repr = repr(val)
+        self.assertEquals(gdb_repr, exp_repr,
+                          ('%r did not equal expected %r; full output was:\n%s'
+                           % (gdb_repr, exp_repr, gdb_output)))
 
     def test_int(self):
-        'Verify the pretty-printing of various "int" values'
+        'Verify the pretty-printing of various "int"/long values'
         self.assertGdbRepr(42)
         self.assertGdbRepr(0)
         self.assertGdbRepr(-7)
-        self.assertGdbRepr(sys.maxint)
-        self.assertGdbRepr(-sys.maxint)
-
-    def test_long(self):
-        'Verify the pretty-printing of various "long" values'
-        self.assertGdbRepr(0)
         self.assertGdbRepr(1000000000000)
-        self.assertGdbRepr(-1)
         self.assertGdbRepr(-1000000000000000)
 
     def test_singletons(self):
@@ -202,27 +203,27 @@
     def test_lists(self):
         'Verify the pretty-printing of lists'
         self.assertGdbRepr([])
-        self.assertGdbRepr(range(5))
+        self.assertGdbRepr(list(range(5)))
+
+    def test_bytes(self):
+        'Verify the pretty-printing of bytes'
+        self.assertGdbRepr(b'')
+        self.assertGdbRepr(b'And now for something hopefully the same')
+        self.assertGdbRepr(b'string with embedded NUL here \0 and then some more text')
+        self.assertGdbRepr(b'this is a tab:\t'
+                           b' this is a slash-N:\n'
+                           b' this is a slash-R:\r'
+                           )
+
+        self.assertGdbRepr(b'this is byte 255:\xff and byte 128:\x80')
+
+        self.assertGdbRepr(bytes([b for b in range(255)]))
 
     def test_strings(self):
-        'Verify the pretty-printing of strings'
+        'Verify the pretty-printing of unicode strings'
         self.assertGdbRepr('')
         self.assertGdbRepr('And now for something hopefully the same')
         self.assertGdbRepr('string with embedded NUL here \0 and then some more text')
-        self.assertGdbRepr('this is byte 255:\xff and byte 128:\x80')
-
-    def test_tuples(self):
-        'Verify the pretty-printing of tuples'
-        self.assertGdbRepr(tuple())
-        self.assertGdbRepr((1,))
-        self.assertGdbRepr(('foo', 'bar', 'baz'))
-
-    def test_unicode(self):
-        'Verify the pretty-printing of unicode values'
-        # Test the empty unicode string:
-        self.assertGdbRepr('')
-
-        self.assertGdbRepr('hello world')
 
         # Test printing a single character:
         #    U+2620 SKULL AND CROSSBONES
@@ -238,14 +239,19 @@
         # This is:
         # UTF-8: 0xF0 0x9D 0x84 0xA1
         # UTF-16: 0xD834 0xDD21
-        try:
-            # This will only work on wide-unicode builds:
-            self.assertGdbRepr(unichr(0x1D121))
-        except ValueError as e:
-            # We're probably on a narrow-unicode build; if we're seeing a
-            # different problem, then re-raise it:
-            if e.args != ('unichr() arg not in range(0x10000) (narrow Python build)',):
-                raise e
+        if sys.maxunicode == 0x10FFFF:
+            # wide unicode:
+            self.assertGdbRepr(chr(0x1D121))
+        else:
+            # narrow unicode:
+            self.assertGdbRepr(chr(0x1D121),
+                               "'\\U0000d834\\U0000dd21'")
+
+    def test_tuples(self):
+        'Verify the pretty-printing of tuples'
+        self.assertGdbRepr(tuple())
+        self.assertGdbRepr((1,), '(1,)')
+        self.assertGdbRepr(('foo', 'bar', 'baz'))
 
     def test_sets(self):
         'Verify the pretty-printing of sets'
@@ -253,12 +259,12 @@
         self.assertGdbRepr(set(['a', 'b']))
         self.assertGdbRepr(set([4, 5, 6]))
 
-        # Ensure that we handled sets containing the "dummy" key value,
+        # Ensure that we handle sets containing the "dummy" key value,
         # which happens on deletion:
         gdb_repr, gdb_output = self.get_gdb_repr('''s = set(['a','b'])
 s.pop()
-print(s)''')
-        self.assertEquals(gdb_repr, "set(['b'])")
+id(s)''')
+        self.assertEquals(gdb_repr, "{'b'}")
 
     def test_frozensets(self):
         'Verify the pretty-printing of frozensets'
@@ -271,43 +277,31 @@
         gdb_repr, gdb_output = self.get_gdb_repr('''
 try:
     raise RuntimeError("I am an error")
-except RuntimeError, e:
-    print(e)
+except RuntimeError as e:
+    id(e)
 ''')
         self.assertEquals(gdb_repr,
-                          "exceptions.RuntimeError('I am an error',)")
+                          "RuntimeError('I am an error',)")
 
 
         # Test division by zero:
         gdb_repr, gdb_output = self.get_gdb_repr('''
 try:
     a = 1 / 0
-except ZeroDivisionError, e:
-    print(e)
+except ZeroDivisionError as e:
+    id(e)
 ''')
         self.assertEquals(gdb_repr,
-                          "exceptions.ZeroDivisionError('integer division or modulo by zero',)")
-
-    def test_classic_class(self):
-        'Verify the pretty-printing of classic class instances'
-        gdb_repr, gdb_output = self.get_gdb_repr('''
-class Foo:
-    pass
-foo = Foo()
-foo.an_int = 42
-print(foo)''')
-        m = re.match(r'<Foo\(an_int=42\) at remote 0x[0-9a-f]+>', gdb_repr)
-        self.assertTrue(m,
-                        msg='Unexpected classic-class rendering %r' % gdb_repr)
+                          "ZeroDivisionError('division by zero',)")
 
     def test_modern_class(self):
         'Verify the pretty-printing of new-style class instances'
         gdb_repr, gdb_output = self.get_gdb_repr('''
-class Foo(object):
+class Foo:
     pass
 foo = Foo()
 foo.an_int = 42
-print(foo)''')
+id(foo)''')
         m = re.match(r'<Foo\(an_int=42\) at remote 0x[0-9a-f]+>', gdb_repr)
         self.assertTrue(m,
                         msg='Unexpected new-style class rendering %r' % gdb_repr)
@@ -320,8 +314,9 @@
 foo = Foo()
 foo += [1, 2, 3]
 foo.an_int = 42
-print(foo)''')
+id(foo)''')
         m = re.match(r'<Foo\(an_int=42\) at remote 0x[0-9a-f]+>', gdb_repr)
+
         self.assertTrue(m,
                         msg='Unexpected new-style class rendering %r' % gdb_repr)
 
@@ -334,12 +329,13 @@
     pass
 foo = Foo((1, 2, 3))
 foo.an_int = 42
-print(foo)''')
+id(foo)''')
         m = re.match(r'<Foo\(an_int=42\) at remote 0x[0-9a-f]+>', gdb_repr)
+
         self.assertTrue(m,
                         msg='Unexpected new-style class rendering %r' % gdb_repr)
 
-    def assertSane(self, source, corruption, expvalue=None, exptype=None):
+    def assertSane(self, source, corruption, exprepr=None):
         '''Run Python under gdb, corrupting variables in the inferior process
         immediately before taking a backtrace.
 
@@ -353,19 +349,15 @@
         gdb_repr, gdb_output = \
             self.get_gdb_repr(source,
                               cmds_after_breakpoint=cmds_after_breakpoint)
-
-        if expvalue:
-            if gdb_repr == repr(expvalue):
+        if exprepr:
+            if gdb_repr == exprepr:
                 # gdb managed to print the value in spite of the corruption;
                 # this is good (see http://bugs.python.org/issue8330)
                 return
 
-        if exptype:
-            pattern = '<' + exptype + ' at remote 0x[0-9a-f]+>'
-        else:
-            # Match anything for the type name; 0xDEADBEEF could point to
-            # something arbitrary (see  http://bugs.python.org/issue8330)
-            pattern = '<.* at remote 0x[0-9a-f]+>'
+        # Match anything for the type name; 0xDEADBEEF could point to
+        # something arbitrary (see  http://bugs.python.org/issue8330)
+        pattern = '<.* at remote 0x[0-9a-f]+>'
 
         m = re.match(pattern, gdb_repr)
         if not m:
@@ -375,8 +367,8 @@
     def test_NULL_ptr(self):
         'Ensure that a NULL PyObject* is handled gracefully'
         gdb_repr, gdb_output = (
-            self.get_gdb_repr('print(42)',
-                              cmds_after_breakpoint=['set variable op=0',
+            self.get_gdb_repr('id(42)',
+                              cmds_after_breakpoint=['set variable v=0',
                                                      'backtrace'])
             )
 
@@ -384,44 +376,33 @@
 
     def test_NULL_ob_type(self):
         'Ensure that a PyObject* with NULL ob_type is handled gracefully'
-        self.assertSane('print(42)',
-                        'set op->ob_type=0')
+        self.assertSane('id(42)',
+                        'set v->ob_type=0')
 
     def test_corrupt_ob_type(self):
         'Ensure that a PyObject* with a corrupt ob_type is handled gracefully'
-        self.assertSane('print(42)',
-                        'set op->ob_type=0xDEADBEEF',
-                        expvalue=42)
+        self.assertSane('id(42)',
+                        'set v->ob_type=0xDEADBEEF',
+                        exprepr='42')
 
     def test_corrupt_tp_flags(self):
         'Ensure that a PyObject* with a type with corrupt tp_flags is handled'
-        self.assertSane('print(42)',
-                        'set op->ob_type->tp_flags=0x0',
-                        expvalue=42)
+        self.assertSane('id(42)',
+                        'set v->ob_type->tp_flags=0x0',
+                        exprepr='42')
 
     def test_corrupt_tp_name(self):
         'Ensure that a PyObject* with a type with corrupt tp_name is handled'
-        self.assertSane('print(42)',
-                        'set op->ob_type->tp_name=0xDEADBEEF',
-                        expvalue=42)
-
-    def test_NULL_instance_dict(self):
-        'Ensure that a PyInstanceObject with with a NULL in_dict is handled'
-        self.assertSane('''
-class Foo:
-    pass
-foo = Foo()
-foo.an_int = 42
-print(foo)''',
-                        'set ((PyInstanceObject*)op)->in_dict = 0',
-                        exptype='Foo')
+        self.assertSane('id(42)',
+                        'set v->ob_type->tp_name=0xDEADBEEF',
+                        exprepr='42')
 
     def test_builtins_help(self):
         'Ensure that the new-style class _Helper in site.py can be handled'
         # (this was the issue causing tracebacks in
         #  http://bugs.python.org/issue8032#msg100537 )
+        gdb_repr, gdb_output = self.get_gdb_repr('id(__builtins__.help)', import_site=True)
 
-        gdb_repr, gdb_output = self.get_gdb_repr('print(__builtins__.help)', import_site=True)
         m = re.match(r'<_Helper at remote 0x[0-9a-f]+>', gdb_repr)
         self.assertTrue(m,
                         msg='Unexpected rendering %r' % gdb_repr)
@@ -430,20 +411,18 @@
         '''Ensure that a reference loop involving a list doesn't lead proxyval
         into an infinite loop:'''
         gdb_repr, gdb_output = \
-            self.get_gdb_repr("a = [3, 4, 5] ; a.append(a) ; print(a)")
-
+            self.get_gdb_repr("a = [3, 4, 5] ; a.append(a) ; id(a)")
         self.assertEquals(gdb_repr, '[3, 4, 5, [...]]')
 
         gdb_repr, gdb_output = \
-            self.get_gdb_repr("a = [3, 4, 5] ; b = [a] ; a.append(b) ; print(a)")
-
+            self.get_gdb_repr("a = [3, 4, 5] ; b = [a] ; a.append(b) ; id(a)")
         self.assertEquals(gdb_repr, '[3, 4, 5, [[...]]]')
 
     def test_selfreferential_dict(self):
         '''Ensure that a reference loop involving a dict doesn't lead proxyval
         into an infinite loop:'''
         gdb_repr, gdb_output = \
-            self.get_gdb_repr("a = {} ; b = {'bar':a} ; a['foo'] = b ; print(a)")
+            self.get_gdb_repr("a = {} ; b = {'bar':a} ; a['foo'] = b ; id(a)")
 
         self.assertEquals(gdb_repr, "{'foo': {'bar': {...}}}")
 
@@ -454,7 +433,7 @@
     pass
 foo = Foo()
 foo.an_attr = foo
-print(foo)''')
+id(foo)''')
         self.assertTrue(re.match('<Foo\(an_attr=<\.\.\.>\) at remote 0x[0-9a-f]+>',
                                  gdb_repr),
                         'Unexpected gdb representation: %r\n%s' % \
@@ -467,7 +446,7 @@
     pass
 foo = Foo()
 foo.an_attr = foo
-print(foo)''')
+id(foo)''')
         self.assertTrue(re.match('<Foo\(an_attr=<\.\.\.>\) at remote 0x[0-9a-f]+>',
                                  gdb_repr),
                         'Unexpected gdb representation: %r\n%s' % \
@@ -481,7 +460,7 @@
 b = Foo()
 a.an_attr = b
 b.an_attr = a
-print(a)''')
+id(a)''')
         self.assertTrue(re.match('<Foo\(an_attr=<Foo\(an_attr=<\.\.\.>\) at remote 0x[0-9a-f]+>\) at remote 0x[0-9a-f]+>',
                                  gdb_repr),
                         'Unexpected gdb representation: %r\n%s' % \
@@ -489,7 +468,7 @@
 
     def test_truncation(self):
         'Verify that very long output is truncated'
-        gdb_repr, gdb_output = self.get_gdb_repr('print(range(1000))')
+        gdb_repr, gdb_output = self.get_gdb_repr('id(list(range(1000)))')
         self.assertEquals(gdb_repr,
                           "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, "
                           "14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, "
@@ -515,13 +494,9 @@
         self.assertEquals(len(gdb_repr),
                           1024 + len('...(truncated)'))
 
-    def test_builtin_function(self):
-        gdb_repr, gdb_output = self.get_gdb_repr('print(len)')
-        self.assertEquals(gdb_repr, '<built-in function len>')
-
     def test_builtin_method(self):
-        gdb_repr, gdb_output = self.get_gdb_repr('import sys; print(sys.stdout.readlines)')
-        self.assertTrue(re.match('<built-in method readlines of file object at remote 0x[0-9a-f]+>',
+        gdb_repr, gdb_output = self.get_gdb_repr('import sys; id(sys.stdout.readlines)')
+        self.assertTrue(re.match('<built-in method readlines of _io.TextIOWrapper object at remote 0x[0-9a-f]+>',
                                  gdb_repr),
                         'Unexpected gdb representation: %r\n%s' % \
                             (gdb_repr, gdb_output))
@@ -532,11 +507,11 @@
     pass
 
 foo(3, 4, 5)
-print foo.__code__''',
-                                          breakpoint='textiowrapper_write',
-                                          cmds_after_breakpoint=['print (PyFrameObject*)(((PyCodeObject*)op)->co_zombieframe)']
+id(foo.__code__)''',
+                                          breakpoint='builtin_id',
+                                          cmds_after_breakpoint=['print (PyFrameObject*)(((PyCodeObject*)v)->co_zombieframe)']
                                           )
-        self.assertTrue(re.match(r'.*\s+\$1 =\s+Frame 0x[0-9a-f]+, for file <string>, line 3, in foo \(\)\s+.*',
+        self.assertTrue(re.match('.*\s+\$1 =\s+Frame 0x[0-9a-f]+, for file <string>, line 3, in foo \(\)\s+.*',
                                  gdb_output,
                                  re.DOTALL),
                         'Unexpected gdb representation: %r\n%s' % (gdb_output, gdb_output))
@@ -555,7 +530,7 @@
                            '   7        baz(a, b, c)\n'
                            '   8    \n'
                            '   9    def baz(*args):\n'
-                           ' >10        print(42)\n'
+                           ' >10        id(42)\n'
                            '  11    \n'
                            '  12    foo(1, 2, 3)\n',
                            bt)
@@ -566,7 +541,7 @@
                                   cmds_after_breakpoint=['py-list 9'])
 
         self.assertListing('   9    def baz(*args):\n'
-                           ' >10        print(42)\n'
+                           ' >10        id(42)\n'
                            '  11    \n'
                            '  12    foo(1, 2, 3)\n',
                            bt)
@@ -619,7 +594,7 @@
 #[0-9]+ Frame 0x[0-9a-f]+, for file .*gdb_sample.py, line 7, in bar \(a=1, b=2, c=3\)
     baz\(a, b, c\)
 #[0-9]+ Frame 0x[0-9a-f]+, for file .*gdb_sample.py, line 10, in baz \(args=\(1, 2, 3\)\)
-    print\(42\)
+    id\(42\)
 $''')
 
 class PyBtTests(DebuggerTests):
@@ -662,7 +637,7 @@
         bt = self.get_stack_trace(script=self.get_sample_script(),
                                   cmds_after_breakpoint=['py-print len'])
         self.assertMultilineMatches(bt,
-                                    r".*\nbuiltin 'len' = <built-in function len>\n.*")
+                                    r".*\nbuiltin 'len' = <built-in method len of module object at remote 0x[0-9a-f]+>\n.*")
 
 class PyLocalsTests(DebuggerTests):
     def test_basic_command(self):

Modified: python/branches/py3k/Tools/gdb/libpython.py
==============================================================================
--- python/branches/py3k/Tools/gdb/libpython.py	(original)
+++ python/branches/py3k/Tools/gdb/libpython.py	Thu Apr 22 00:38:42 2010
@@ -20,8 +20,9 @@
 process, we can generate a "proxy value" within the gdb process.  For example,
 given a PyObject* in the inferior process that is in fact a PyListObject*
 holding three PyObject* that turn out to be PyBytesObject* instances, we can
-generate a proxy value within the gdb process that is a list of strings:
-  ["foo", "bar", "baz"]
+generate a proxy value within the gdb process that is a list of bytes
+instances:
+  [b"foo", b"bar", b"baz"]
 
 Doing so can be expensive for complicated graphs of objects, and could take
 some time, so we also have a "write_repr" method that writes a representation
@@ -57,7 +58,7 @@
 Py_TPFLAGS_LONG_SUBCLASS     = (1L << 24)
 Py_TPFLAGS_LIST_SUBCLASS     = (1L << 25)
 Py_TPFLAGS_TUPLE_SUBCLASS    = (1L << 26)
-Py_TPFLAGS_STRING_SUBCLASS   = (1L << 27)
+Py_TPFLAGS_BYTES_SUBCLASS    = (1L << 27)
 Py_TPFLAGS_UNICODE_SUBCLASS  = (1L << 28)
 Py_TPFLAGS_DICT_SUBCLASS     = (1L << 29)
 Py_TPFLAGS_BASE_EXC_SUBCLASS = (1L << 30)
@@ -66,6 +67,9 @@
 
 MAX_OUTPUT_LEN=1024
 
+hexdigits = "0123456789abcdef"
+
+
 class NullPyObjectPtr(RuntimeError):
     pass
 
@@ -148,12 +152,8 @@
             return pyo_ptr.dereference()[name]
 
         if name == 'ob_size':
-            try:
-            # Python 2:
-                return self._gdbval.dereference()[name]
-            except RuntimeError:
-                # Python 3:
-                return self._gdbval.dereference()['ob_base'][name]
+            pyo_ptr = self._gdbval.cast(PyVarObjectPtr.get_gdb_type())
+            return pyo_ptr.dereference()[name]
 
         # General case: look it up inside the object:
         return self._gdbval.dereference()[name]
@@ -318,7 +318,7 @@
             return PyListObjectPtr
         if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS:
             return PyTupleObjectPtr
-        if tp_flags & Py_TPFLAGS_STRING_SUBCLASS:
+        if tp_flags & Py_TPFLAGS_BYTES_SUBCLASS:
             return PyBytesObjectPtr
         if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS:
             return PyUnicodeObjectPtr
@@ -355,6 +355,8 @@
     def as_address(self):
         return long(self._gdbval)
 
+class PyVarObjectPtr(PyObjectPtr):
+    _typename = 'PyVarObject'
 
 class ProxyAlreadyVisited(object):
     '''
@@ -515,20 +517,6 @@
         out.write(self.safe_tp_name())
         self.write_field_repr('args', out, visited)
 
-class PyBoolObjectPtr(PyObjectPtr):
-    """
-    Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two
-    <bool> instances (Py_True/Py_False) within the process being debugged.
-    """
-    _typename = 'PyBoolObject'
-
-    def proxyval(self, visited):
-        if int_from_int(self.field('ob_ival')):
-            return True
-        else:
-            return False
-
-
 class PyClassObjectPtr(PyObjectPtr):
     """
     Class wrapping a gdb.Value that's a PyClassObject* i.e. a <classobj>
@@ -691,13 +679,6 @@
         _write_instance_repr(out, visited,
                              cl_name, pyop_in_dict, self.as_address())
 
-class PyIntObjectPtr(PyObjectPtr):
-    _typename = 'PyIntObject'
-
-    def proxyval(self, visited):
-        result = int_from_int(self.field('ob_ival'))
-        return result
-
 class PyListObjectPtr(PyObjectPtr):
     _typename = 'PyListObject'
 
@@ -770,6 +751,22 @@
             result = -result
         return result
 
+    def write_repr(self, out, visited):
+        # Write this out as a Python 3 int literal, i.e. without the "L" suffix
+        proxy = self.proxyval(visited)
+        out.write("%s" % proxy)
+
+
+class PyBoolObjectPtr(PyLongObjectPtr):
+    """
+    Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two
+    <bool> instances (Py_True/Py_False) within the process being debugged.
+    """
+    def proxyval(self, visited):
+        if PyLongObjectPtr.proxyval(self, visited):
+            return True
+        else:
+            return False
 
 class PyNoneStructPtr(PyObjectPtr):
     """
@@ -894,9 +891,9 @@
             return
         out.write('Frame 0x%x, for file %s, line %i, in %s ('
                   % (self.as_address(),
-                     self.co_filename,
+                     self.co_filename.proxyval(visited),
                      self.current_line_num(),
-                     self.co_name))
+                     self.co_name.proxyval(visited)))
         first = True
         for pyop_name, pyop_value in self.iter_locals():
             if not first:
@@ -933,7 +930,8 @@
             return set(members)
 
     def write_repr(self, out, visited):
-        out.write(self.safe_tp_name())
+        # Emulate Python 3's set_repr
+        tp_name = self.safe_tp_name()
 
         # Guard against infinite loops:
         if self.as_address() in visited:
@@ -941,7 +939,18 @@
             return
         visited.add(self.as_address())
 
-        out.write('([')
+        # Python 3's set_repr special-cases the empty set:
+        if not self.field('used'):
+            out.write(tp_name)
+            out.write('()')
+            return
+
+        # Python 3 uses {} for set literals:
+        if tp_name != 'set':
+            out.write(tp_name)
+            out.write('(')
+
+        out.write('{')
         first = True
         table = self.field('table')
         for i in safe_range(self.field('mask')+1):
@@ -955,7 +964,10 @@
                         out.write(', ')
                     first = False
                     pyop_key.write_repr(out, visited)
-        out.write('])')
+        out.write('}')
+
+        if tp_name != 'set':
+            out.write(')')
 
 
 class PyBytesObjectPtr(PyObjectPtr):
@@ -970,6 +982,37 @@
     def proxyval(self, visited):
         return str(self)
 
+    def write_repr(self, out, visited):
+        # Write this out as a Python 3 bytes literal, i.e. with a "b" prefix
+
+        # Get a PyStringObject* within the Python 2 gdb process:
+        proxy = self.proxyval(visited)
+
+        # Transliteration of Python 3's Objects/bytesobject.c:PyBytes_Repr
+        # to Python 2 code:
+        quote = "'"
+        if "'" in proxy and not '"' in proxy:
+            quote = '"'
+        out.write('b')
+        out.write(quote)
+        for byte in proxy:
+            if byte == quote or byte == '\\':
+                out.write('\\')
+                out.write(byte)
+            elif byte == '\t':
+                out.write('\\t')
+            elif byte == '\n':
+                out.write('\\n')
+            elif byte == '\r':
+                out.write('\\r')
+            elif byte < ' ' or ord(byte) >= 0x7f:
+                out.write('\\x')
+                out.write(hexdigits[(ord(byte) & 0xf0) >> 4])
+                out.write(hexdigits[ord(byte) & 0xf])
+            else:
+                out.write(byte)
+        out.write(quote)
+
 class PyTupleObjectPtr(PyObjectPtr):
     _typename = 'PyTupleObject'
 
@@ -1010,9 +1053,21 @@
     _typename = 'PyTypeObject'
 
 
+def _unichr_is_printable(char):
+    # Logic adapted from Python 3's Tools/unicode/makeunicodedata.py
+    if char == u" ":
+        return True
+    import unicodedata
+    return unicodedata.category(char)[0] not in ("C", "Z")
+
+
 class PyUnicodeObjectPtr(PyObjectPtr):
     _typename = 'PyUnicodeObject'
 
+    def char_width(self):
+        _type_Py_UNICODE = gdb.lookup_type('Py_UNICODE')
+        return _type_Py_UNICODE.sizeof
+
     def proxyval(self, visited):
         # From unicodeobject.h:
         #     Py_ssize_t length;  /* Length of raw Unicode data in buffer */
@@ -1029,6 +1084,102 @@
         result = u''.join([unichr(ucs) for ucs in Py_UNICODEs])
         return result
 
+    def write_repr(self, out, visited):
+        # Write this out as a Python 3 str literal, i.e. without a "u" prefix
+
+        # Get a PyUnicodeObject* within the Python 2 gdb process:
+        proxy = self.proxyval(visited)
+
+        # Transliteration of Python 3's Object/unicodeobject.c:unicode_repr
+        # to Python 2:
+        if "'" in proxy and '"' not in proxy:
+            quote = '"'
+        else:
+            quote = "'"
+        out.write(quote)
+
+        i = 0
+        while i < len(proxy):
+            ch = proxy[i]
+            i += 1
+
+            # Escape quotes and backslashes
+            if ch == quote or ch == '\\':
+                out.write('\\')
+                out.write(ch)
+
+            #  Map special whitespace to '\t', \n', '\r'
+            elif ch == '\t':
+                out.write('\\t')
+            elif ch == '\n':
+                out.write('\\n')
+            elif ch == '\r':
+                out.write('\\r')
+
+            # Map non-printable US ASCII to '\xhh' */
+            elif ch < ' ' or ch == 0x7F:
+                out.write('\\x')
+                out.write(hexdigits[(ord(ch) >> 4) & 0x000F])
+                out.write(hexdigits[ord(ch) & 0x000F])
+
+            # Copy ASCII characters as-is
+            elif ord(ch) < 0x7F:
+                out.write(ch)
+
+            # Non-ASCII characters
+            else:
+                ucs = ch;
+
+                if self.char_width == 2:
+                    ch2 = 0
+                    # Get code point from surrogate pair
+                    if i < len(proxy):
+                        ch2 = proxy[i]
+                        if (ord(ch) >= 0xD800 and ord(ch) < 0xDC00
+                            and ord(ch2) >= 0xDC00 and ord(ch2) <= 0xDFFF):
+                            ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
+                            i += 1
+
+                # Map Unicode whitespace and control characters
+                # (categories Z* and C* except ASCII space)
+                if not _unichr_is_printable(ucs):
+                    # Unfortuately, Python 2's unicode type doesn't seem
+                    # to expose the "isprintable" method
+
+                    # Map 8-bit characters to '\\xhh'
+                    if ucs <= 0xff:
+                        out.write('\\x')
+                        out.write(hexdigits[(ord(ucs) >> 4) & 0x000F])
+                        out.write(hexdigits[ord(ucs) & 0x000F])
+                    # Map 21-bit characters to '\U00xxxxxx'
+                    elif ucs >= 0x10000:
+                        out.write('\\U')
+                        out.write(hexdigits[(ord(ucs) >> 28) & 0x0000000F])
+                        out.write(hexdigits[(ord(ucs) >> 24) & 0x0000000F])
+                        out.write(hexdigits[(ord(ucs) >> 20) & 0x0000000F])
+                        out.write(hexdigits[(ord(ucs) >> 16) & 0x0000000F])
+                        out.write(hexdigits[(ord(ucs) >> 12) & 0x0000000F])
+                        out.write(hexdigits[(ord(ucs) >> 8) & 0x0000000F])
+                        out.write(hexdigits[(ord(ucs) >> 4) & 0x0000000F])
+                        out.write(hexdigits[ord(ucs) & 0x0000000F])
+                    # Map 16-bit characters to '\uxxxx'
+                    else:
+                        out.write('\\u')
+                        out.write(hexdigits[(ord(ucs) >> 12) & 0x000F])
+                        out.write(hexdigits[(ord(ucs) >> 8) & 0x000F])
+                        out.write(hexdigits[(ord(ucs) >> 4) & 0x000F])
+                        out.write(hexdigits[ord(ucs) & 0x000F])
+                else:
+                    # Copy characters as-is
+                    out.write(ch)
+                    if self.char_width == 2:
+                        if ord(ucs) >= 0x10000:
+                            out.write(ch2)
+
+        out.write(quote)
+
+
+
 
 def int_from_int(gdbval):
     return int(str(gdbval))
@@ -1065,7 +1216,7 @@
     if type.code == gdb.TYPE_CODE_PTR:
         type = type.target().unqualified()
         t = str(type)
-        if t in ("PyObject", "PyFrameObject"):
+        if t in ("PyObject", "PyFrameObject", "PyUnicodeObject"):
             return PyObjectPtrPrinter(gdbval)
 
 """


More information about the Python-checkins mailing list