[Python-checkins] bpo-43950: Print columns in tracebacks (PEP 657) (GH-26958)

pablogsal webhook-mailer at python.org
Sun Jul 4 19:14:41 EDT 2021


https://github.com/python/cpython/commit/5644c7b3ffd49bed58dc095be6e6148e0bb4431e
commit: 5644c7b3ffd49bed58dc095be6e6148e0bb4431e
branch: main
author: Ammar Askar <ammar at ammaraskar.com>
committer: pablogsal <Pablogsal at gmail.com>
date: 2021-07-05T00:14:33+01:00
summary:

bpo-43950: Print columns in tracebacks (PEP 657) (GH-26958)

The traceback.c and traceback.py mechanisms now utilize the newly added code.co_positions and PyCode_Addr2Location
to print carets on the specific expressions involved in a traceback.

Co-authored-by: Pablo Galindo <Pablogsal at gmail.com>
Co-authored-by: Ammar Askar <ammar at ammaraskar.com>
Co-authored-by: Batuhan Taskaya <batuhanosmantaskaya at gmail.com>

files:
M Doc/library/traceback.rst
M Include/cpython/traceback.h
M Lib/idlelib/idle_test/test_run.py
M Lib/test/test_cmd_line_script.py
M Lib/test/test_doctest.py
M Lib/test/test_traceback.py
M Lib/test/test_zipimport.py
M Lib/traceback.py
M Parser/pegen.c
M Parser/pegen.h
M Python/_warnings.c
M Python/traceback.c

diff --git a/Doc/library/traceback.rst b/Doc/library/traceback.rst
index bd53bc066becc..1961b9a435bd3 100644
--- a/Doc/library/traceback.rst
+++ b/Doc/library/traceback.rst
@@ -447,37 +447,42 @@ The output for the example would look similar to this:
    *** print_tb:
      File "<doctest...>", line 10, in <module>
        lumberjack()
+       ^^^^^^^^^^^^
    *** print_exception:
    Traceback (most recent call last):
      File "<doctest...>", line 10, in <module>
        lumberjack()
+       ^^^^^^^^^^^^
      File "<doctest...>", line 4, in lumberjack
        bright_side_of_death()
+       ^^^^^^^^^^^^^^^^^^^^^^
    IndexError: tuple index out of range
    *** print_exc:
    Traceback (most recent call last):
      File "<doctest...>", line 10, in <module>
        lumberjack()
+       ^^^^^^^^^^^^
      File "<doctest...>", line 4, in lumberjack
        bright_side_of_death()
+       ^^^^^^^^^^^^^^^^^^^^^^
    IndexError: tuple index out of range
    *** format_exc, first and last line:
    Traceback (most recent call last):
    IndexError: tuple index out of range
    *** format_exception:
    ['Traceback (most recent call last):\n',
-    '  File "<doctest...>", line 10, in <module>\n    lumberjack()\n',
-    '  File "<doctest...>", line 4, in lumberjack\n    bright_side_of_death()\n',
-    '  File "<doctest...>", line 7, in bright_side_of_death\n    return tuple()[0]\n',
+    '  File "<doctest default[0]>", line 10, in <module>\n    lumberjack()\n    ^^^^^^^^^^^^\n',
+    '  File "<doctest default[0]>", line 4, in lumberjack\n    bright_side_of_death()\n    ^^^^^^^^^^^^^^^^^^^^^^\n',
+    '  File "<doctest default[0]>", line 7, in bright_side_of_death\n    return tuple()[0]\n           ^^^^^^^^^^\n',
     'IndexError: tuple index out of range\n']
    *** extract_tb:
    [<FrameSummary file <doctest...>, line 10 in <module>>,
     <FrameSummary file <doctest...>, line 4 in lumberjack>,
     <FrameSummary file <doctest...>, line 7 in bright_side_of_death>]
    *** format_tb:
-   ['  File "<doctest...>", line 10, in <module>\n    lumberjack()\n',
-    '  File "<doctest...>", line 4, in lumberjack\n    bright_side_of_death()\n',
-    '  File "<doctest...>", line 7, in bright_side_of_death\n    return tuple()[0]\n']
+   ['  File "<doctest default[0]>", line 10, in <module>\n    lumberjack()\n    ^^^^^^^^^^^^\n',
+    '  File "<doctest default[0]>", line 4, in lumberjack\n    bright_side_of_death()\n    ^^^^^^^^^^^^^^^^^^^^^^\n',
+    '  File "<doctest default[0]>", line 7, in bright_side_of_death\n    return tuple()[0]\n           ^^^^^^^^^^\n']
    *** tb_lineno: 10
 
 
diff --git a/Include/cpython/traceback.h b/Include/cpython/traceback.h
index aac5b42c344d3..d0dde335cfee5 100644
--- a/Include/cpython/traceback.h
+++ b/Include/cpython/traceback.h
@@ -10,5 +10,5 @@ typedef struct _traceback {
     int tb_lineno;
 } PyTracebackObject;
 
-PyAPI_FUNC(int) _Py_DisplaySourceLine(PyObject *, PyObject *, int, int);
+PyAPI_FUNC(int) _Py_DisplaySourceLine(PyObject *, PyObject *, int, int, int *, PyObject **);
 PyAPI_FUNC(void) _PyTraceback_Add(const char *, const char *, int);
diff --git a/Lib/idlelib/idle_test/test_run.py b/Lib/idlelib/idle_test/test_run.py
index ec4637c5ca617..b289fa7cbd648 100644
--- a/Lib/idlelib/idle_test/test_run.py
+++ b/Lib/idlelib/idle_test/test_run.py
@@ -33,9 +33,9 @@ def __eq__(self, other):
                         run.print_exception()
 
         tb = output.getvalue().strip().splitlines()
-        self.assertEqual(11, len(tb))
-        self.assertIn('UnhashableException: ex2', tb[3])
-        self.assertIn('UnhashableException: ex1', tb[10])
+        self.assertEqual(13, len(tb))
+        self.assertIn('UnhashableException: ex2', tb[4])
+        self.assertIn('UnhashableException: ex1', tb[12])
 
     data = (('1/0', ZeroDivisionError, "division by zero\n"),
             ('abc', NameError, "name 'abc' is not defined. "
diff --git a/Lib/test/test_cmd_line_script.py b/Lib/test/test_cmd_line_script.py
index 6ffec918ebbd5..e50c992579991 100644
--- a/Lib/test/test_cmd_line_script.py
+++ b/Lib/test/test_cmd_line_script.py
@@ -548,10 +548,10 @@ def test_pep_409_verbiage(self):
             script_name = _make_test_script(script_dir, 'script', script)
             exitcode, stdout, stderr = assert_python_failure(script_name)
             text = stderr.decode('ascii').split('\n')
-            self.assertEqual(len(text), 5)
+            self.assertEqual(len(text), 6)
             self.assertTrue(text[0].startswith('Traceback'))
             self.assertTrue(text[1].startswith('  File '))
-            self.assertTrue(text[3].startswith('NameError'))
+            self.assertTrue(text[4].startswith('NameError'))
 
     def test_non_ascii(self):
         # Mac OS X denies the creation of a file with an invalid UTF-8 name.
diff --git a/Lib/test/test_doctest.py b/Lib/test/test_doctest.py
index 828a0ff56763a..06d9d5d4ade83 100644
--- a/Lib/test/test_doctest.py
+++ b/Lib/test/test_doctest.py
@@ -2835,6 +2835,7 @@ def test_unicode(): """
             exec(compile(example.source, filename, "single",
           File "<doctest foo-bär@baz[0]>", line 1, in <module>
             raise Exception('clé')
+            ^^^^^^^^^^^^^^^^^^^^^^
         Exception: clé
     TestResults(failed=1, attempted=1)
     """
diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py
index 78b2851d38494..54f592a8ea500 100644
--- a/Lib/test/test_traceback.py
+++ b/Lib/test/test_traceback.py
@@ -17,8 +17,9 @@
 
 
 test_code = namedtuple('code', ['co_filename', 'co_name'])
+test_code.co_positions = lambda _: iter([(6, 6, 0, 0)])
 test_frame = namedtuple('frame', ['f_code', 'f_globals', 'f_locals'])
-test_tb = namedtuple('tb', ['tb_frame', 'tb_lineno', 'tb_next'])
+test_tb = namedtuple('tb', ['tb_frame', 'tb_lineno', 'tb_next', 'tb_lasti'])
 
 
 class TracebackCases(unittest.TestCase):
@@ -154,9 +155,9 @@ def do_test(firstlines, message, charset, lineno):
             self.assertTrue(stdout[2].endswith(err_line),
                 "Invalid traceback line: {0!r} instead of {1!r}".format(
                     stdout[2], err_line))
-            self.assertTrue(stdout[3] == err_msg,
+            self.assertTrue(stdout[4] == err_msg,
                 "Invalid error message: {0!r} instead of {1!r}".format(
-                    stdout[3], err_msg))
+                    stdout[4], err_msg))
 
         do_test("", "foo", "ascii", 3)
         for charset in ("ascii", "iso-8859-1", "utf-8", "GBK"):
@@ -272,6 +273,114 @@ def test_signatures(self):
             '(exc, /, value=<implicit>)')
 
 
+class TracebackErrorLocationCaretTests(unittest.TestCase):
+    """
+    Tests for printing code error expressions as part of PEP 657
+    """
+    def get_exception(self, callable):
+        try:
+            callable()
+            self.fail("No exception thrown.")
+        except:
+            return traceback.format_exc().splitlines()[:-1]
+
+    callable_line = get_exception.__code__.co_firstlineno + 2
+
+    def test_basic_caret(self):
+        def f():
+            raise ValueError("basic caret tests")
+
+        lineno_f = f.__code__.co_firstlineno
+        expected_f = (
+            'Traceback (most recent call last):\n'
+            f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
+            '    callable()\n'
+            '    ^^^^^^^^^^\n'
+            f'  File "{__file__}", line {lineno_f+1}, in f\n'
+            '    raise ValueError("basic caret tests")\n'
+            '    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n'
+        )
+        result_lines = self.get_exception(f)
+        self.assertEqual(result_lines, expected_f.splitlines())
+
+    def test_line_with_unicode(self):
+        # Make sure that even if a line contains multi-byte unicode characters
+        # the correct carets are printed.
+        def f_with_unicode():
+            raise ValueError("Ĥellö Wörld")
+
+        lineno_f = f_with_unicode.__code__.co_firstlineno
+        expected_f = (
+            'Traceback (most recent call last):\n'
+            f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
+            '    callable()\n'
+            '    ^^^^^^^^^^\n'
+            f'  File "{__file__}", line {lineno_f+1}, in f_with_unicode\n'
+            '    raise ValueError("Ĥellö Wörld")\n'
+            '    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n'
+        )
+        result_lines = self.get_exception(f_with_unicode)
+        self.assertEqual(result_lines, expected_f.splitlines())
+
+    def test_caret_in_type_annotation(self):
+        def f_with_type():
+            def foo(a: THIS_DOES_NOT_EXIST ) -> int:
+                return 0
+
+        lineno_f = f_with_type.__code__.co_firstlineno
+        expected_f = (
+            'Traceback (most recent call last):\n'
+            f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
+            '    callable()\n'
+            '    ^^^^^^^^^^\n'
+            f'  File "{__file__}", line {lineno_f+1}, in f_with_type\n'
+            '    def foo(a: THIS_DOES_NOT_EXIST ) -> int:\n'
+            '               ^^^^^^^^^^^^^^^^^^^\n'
+        )
+        result_lines = self.get_exception(f_with_type)
+        self.assertEqual(result_lines, expected_f.splitlines())
+
+    def test_caret_multiline_expression(self):
+        # Make sure no carets are printed for expressions spanning multiple
+        # lines.
+        def f_with_multiline():
+            raise ValueError(
+                "error over multiple lines"
+            )
+
+        lineno_f = f_with_multiline.__code__.co_firstlineno
+        expected_f = (
+            'Traceback (most recent call last):\n'
+            f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
+            '    callable()\n'
+            '    ^^^^^^^^^^\n'
+            f'  File "{__file__}", line {lineno_f+1}, in f_with_multiline\n'
+            '    raise ValueError(\n'
+        )
+        result_lines = self.get_exception(f_with_multiline)
+        self.assertEqual(result_lines, expected_f.splitlines())
+
+
+@cpython_only
+class CPythonTracebackErrorCaretTests(TracebackErrorLocationCaretTests):
+    """
+    Same set of tests as above but with Python's internal traceback printing.
+    """
+    def get_exception(self, callable):
+        from _testcapi import traceback_print
+        try:
+            callable()
+            self.fail("No exception thrown.")
+        except:
+            type_, value, tb = sys.exc_info()
+
+            file_ = StringIO()
+            traceback_print(tb, file_)
+            return file_.getvalue().splitlines()
+
+    callable_line = get_exception.__code__.co_firstlineno + 3
+
+
 class TracebackFormatTests(unittest.TestCase):
 
     def some_exception(self):
@@ -315,9 +424,9 @@ def check_traceback_format(self, cleanup_func=None):
 
         # Make sure that the traceback is properly indented.
         tb_lines = python_fmt.splitlines()
-        self.assertEqual(len(tb_lines), 5)
+        self.assertEqual(len(tb_lines), 7)
         banner = tb_lines[0]
-        location, source_line = tb_lines[-2:]
+        location, source_line = tb_lines[-3], tb_lines[-2]
         self.assertTrue(banner.startswith('Traceback'))
         self.assertTrue(location.startswith('  File'))
         self.assertTrue(source_line.startswith('    raise'))
@@ -381,12 +490,16 @@ def f():
             'Traceback (most recent call last):\n'
             f'  File "{__file__}", line {lineno_f+5}, in _check_recursive_traceback_display\n'
             '    f()\n'
+            '    ^^^\n'
             f'  File "{__file__}", line {lineno_f+1}, in f\n'
             '    f()\n'
+            '    ^^^\n'
             f'  File "{__file__}", line {lineno_f+1}, in f\n'
             '    f()\n'
+            '    ^^^\n'
             f'  File "{__file__}", line {lineno_f+1}, in f\n'
             '    f()\n'
+            '    ^^^\n'
             # XXX: The following line changes depending on whether the tests
             # are run through the interactive interpreter or with -m
             # It also varies depending on the platform (stack size)
@@ -427,19 +540,24 @@ def g(count=10):
         result_g = (
             f'  File "{__file__}", line {lineno_g+2}, in g\n'
             '    return g(count-1)\n'
+            '           ^^^^^^^^^^\n'
             f'  File "{__file__}", line {lineno_g+2}, in g\n'
             '    return g(count-1)\n'
+            '           ^^^^^^^^^^\n'
             f'  File "{__file__}", line {lineno_g+2}, in g\n'
             '    return g(count-1)\n'
+            '           ^^^^^^^^^^\n'
             '  [Previous line repeated 7 more times]\n'
             f'  File "{__file__}", line {lineno_g+3}, in g\n'
             '    raise ValueError\n'
+            '    ^^^^^^^^^^^^^^^^\n'
             'ValueError\n'
         )
         tb_line = (
             'Traceback (most recent call last):\n'
             f'  File "{__file__}", line {lineno_g+7}, in _check_recursive_traceback_display\n'
             '    g()\n'
+            '    ^^^\n'
         )
         expected = (tb_line + result_g).splitlines()
         actual = stderr_g.getvalue().splitlines()
@@ -464,15 +582,20 @@ def h(count=10):
             'Traceback (most recent call last):\n'
             f'  File "{__file__}", line {lineno_h+7}, in _check_recursive_traceback_display\n'
             '    h()\n'
+            '    ^^^\n'
             f'  File "{__file__}", line {lineno_h+2}, in h\n'
             '    return h(count-1)\n'
+            '           ^^^^^^^^^^\n'
             f'  File "{__file__}", line {lineno_h+2}, in h\n'
             '    return h(count-1)\n'
+            '           ^^^^^^^^^^\n'
             f'  File "{__file__}", line {lineno_h+2}, in h\n'
             '    return h(count-1)\n'
+            '           ^^^^^^^^^^\n'
             '  [Previous line repeated 7 more times]\n'
             f'  File "{__file__}", line {lineno_h+3}, in h\n'
             '    g()\n'
+            '    ^^^\n'
         )
         expected = (result_h + result_g).splitlines()
         actual = stderr_h.getvalue().splitlines()
@@ -489,18 +612,23 @@ def h(count=10):
         result_g = (
             f'  File "{__file__}", line {lineno_g+2}, in g\n'
             '    return g(count-1)\n'
+            '           ^^^^^^^^^^\n'
             f'  File "{__file__}", line {lineno_g+2}, in g\n'
             '    return g(count-1)\n'
+            '           ^^^^^^^^^^\n'
             f'  File "{__file__}", line {lineno_g+2}, in g\n'
             '    return g(count-1)\n'
+            '           ^^^^^^^^^^\n'
             f'  File "{__file__}", line {lineno_g+3}, in g\n'
             '    raise ValueError\n'
+            '    ^^^^^^^^^^^^^^^^\n'
             'ValueError\n'
         )
         tb_line = (
             'Traceback (most recent call last):\n'
-            f'  File "{__file__}", line {lineno_g+71}, in _check_recursive_traceback_display\n'
+            f'  File "{__file__}", line {lineno_g+81}, in _check_recursive_traceback_display\n'
             '    g(traceback._RECURSIVE_CUTOFF)\n'
+            '    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n'
         )
         expected = (tb_line + result_g).splitlines()
         actual = stderr_g.getvalue().splitlines()
@@ -517,19 +645,24 @@ def h(count=10):
         result_g = (
             f'  File "{__file__}", line {lineno_g+2}, in g\n'
             '    return g(count-1)\n'
+            '           ^^^^^^^^^^\n'
             f'  File "{__file__}", line {lineno_g+2}, in g\n'
             '    return g(count-1)\n'
+            '           ^^^^^^^^^^\n'
             f'  File "{__file__}", line {lineno_g+2}, in g\n'
             '    return g(count-1)\n'
+            '           ^^^^^^^^^^\n'
             '  [Previous line repeated 1 more time]\n'
             f'  File "{__file__}", line {lineno_g+3}, in g\n'
             '    raise ValueError\n'
+            '    ^^^^^^^^^^^^^^^^\n'
             'ValueError\n'
         )
         tb_line = (
             'Traceback (most recent call last):\n'
-            f'  File "{__file__}", line {lineno_g+99}, in _check_recursive_traceback_display\n'
+            f'  File "{__file__}", line {lineno_g+114}, in _check_recursive_traceback_display\n'
             '    g(traceback._RECURSIVE_CUTOFF + 1)\n'
+            '    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n'
         )
         expected = (tb_line + result_g).splitlines()
         actual = stderr_g.getvalue().splitlines()
@@ -580,10 +713,10 @@ def __eq__(self, other):
             exception_print(exc_val)
 
         tb = stderr_f.getvalue().strip().splitlines()
-        self.assertEqual(11, len(tb))
-        self.assertEqual(context_message.strip(), tb[5])
-        self.assertIn('UnhashableException: ex2', tb[3])
-        self.assertIn('UnhashableException: ex1', tb[10])
+        self.assertEqual(13, len(tb))
+        self.assertEqual(context_message.strip(), tb[6])
+        self.assertIn('UnhashableException: ex2', tb[4])
+        self.assertIn('UnhashableException: ex1', tb[12])
 
 
 cause_message = (
@@ -613,8 +746,8 @@ def zero_div(self):
 
     def check_zero_div(self, msg):
         lines = msg.splitlines()
-        self.assertTrue(lines[-3].startswith('  File'))
-        self.assertIn('1/0 # In zero_div', lines[-2])
+        self.assertTrue(lines[-4].startswith('  File'))
+        self.assertIn('1/0 # In zero_div', lines[-3])
         self.assertTrue(lines[-1].startswith('ZeroDivisionError'), lines[-1])
 
     def test_simple(self):
@@ -623,11 +756,11 @@ def test_simple(self):
         except ZeroDivisionError as _:
             e = _
         lines = self.get_report(e).splitlines()
-        self.assertEqual(len(lines), 4)
+        self.assertEqual(len(lines), 5)
         self.assertTrue(lines[0].startswith('Traceback'))
         self.assertTrue(lines[1].startswith('  File'))
         self.assertIn('1/0 # Marker', lines[2])
-        self.assertTrue(lines[3].startswith('ZeroDivisionError'))
+        self.assertTrue(lines[4].startswith('ZeroDivisionError'))
 
     def test_cause(self):
         def inner_raise():
@@ -666,11 +799,11 @@ def test_context_suppression(self):
         except ZeroDivisionError as _:
             e = _
         lines = self.get_report(e).splitlines()
-        self.assertEqual(len(lines), 4)
+        self.assertEqual(len(lines), 5)
         self.assertTrue(lines[0].startswith('Traceback'))
         self.assertTrue(lines[1].startswith('  File'))
         self.assertIn('ZeroDivisionError from None', lines[2])
-        self.assertTrue(lines[3].startswith('ZeroDivisionError'))
+        self.assertTrue(lines[4].startswith('ZeroDivisionError'))
 
     def test_cause_and_context(self):
         # When both a cause and a context are set, only the cause should be
@@ -1362,7 +1495,7 @@ def test_lookup_lines(self):
         e = Exception("uh oh")
         c = test_code('/foo.py', 'method')
         f = test_frame(c, None, None)
-        tb = test_tb(f, 6, None)
+        tb = test_tb(f, 6, None, 0)
         exc = traceback.TracebackException(Exception, e, tb, lookup_lines=False)
         self.assertEqual(linecache.cache, {})
         linecache.updatecache('/foo.py', globals())
@@ -1373,7 +1506,7 @@ def test_locals(self):
         e = Exception("uh oh")
         c = test_code('/foo.py', 'method')
         f = test_frame(c, globals(), {'something': 1, 'other': 'string'})
-        tb = test_tb(f, 6, None)
+        tb = test_tb(f, 6, None, 0)
         exc = traceback.TracebackException(
             Exception, e, tb, capture_locals=True)
         self.assertEqual(
@@ -1384,7 +1517,7 @@ def test_no_locals(self):
         e = Exception("uh oh")
         c = test_code('/foo.py', 'method')
         f = test_frame(c, globals(), {'something': 1})
-        tb = test_tb(f, 6, None)
+        tb = test_tb(f, 6, None, 0)
         exc = traceback.TracebackException(Exception, e, tb)
         self.assertEqual(exc.stack[0].locals, None)
 
@@ -1405,8 +1538,9 @@ def f():
         output = StringIO()
         exc.print(file=output)
         self.assertEqual(
-            output.getvalue().split('\n')[-4:],
+            output.getvalue().split('\n')[-5:],
             ['    x/0',
+             '    ^^^',
              '    x = 12',
              'ZeroDivisionError: division by zero',
              ''])
diff --git a/Lib/test/test_zipimport.py b/Lib/test/test_zipimport.py
index dfc4a060ec6cc..861ebe347b3ee 100644
--- a/Lib/test/test_zipimport.py
+++ b/Lib/test/test_zipimport.py
@@ -716,7 +716,10 @@ def doTraceback(self, module):
 
             s = io.StringIO()
             print_tb(tb, 1, s)
-            self.assertTrue(s.getvalue().endswith(raise_src))
+            self.assertTrue(s.getvalue().endswith(
+                '    def do_raise(): raise TypeError\n'
+                '                    ^^^^^^^^^^^^^^^\n'
+            ))
         else:
             raise AssertionError("This ought to be impossible")
 
diff --git a/Lib/traceback.py b/Lib/traceback.py
index b4c7641addec7..cf1ba2a1a49ac 100644
--- a/Lib/traceback.py
+++ b/Lib/traceback.py
@@ -69,7 +69,8 @@ def extract_tb(tb, limit=None):
     trace.  The line is a string with leading and trailing
     whitespace stripped; if the source is not available it is None.
     """
-    return StackSummary.extract(walk_tb(tb), limit=limit)
+    return StackSummary._extract_from_extended_frame_gen(
+        _walk_tb_with_full_positions(tb), limit=limit)
 
 #
 # Exception formatting and output.
@@ -251,10 +252,12 @@ class FrameSummary:
       mapping the name to the repr() of the variable.
     """
 
-    __slots__ = ('filename', 'lineno', 'name', '_line', 'locals')
+    __slots__ = ('filename', 'lineno', 'end_lineno', 'colno', 'end_colno',
+                 'name', '_line', 'locals')
 
     def __init__(self, filename, lineno, name, *, lookup_line=True,
-            locals=None, line=None):
+            locals=None, line=None,
+            end_lineno=None, colno=None, end_colno=None):
         """Construct a FrameSummary.
 
         :param lookup_line: If True, `linecache` is consulted for the source
@@ -271,6 +274,9 @@ def __init__(self, filename, lineno, name, *, lookup_line=True,
         if lookup_line:
             self.line
         self.locals = {k: repr(v) for k, v in locals.items()} if locals else None
+        self.end_lineno = end_lineno
+        self.colno = colno
+        self.end_colno = end_colno
 
     def __eq__(self, other):
         if isinstance(other, FrameSummary):
@@ -295,11 +301,17 @@ def __repr__(self):
     def __len__(self):
         return 4
 
+    @property
+    def _original_line(self):
+        # Returns the line as-is from the source, without modifying whitespace.
+        self.line
+        return self._line
+
     @property
     def line(self):
         if self._line is None:
-            self._line = linecache.getline(self.filename, self.lineno).strip()
-        return self._line
+            self._line = linecache.getline(self.filename, self.lineno)
+        return self._line.strip()
 
 
 def walk_stack(f):
@@ -309,7 +321,7 @@ def walk_stack(f):
     current stack is used. Usually used with StackSummary.extract.
     """
     if f is None:
-        f = sys._getframe().f_back.f_back
+        f = sys._getframe().f_back.f_back.f_back.f_back
     while f is not None:
         yield f, f.f_lineno
         f = f.f_back
@@ -326,6 +338,27 @@ def walk_tb(tb):
         tb = tb.tb_next
 
 
+def _walk_tb_with_full_positions(tb):
+    # Internal version of walk_tb that yields full code positions including
+    # end line and column information.
+    while tb is not None:
+        positions = _get_code_position(tb.tb_frame.f_code, tb.tb_lasti)
+        # Yield tb_lineno when co_positions does not have a line number to
+        # maintain behavior with walk_tb.
+        if positions[0] is None:
+            yield tb.tb_frame, (tb.tb_lineno, ) + positions[1:]
+        else:
+            yield tb.tb_frame, positions
+        tb = tb.tb_next
+
+
+def _get_code_position(code, instruction_index):
+    if instruction_index < 0:
+        return (None, None, None, None)
+    positions_gen = code.co_positions()
+    return next(itertools.islice(positions_gen, instruction_index // 2, None))
+
+
 _RECURSIVE_CUTOFF = 3 # Also hardcoded in traceback.c.
 
 class StackSummary(list):
@@ -345,6 +378,21 @@ def extract(klass, frame_gen, *, limit=None, lookup_lines=True,
         :param capture_locals: If True, the local variables from each frame will
             be captured as object representations into the FrameSummary.
         """
+        def extended_frame_gen():
+            for f, lineno in frame_gen:
+                yield f, (lineno, None, None, None)
+
+        return klass._extract_from_extended_frame_gen(
+            extended_frame_gen(), limit=limit, lookup_lines=lookup_lines,
+            capture_locals=capture_locals)
+
+    @classmethod
+    def _extract_from_extended_frame_gen(klass, frame_gen, *, limit=None,
+            lookup_lines=True, capture_locals=False):
+        # Same as extract but operates on a frame generator that yields
+        # (frame, (lineno, end_lineno, colno, end_colno)) in the stack.
+        # Only lineno is required, the remaining fields can be empty if the
+        # information is not available.
         if limit is None:
             limit = getattr(sys, 'tracebacklimit', None)
             if limit is not None and limit < 0:
@@ -357,7 +405,7 @@ def extract(klass, frame_gen, *, limit=None, lookup_lines=True,
 
         result = klass()
         fnames = set()
-        for f, lineno in frame_gen:
+        for f, (lineno, end_lineno, colno, end_colno) in frame_gen:
             co = f.f_code
             filename = co.co_filename
             name = co.co_name
@@ -370,7 +418,8 @@ def extract(klass, frame_gen, *, limit=None, lookup_lines=True,
             else:
                 f_locals = None
             result.append(FrameSummary(
-                filename, lineno, name, lookup_line=False, locals=f_locals))
+                filename, lineno, name, lookup_line=False, locals=f_locals,
+                end_lineno=end_lineno, colno=colno, end_colno=end_colno))
         for filename in fnames:
             linecache.checkcache(filename)
         # If immediate lookup was desired, trigger lookups now.
@@ -437,6 +486,17 @@ def format(self):
                 frame.filename, frame.lineno, frame.name))
             if frame.line:
                 row.append('    {}\n'.format(frame.line.strip()))
+
+                stripped_characters = len(frame._original_line) - len(frame.line.lstrip())
+                if frame.end_lineno == frame.lineno and frame.end_colno != 0:
+                    colno = _byte_offset_to_character_offset(frame._original_line, frame.colno)
+                    end_colno = _byte_offset_to_character_offset(frame._original_line, frame.end_colno)
+
+                    row.append('    ')
+                    row.append(' ' * (colno - stripped_characters))
+                    row.append('^' * (end_colno - colno))
+                    row.append('\n')
+
             if frame.locals:
                 for name, value in sorted(frame.locals.items()):
                     row.append('    {name} = {value}\n'.format(name=name, value=value))
@@ -450,6 +510,14 @@ def format(self):
         return result
 
 
+def _byte_offset_to_character_offset(str, offset):
+    as_utf8 = str.encode('utf-8')
+    if offset > len(as_utf8):
+        offset = len(as_utf8)
+
+    return len(as_utf8[:offset + 1].decode("utf-8"))
+
+
 class TracebackException:
     """An exception ready for rendering.
 
@@ -491,8 +559,9 @@ def __init__(self, exc_type, exc_value, exc_traceback, *, limit=None,
         _seen.add(id(exc_value))
 
         # TODO: locals.
-        self.stack = StackSummary.extract(
-            walk_tb(exc_traceback), limit=limit, lookup_lines=lookup_lines,
+        self.stack = StackSummary._extract_from_extended_frame_gen(
+            _walk_tb_with_full_positions(exc_traceback),
+            limit=limit, lookup_lines=lookup_lines,
             capture_locals=capture_locals)
         self.exc_type = exc_type
         # Capture now to permit freeing resources: only complication is in the
diff --git a/Parser/pegen.c b/Parser/pegen.c
index 3472d489e067d..3e8ddfbf53cf7 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -139,27 +139,6 @@ _create_dummy_identifier(Parser *p)
     return _PyPegen_new_identifier(p, "");
 }
 
-static inline Py_ssize_t
-byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
-{
-    const char *str = PyUnicode_AsUTF8(line);
-    if (!str) {
-        return 0;
-    }
-    Py_ssize_t len = strlen(str);
-    if (col_offset > len + 1) {
-        col_offset = len + 1;
-    }
-    assert(col_offset >= 0);
-    PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, "replace");
-    if (!text) {
-        return 0;
-    }
-    Py_ssize_t size = PyUnicode_GET_LENGTH(text);
-    Py_DECREF(text);
-    return size;
-}
-
 const char *
 _PyPegen_get_expr_name(expr_ty e)
 {
@@ -418,6 +397,27 @@ get_error_line(Parser *p, Py_ssize_t lineno)
     return PyUnicode_DecodeUTF8(cur_line, next_newline - cur_line, "replace");
 }
 
+Py_ssize_t
+_PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
+{
+    const char *str = PyUnicode_AsUTF8(line);
+    if (!str) {
+        return 0;
+    }
+    Py_ssize_t len = strlen(str);
+    if (col_offset > len + 1) {
+        col_offset = len + 1;
+    }
+    assert(col_offset >= 0);
+    PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, "replace");
+    if (!text) {
+        return 0;
+    }
+    Py_ssize_t size = PyUnicode_GET_LENGTH(text);
+    Py_DECREF(text);
+    return size;
+}
+
 void *
 _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
                                     Py_ssize_t lineno, Py_ssize_t col_offset,
@@ -498,9 +498,9 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
     Py_ssize_t end_col_number = end_col_offset;
 
     if (p->tok->encoding != NULL) {
-        col_number = byte_offset_to_character_offset(error_line, col_offset);
+        col_number = _PyPegen_byte_offset_to_character_offset(error_line, col_offset);
         end_col_number = end_col_number > 0 ?
-                         byte_offset_to_character_offset(error_line, end_col_offset) :
+                         _PyPegen_byte_offset_to_character_offset(error_line, end_col_offset) :
                          end_col_number;
     }
     tmp = Py_BuildValue("(OiiNii)", p->tok->filename, lineno, col_number, error_line, end_lineno, end_col_number);
diff --git a/Parser/pegen.h b/Parser/pegen.h
index eac73bba151bc..c09b4a2927562 100644
--- a/Parser/pegen.h
+++ b/Parser/pegen.h
@@ -139,6 +139,7 @@ expr_ty _PyPegen_name_token(Parser *p);
 expr_ty _PyPegen_number_token(Parser *p);
 void *_PyPegen_string_token(Parser *p);
 const char *_PyPegen_get_expr_name(expr_ty);
+Py_ssize_t _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset);
 void *_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...);
 void *_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
                                           Py_ssize_t lineno, Py_ssize_t col_offset,
diff --git a/Python/_warnings.c b/Python/_warnings.c
index 9c8815c1a3e20..9f68da208731e 100644
--- a/Python/_warnings.c
+++ b/Python/_warnings.c
@@ -544,7 +544,7 @@ show_warning(PyObject *filename, int lineno, PyObject *text,
         PyFile_WriteString("\n", f_stderr);
     }
     else {
-        _Py_DisplaySourceLine(f_stderr, filename, lineno, 2);
+        _Py_DisplaySourceLine(f_stderr, filename, lineno, 2, NULL, NULL);
     }
 
 error:
diff --git a/Python/traceback.c b/Python/traceback.c
index f7dc5ad686476..a60f991642433 100644
--- a/Python/traceback.c
+++ b/Python/traceback.c
@@ -3,9 +3,11 @@
 
 #include "Python.h"
 
-#include "code.h"
+#include "code.h"                 // PyCode_Addr2Line etc
 #include "pycore_interp.h"        // PyInterpreterState.gc
 #include "frameobject.h"          // PyFrame_GetBack()
+#include "pycore_frame.h"         // _PyFrame_GetCode()
+#include "../Parser/pegen.h"      // _PyPegen_byte_offset_to_character_offset()
 #include "structmember.h"         // PyMemberDef
 #include "osdefs.h"               // SEP
 #ifdef HAVE_FCNTL_H
@@ -370,7 +372,7 @@ _Py_FindSourceFile(PyObject *filename, char* namebuf, size_t namelen, PyObject *
 }
 
 int
-_Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent)
+_Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent, int *truncation, PyObject **line)
 {
     int err = 0;
     int fd;
@@ -461,6 +463,11 @@ _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent)
         return err;
     }
 
+    if (line) {
+        Py_INCREF(lineobj);
+        *line = lineobj;
+    }
+
     /* remove the indentation of the line */
     kind = PyUnicode_KIND(lineobj);
     data = PyUnicode_DATA(lineobj);
@@ -480,6 +487,10 @@ _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent)
         }
     }
 
+    if (truncation != NULL) {
+        *truncation = i - indent;
+    }
+
     /* Write some spaces before the line */
     strcpy(buf, "          ");
     assert (strlen(buf) == 10);
@@ -501,8 +512,11 @@ _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent)
     return err;
 }
 
+#define _TRACEBACK_SOURCE_LINE_INDENT 4  // indent value tb_displayline passes to _Py_DisplaySourceLine
+
 static int
-tb_displayline(PyObject *f, PyObject *filename, int lineno, PyObject *name)
+tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int lineno,
+               PyFrameObject *frame, PyObject *name)
 {
     int err;
     PyObject *line;
@@ -517,9 +531,56 @@ tb_displayline(PyObject *f, PyObject *filename, int lineno, PyObject *name)
     Py_DECREF(line);
     if (err != 0)
         return err;
+    int truncation = _TRACEBACK_SOURCE_LINE_INDENT;  // replaced by _Py_DisplaySourceLine with (stripped leading chars - indent)
+    PyObject* source_line = NULL;  // receives a new reference to the source line; released at done:
     /* ignore errors since we can't report them, can we? */
-    if (_Py_DisplaySourceLine(f, filename, lineno, 4))
+    if (!_Py_DisplaySourceLine(f, filename, lineno, _TRACEBACK_SOURCE_LINE_INDENT,
+                               &truncation, &source_line)) {
+        int code_offset = tb->tb_lasti;
+        PyCodeObject* code = _PyFrame_GetCode(frame);
+
+        int start_line;
+        int end_line;
+        int start_col_byte_offset;
+        int end_col_byte_offset;
+        if (!PyCode_Addr2Location(code, code_offset, &start_line, &start_col_byte_offset,
+                                 &end_line, &end_col_byte_offset)) {
+            goto done;  // location lookup reported failure: print no caret line
+        }
+        if (start_line != end_line) {
+            goto done;  // only ranges contained in a single line are underlined
+        }
+
+        if (start_col_byte_offset < 0 || end_col_byte_offset < 0) {
+            goto done;  // a negative column on either side: print no caret line
+        }
+        // Convert the utf-8 byte offset to the actual character offset so we
+        // print the right number of carets.
+        Py_ssize_t start_offset = _PyPegen_byte_offset_to_character_offset(source_line, start_col_byte_offset);
+        Py_ssize_t end_offset = _PyPegen_byte_offset_to_character_offset(source_line, end_col_byte_offset);
+
+        Py_ssize_t offset = truncation;  // was `char`: must match the Py_ssize_t columns so long lines cannot wrap the counter
+        while (++offset <= start_offset) {  // pad with (start_offset - truncation) spaces up to the start column
+            err = PyFile_WriteString(" ", f);
+            if (err < 0) {
+                goto done;
+            }
+        }
+        while (++offset <= end_offset + 1) {  // then one caret per character of the range
+            err = PyFile_WriteString("^", f);
+            if (err < 0) {
+                goto done;
+            }
+        }
+        err = PyFile_WriteString("\n", f);
+    }
+
+    else {
         PyErr_Clear();
+    }
+    
+done:
+    Py_XDECREF(source_line);
     return err;
 }
 
@@ -576,8 +637,8 @@ tb_printinternal(PyTracebackObject *tb, PyObject *f, long limit)
         }
         cnt++;
         if (err == 0 && cnt <= TB_RECURSIVE_CUTOFF) {
-            err = tb_displayline(f, code->co_filename, tb->tb_lineno,
-                                 code->co_name);
+            err = tb_displayline(tb, f, code->co_filename, tb->tb_lineno,
+                                 tb->tb_frame, code->co_name);
             if (err == 0) {
                 err = PyErr_CheckSignals();
             }



More information about the Python-checkins mailing list