[Python-checkins] [3.9] bpo-44885: Correct the ast locations of f-strings with format specs and repeated expressions (GH-27729) (GH-27744)

pablogsal webhook-mailer at python.org
Thu Aug 12 13:46:50 EDT 2021


https://github.com/python/cpython/commit/4b86c9c5146c339c689830619be9d29b8f7bf417
commit: 4b86c9c5146c339c689830619be9d29b8f7bf417
branch: 3.9
author: Pablo Galindo Salgado <Pablogsal at gmail.com>
committer: pablogsal <Pablogsal at gmail.com>
date: 2021-08-12T18:46:35+01:00
summary:

[3.9] bpo-44885: Correct the ast locations of f-strings with format specs and repeated expressions (GH-27729) (GH-27744)

(cherry picked from commit 8e832fb2a2cb54d7262148b6ec15563dffb48d63)

Co-authored-by: Pablo Galindo Salgado <Pablogsal at gmail.com>

files:
A Misc/NEWS.d/next/Core and Builtins/2021-08-11-15-39-57.bpo-44885.i4noUO.rst
M Lib/test/test_fstring.py
M Lib/test/test_peg_parser.py
M Parser/pegen/parse_string.c

diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py
index 05e7102d624f3e..fbd97ffbdec80c 100644
--- a/Lib/test/test_fstring.py
+++ b/Lib/test/test_fstring.py
@@ -212,11 +212,6 @@ def test_ast_line_numbers_nested(self):
         self.assertEqual(call.col_offset, 11)
 
     def test_ast_line_numbers_duplicate_expression(self):
-        """Duplicate expression
-
-        NOTE: this is currently broken, always sets location of the first
-        expression.
-        """
         expr = """
 a = 10
 f'{a * x()} {a * x()} {a * x()}'
@@ -266,9 +261,9 @@ def test_ast_line_numbers_duplicate_expression(self):
         self.assertEqual(binop.lineno, 3)
         self.assertEqual(binop.left.lineno, 3)
         self.assertEqual(binop.right.lineno, 3)
-        self.assertEqual(binop.col_offset, 3)  # FIXME: this is wrong
-        self.assertEqual(binop.left.col_offset, 3)  # FIXME: this is wrong
-        self.assertEqual(binop.right.col_offset, 7)  # FIXME: this is wrong
+        self.assertEqual(binop.col_offset, 13)
+        self.assertEqual(binop.left.col_offset, 13)
+        self.assertEqual(binop.right.col_offset, 17)
         # check the third binop location
         binop = t.body[1].value.values[4].value
         self.assertEqual(type(binop), ast.BinOp)
@@ -278,9 +273,32 @@ def test_ast_line_numbers_duplicate_expression(self):
         self.assertEqual(binop.lineno, 3)
         self.assertEqual(binop.left.lineno, 3)
         self.assertEqual(binop.right.lineno, 3)
-        self.assertEqual(binop.col_offset, 3)  # FIXME: this is wrong
-        self.assertEqual(binop.left.col_offset, 3)  # FIXME: this is wrong
-        self.assertEqual(binop.right.col_offset, 7)  # FIXME: this is wrong
+        self.assertEqual(binop.col_offset, 23)
+        self.assertEqual(binop.left.col_offset, 23)
+        self.assertEqual(binop.right.col_offset, 27)
+
+    def test_ast_numbers_fstring_with_formatting(self):
+
+        t = ast.parse('f"Here is that pesky {xxx:.3f} again"')
+        self.assertEqual(len(t.body), 1)
+        self.assertEqual(t.body[0].lineno, 1)
+
+        self.assertEqual(type(t.body[0]), ast.Expr)
+        self.assertEqual(type(t.body[0].value), ast.JoinedStr)
+        self.assertEqual(len(t.body[0].value.values), 3)
+
+        self.assertEqual(type(t.body[0].value.values[0]), ast.Constant)
+        self.assertEqual(type(t.body[0].value.values[1]), ast.FormattedValue)
+        self.assertEqual(type(t.body[0].value.values[2]), ast.Constant)
+
+        _, expr, _ = t.body[0].value.values
+
+        name = expr.value
+        self.assertEqual(type(name), ast.Name)
+        self.assertEqual(name.lineno, 1)
+        self.assertEqual(name.end_lineno, 1)
+        self.assertEqual(name.col_offset, 22)
+        self.assertEqual(name.end_col_offset, 25)
 
     def test_ast_line_numbers_multiline_fstring(self):
         # See bpo-30465 for details.
diff --git a/Lib/test/test_peg_parser.py b/Lib/test/test_peg_parser.py
index f4aaef848d54fe..b7bd86395d92d3 100644
--- a/Lib/test/test_peg_parser.py
+++ b/Lib/test/test_peg_parser.py
@@ -231,11 +231,6 @@ def f() -> Any:
     ('f-string_doublestarred', "f'{ {**x} }'"),
     ('f-string_escape_brace', "f'{{Escape'"),
     ('f-string_escape_closing_brace', "f'Escape}}'"),
-    ('f-string_repr', "f'{a!r}'"),
-    ('f-string_str', "f'{a!s}'"),
-    ('f-string_ascii', "f'{a!a}'"),
-    ('f-string_debug', "f'{a=}'"),
-    ('f-string_padding', "f'{a:03d}'"),
     ('f-string_multiline',
      """
         f'''
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-08-11-15-39-57.bpo-44885.i4noUO.rst b/Misc/NEWS.d/next/Core and Builtins/2021-08-11-15-39-57.bpo-44885.i4noUO.rst
new file mode 100644
index 00000000000000..c6abd7363af711
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-08-11-15-39-57.bpo-44885.i4noUO.rst	
@@ -0,0 +1,2 @@
+Correct the ast locations of f-strings with format specs and repeated
+expressions. Patch by Pablo Galindo
diff --git a/Parser/pegen/parse_string.c b/Parser/pegen/parse_string.c
index c852e5b827c0cc..41fdc2d81d92bc 100644
--- a/Parser/pegen/parse_string.c
+++ b/Parser/pegen/parse_string.c
@@ -284,49 +284,48 @@ _PyPegen_parsestr(Parser *p, int *bytesmode, int *rawmode, PyObject **result,
 /* Fix locations for the given node and its children.
 
    `parent` is the enclosing node.
+   `expr_start` is the starting position of the expression (pointing to the open brace).
    `n` is the node which locations are going to be fixed relative to parent.
    `expr_str` is the child node's string representation, including braces.
 */
 static bool
-fstring_find_expr_location(Token *parent, char *expr_str, int *p_lines, int *p_cols)
+fstring_find_expr_location(Token *parent, const char* expr_start, char *expr_str, int *p_lines, int *p_cols)
 {
     *p_lines = 0;
     *p_cols = 0;
+    assert(expr_start != NULL && *expr_start == '{');
     if (parent && parent->bytes) {
         char *parent_str = PyBytes_AsString(parent->bytes);
         if (!parent_str) {
             return false;
         }
-        char *substr = strstr(parent_str, expr_str);
-        if (substr) {
-            // The following is needed, in order to correctly shift the column
-            // offset, in the case that (disregarding any whitespace) a newline
-            // immediately follows the opening curly brace of the fstring expression.
-            bool newline_after_brace = 1;
-            char *start = substr + 1;
-            while (start && *start != '}' && *start != '\n') {
-                if (*start != ' ' && *start != '\t' && *start != '\f') {
-                    newline_after_brace = 0;
-                    break;
-                }
-                start++;
+        // The following is needed, in order to correctly shift the column
+        // offset, in the case that (disregarding any whitespace) a newline
+        // immediately follows the opening curly brace of the fstring expression.
+        bool newline_after_brace = 1;
+        const char *start = expr_start + 1;
+        while (start && *start != '}' && *start != '\n') {
+            if (*start != ' ' && *start != '\t' && *start != '\f') {
+                newline_after_brace = 0;
+                break;
             }
+            start++;
+        }
 
-            // Account for the characters from the last newline character to our
-            // left until the beginning of substr.
-            if (!newline_after_brace) {
-                start = substr;
-                while (start > parent_str && *start != '\n') {
-                    start--;
-                }
-                *p_cols += (int)(substr - start);
+        // Account for the characters from the last newline character to our
+        // left until the beginning of expr_start.
+        if (!newline_after_brace) {
+            start = expr_start;
+            while (start > parent_str && *start != '\n') {
+                start--;
             }
-            /* adjust the start based on the number of newlines encountered
-               before the f-string expression */
-            for (char* p = parent_str; p < substr; p++) {
-                if (*p == '\n') {
-                    (*p_lines)++;
-                }
+            *p_cols += (int)(expr_start - start);
+        }
+        /* adjust the start based on the number of newlines encountered
+           before the f-string expression */
+        for (const char *p = parent_str; p < expr_start; p++) {
+            if (*p == '\n') {
+                (*p_lines)++;
             }
         }
     }
@@ -370,7 +369,7 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,
 
     len = expr_end - expr_start;
     /* Allocate 3 extra bytes: open paren, close paren, null byte. */
-    str = PyMem_Malloc(len + 3);
+    str = PyMem_Calloc(len + 3, sizeof(char));
     if (str == NULL) {
         PyErr_NoMemory();
         return NULL;
@@ -378,18 +377,11 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,
 
     // The call to fstring_find_expr_location is responsible for finding the column offset
     // the generated AST nodes need to be shifted to the right, which is equal to the number
-    // of the f-string characters before the expression starts. In order to correctly compute
-    // this offset, strstr gets called in fstring_find_expr_location which only succeeds
-    // if curly braces appear before and after the f-string expression (exactly like they do
-    // in the f-string itself), hence the following lines.
-    str[0] = '{';
+    // of the f-string characters before the expression starts.
     memcpy(str+1, expr_start, len);
-    str[len+1] = '}';
-    str[len+2] = 0;
-
     int lines, cols;
-    if (!fstring_find_expr_location(t, str, &lines, &cols)) {
-        PyMem_FREE(str);
+    if (!fstring_find_expr_location(t, expr_start-1, str+1, &lines, &cols)) {
+        PyMem_Free(str);
         return NULL;
     }
 



More information about the Python-checkins mailing list