[Python-checkins] bpo-41659: Disallow curly brace directly after primary (GH-22996)

lysnikolaou webhook-mailer at python.org
Tue Oct 27 14:54:30 EDT 2020


https://github.com/python/cpython/commit/15acc4eaba8519d7d5f2acaffde65446b44dcf79
commit: 15acc4eaba8519d7d5f2acaffde65446b44dcf79
branch: master
author: Lysandros Nikolaou <lisandrosnik at gmail.com>
committer: lysnikolaou <lisandrosnik at gmail.com>
date: 2020-10-27T20:54:20+02:00
summary:

bpo-41659: Disallow curly brace directly after primary (GH-22996)

files:
A Misc/NEWS.d/next/Core and Builtins/2020-10-27-18-32-49.bpo-41659.d4a-8o.rst
M Grammar/python.gram
M Lib/test/test_exceptions.py
M Lib/test/test_syntax.py
M Parser/parser.c

diff --git a/Grammar/python.gram b/Grammar/python.gram
index 19c85accf8d9a..b8da554b8ec99 100644
--- a/Grammar/python.gram
+++ b/Grammar/python.gram
@@ -475,6 +475,7 @@ await_primary[expr_ty] (memo):
     | AWAIT a=primary { CHECK_VERSION(expr_ty, 5, "Await expressions are", _Py_Await(a, EXTRA)) }
     | primary
 primary[expr_ty]:
+    | invalid_primary  # must be before 'primary genexp' because of invalid_genexp
     | a=primary '.' b=NAME { _Py_Attribute(a, b->v.Name.id, Load, EXTRA) }
     | a=primary b=genexp { _Py_Call(a, CHECK(asdl_expr_seq*, (asdl_expr_seq*)_PyPegen_singleton_seq(p, b)), NULL, EXTRA) }
     | a=primary '(' b=[arguments] ')' {
@@ -682,6 +683,8 @@ invalid_del_stmt:
         RAISE_SYNTAX_ERROR_INVALID_TARGET(DEL_TARGETS, a) }
 invalid_block:
     | NEWLINE !INDENT { RAISE_INDENTATION_ERROR("expected an indented block") }
+invalid_primary:
+    | primary a='{' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "invalid syntax") }
 invalid_comprehension:
     | ('[' | '(' | '{') a=starred_expression for_if_clauses {
         RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "iterable unpacking cannot be used in comprehension") }
diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py
index 1ec446887770e..4dbf5fe5d5bc3 100644
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@@ -208,6 +208,7 @@ def testSyntaxErrorOffset(self):
         check(b'Python = "\xcf\xb3\xf2\xee\xed" +', 1, 18)
         check('x = "a', 1, 7)
         check('lambda x: x = 2', 1, 1)
+        check('f{a + b + c}', 1, 2)
 
         # Errors thrown by compile.c
         check('class foo:return 1', 1, 11)
diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py
index 7c3302c1d46ae..c25b85246b919 100644
--- a/Lib/test/test_syntax.py
+++ b/Lib/test/test_syntax.py
@@ -802,6 +802,9 @@ def _check_error(self, code, errtext,
         else:
             self.fail("compile() did not raise SyntaxError")
 
+    def test_curly_brace_after_primary_raises_immediately(self):
+        self._check_error("f{", "invalid syntax", mode="single")
+
     def test_assign_call(self):
         self._check_error("f() = 1", "assign")
 
diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-10-27-18-32-49.bpo-41659.d4a-8o.rst b/Misc/NEWS.d/next/Core and Builtins/2020-10-27-18-32-49.bpo-41659.d4a-8o.rst
new file mode 100644
index 0000000000000..038749a7b16c9
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2020-10-27-18-32-49.bpo-41659.d4a-8o.rst	
@@ -0,0 +1,3 @@
+Fix a bug in the parser, where a curly brace following a `primary` didn't fail immediately.
+This caused invalid expressions like `a {b}` to raise a :exc:`SyntaxError` with a wrong offset,
+and invalid expressions ending with a curly brace like `a {` to not fail immediately in the REPL.
\ No newline at end of file
diff --git a/Parser/parser.c b/Parser/parser.c
index e438f06c9be9a..a22cf2752d18d 100644
--- a/Parser/parser.c
+++ b/Parser/parser.c
@@ -216,173 +216,174 @@ static KeywordToken *reserved_keywords[] = {
 #define invalid_ann_assign_target_type 1147
 #define invalid_del_stmt_type 1148
 #define invalid_block_type 1149
-#define invalid_comprehension_type 1150
-#define invalid_dict_comprehension_type 1151
-#define invalid_parameters_type 1152
-#define invalid_lambda_parameters_type 1153
-#define invalid_star_etc_type 1154
-#define invalid_lambda_star_etc_type 1155
-#define invalid_double_type_comments_type 1156
-#define invalid_with_item_type 1157
-#define invalid_for_target_type 1158
-#define invalid_group_type 1159
-#define invalid_import_from_targets_type 1160
-#define _loop0_1_type 1161
-#define _loop0_2_type 1162
-#define _loop0_4_type 1163
-#define _gather_3_type 1164
-#define _loop0_6_type 1165
-#define _gather_5_type 1166
-#define _loop0_8_type 1167
-#define _gather_7_type 1168
-#define _loop0_10_type 1169
-#define _gather_9_type 1170
-#define _loop1_11_type 1171
-#define _loop0_13_type 1172
-#define _gather_12_type 1173
-#define _tmp_14_type 1174
-#define _tmp_15_type 1175
-#define _tmp_16_type 1176
-#define _tmp_17_type 1177
-#define _tmp_18_type 1178
-#define _tmp_19_type 1179
-#define _tmp_20_type 1180
-#define _tmp_21_type 1181
-#define _loop1_22_type 1182
-#define _tmp_23_type 1183
-#define _tmp_24_type 1184
-#define _loop0_26_type 1185
-#define _gather_25_type 1186
-#define _loop0_28_type 1187
-#define _gather_27_type 1188
-#define _tmp_29_type 1189
-#define _tmp_30_type 1190
-#define _loop0_31_type 1191
-#define _loop1_32_type 1192
-#define _loop0_34_type 1193
-#define _gather_33_type 1194
-#define _tmp_35_type 1195
-#define _loop0_37_type 1196
-#define _gather_36_type 1197
-#define _tmp_38_type 1198
-#define _loop0_40_type 1199
-#define _gather_39_type 1200
-#define _loop0_42_type 1201
-#define _gather_41_type 1202
-#define _loop0_44_type 1203
-#define _gather_43_type 1204
-#define _loop0_46_type 1205
-#define _gather_45_type 1206
-#define _tmp_47_type 1207
-#define _loop1_48_type 1208
-#define _tmp_49_type 1209
-#define _tmp_50_type 1210
-#define _tmp_51_type 1211
-#define _tmp_52_type 1212
-#define _tmp_53_type 1213
-#define _loop0_54_type 1214
-#define _loop0_55_type 1215
-#define _loop0_56_type 1216
-#define _loop1_57_type 1217
-#define _loop0_58_type 1218
-#define _loop1_59_type 1219
-#define _loop1_60_type 1220
-#define _loop1_61_type 1221
-#define _loop0_62_type 1222
-#define _loop1_63_type 1223
-#define _loop0_64_type 1224
-#define _loop1_65_type 1225
-#define _loop0_66_type 1226
-#define _loop1_67_type 1227
-#define _loop1_68_type 1228
-#define _tmp_69_type 1229
-#define _loop0_71_type 1230
-#define _gather_70_type 1231
-#define _loop1_72_type 1232
-#define _loop0_74_type 1233
-#define _gather_73_type 1234
-#define _loop1_75_type 1235
-#define _loop0_76_type 1236
-#define _loop0_77_type 1237
-#define _loop0_78_type 1238
-#define _loop1_79_type 1239
-#define _loop0_80_type 1240
-#define _loop1_81_type 1241
-#define _loop1_82_type 1242
-#define _loop1_83_type 1243
-#define _loop0_84_type 1244
-#define _loop1_85_type 1245
-#define _loop0_86_type 1246
-#define _loop1_87_type 1247
-#define _loop0_88_type 1248
-#define _loop1_89_type 1249
-#define _loop1_90_type 1250
-#define _loop1_91_type 1251
-#define _loop1_92_type 1252
-#define _tmp_93_type 1253
-#define _loop0_95_type 1254
-#define _gather_94_type 1255
-#define _tmp_96_type 1256
-#define _tmp_97_type 1257
-#define _tmp_98_type 1258
-#define _tmp_99_type 1259
-#define _loop1_100_type 1260
-#define _tmp_101_type 1261
-#define _tmp_102_type 1262
-#define _loop0_104_type 1263
-#define _gather_103_type 1264
-#define _loop1_105_type 1265
-#define _loop0_106_type 1266
-#define _loop0_107_type 1267
-#define _loop0_109_type 1268
-#define _gather_108_type 1269
-#define _tmp_110_type 1270
-#define _loop0_112_type 1271
-#define _gather_111_type 1272
-#define _loop0_114_type 1273
-#define _gather_113_type 1274
-#define _loop0_116_type 1275
-#define _gather_115_type 1276
-#define _loop0_118_type 1277
-#define _gather_117_type 1278
-#define _loop0_119_type 1279
-#define _loop0_121_type 1280
-#define _gather_120_type 1281
-#define _tmp_122_type 1282
-#define _loop0_124_type 1283
-#define _gather_123_type 1284
-#define _loop0_126_type 1285
-#define _gather_125_type 1286
-#define _tmp_127_type 1287
-#define _loop0_128_type 1288
-#define _loop0_129_type 1289
-#define _loop0_130_type 1290
-#define _tmp_131_type 1291
-#define _tmp_132_type 1292
-#define _loop0_133_type 1293
-#define _tmp_134_type 1294
-#define _loop0_135_type 1295
-#define _tmp_136_type 1296
-#define _tmp_137_type 1297
-#define _tmp_138_type 1298
-#define _tmp_139_type 1299
-#define _tmp_140_type 1300
-#define _tmp_141_type 1301
-#define _tmp_142_type 1302
-#define _tmp_143_type 1303
-#define _tmp_144_type 1304
-#define _tmp_145_type 1305
-#define _tmp_146_type 1306
-#define _tmp_147_type 1307
-#define _tmp_148_type 1308
-#define _tmp_149_type 1309
-#define _tmp_150_type 1310
-#define _tmp_151_type 1311
-#define _tmp_152_type 1312
-#define _loop1_153_type 1313
-#define _loop1_154_type 1314
-#define _tmp_155_type 1315
-#define _tmp_156_type 1316
+#define invalid_primary_type 1150  // Left-recursive
+#define invalid_comprehension_type 1151
+#define invalid_dict_comprehension_type 1152
+#define invalid_parameters_type 1153
+#define invalid_lambda_parameters_type 1154
+#define invalid_star_etc_type 1155
+#define invalid_lambda_star_etc_type 1156
+#define invalid_double_type_comments_type 1157
+#define invalid_with_item_type 1158
+#define invalid_for_target_type 1159
+#define invalid_group_type 1160
+#define invalid_import_from_targets_type 1161
+#define _loop0_1_type 1162
+#define _loop0_2_type 1163
+#define _loop0_4_type 1164
+#define _gather_3_type 1165
+#define _loop0_6_type 1166
+#define _gather_5_type 1167
+#define _loop0_8_type 1168
+#define _gather_7_type 1169
+#define _loop0_10_type 1170
+#define _gather_9_type 1171
+#define _loop1_11_type 1172
+#define _loop0_13_type 1173
+#define _gather_12_type 1174
+#define _tmp_14_type 1175
+#define _tmp_15_type 1176
+#define _tmp_16_type 1177
+#define _tmp_17_type 1178
+#define _tmp_18_type 1179
+#define _tmp_19_type 1180
+#define _tmp_20_type 1181
+#define _tmp_21_type 1182
+#define _loop1_22_type 1183
+#define _tmp_23_type 1184
+#define _tmp_24_type 1185
+#define _loop0_26_type 1186
+#define _gather_25_type 1187
+#define _loop0_28_type 1188
+#define _gather_27_type 1189
+#define _tmp_29_type 1190
+#define _tmp_30_type 1191
+#define _loop0_31_type 1192
+#define _loop1_32_type 1193
+#define _loop0_34_type 1194
+#define _gather_33_type 1195
+#define _tmp_35_type 1196
+#define _loop0_37_type 1197
+#define _gather_36_type 1198
+#define _tmp_38_type 1199
+#define _loop0_40_type 1200
+#define _gather_39_type 1201
+#define _loop0_42_type 1202
+#define _gather_41_type 1203
+#define _loop0_44_type 1204
+#define _gather_43_type 1205
+#define _loop0_46_type 1206
+#define _gather_45_type 1207
+#define _tmp_47_type 1208
+#define _loop1_48_type 1209
+#define _tmp_49_type 1210
+#define _tmp_50_type 1211
+#define _tmp_51_type 1212
+#define _tmp_52_type 1213
+#define _tmp_53_type 1214
+#define _loop0_54_type 1215
+#define _loop0_55_type 1216
+#define _loop0_56_type 1217
+#define _loop1_57_type 1218
+#define _loop0_58_type 1219
+#define _loop1_59_type 1220
+#define _loop1_60_type 1221
+#define _loop1_61_type 1222
+#define _loop0_62_type 1223
+#define _loop1_63_type 1224
+#define _loop0_64_type 1225
+#define _loop1_65_type 1226
+#define _loop0_66_type 1227
+#define _loop1_67_type 1228
+#define _loop1_68_type 1229
+#define _tmp_69_type 1230
+#define _loop0_71_type 1231
+#define _gather_70_type 1232
+#define _loop1_72_type 1233
+#define _loop0_74_type 1234
+#define _gather_73_type 1235
+#define _loop1_75_type 1236
+#define _loop0_76_type 1237
+#define _loop0_77_type 1238
+#define _loop0_78_type 1239
+#define _loop1_79_type 1240
+#define _loop0_80_type 1241
+#define _loop1_81_type 1242
+#define _loop1_82_type 1243
+#define _loop1_83_type 1244
+#define _loop0_84_type 1245
+#define _loop1_85_type 1246
+#define _loop0_86_type 1247
+#define _loop1_87_type 1248
+#define _loop0_88_type 1249
+#define _loop1_89_type 1250
+#define _loop1_90_type 1251
+#define _loop1_91_type 1252
+#define _loop1_92_type 1253
+#define _tmp_93_type 1254
+#define _loop0_95_type 1255
+#define _gather_94_type 1256
+#define _tmp_96_type 1257
+#define _tmp_97_type 1258
+#define _tmp_98_type 1259
+#define _tmp_99_type 1260
+#define _loop1_100_type 1261
+#define _tmp_101_type 1262
+#define _tmp_102_type 1263
+#define _loop0_104_type 1264
+#define _gather_103_type 1265
+#define _loop1_105_type 1266
+#define _loop0_106_type 1267
+#define _loop0_107_type 1268
+#define _loop0_109_type 1269
+#define _gather_108_type 1270
+#define _tmp_110_type 1271
+#define _loop0_112_type 1272
+#define _gather_111_type 1273
+#define _loop0_114_type 1274
+#define _gather_113_type 1275
+#define _loop0_116_type 1276
+#define _gather_115_type 1277
+#define _loop0_118_type 1278
+#define _gather_117_type 1279
+#define _loop0_119_type 1280
+#define _loop0_121_type 1281
+#define _gather_120_type 1282
+#define _tmp_122_type 1283
+#define _loop0_124_type 1284
+#define _gather_123_type 1285
+#define _loop0_126_type 1286
+#define _gather_125_type 1287
+#define _tmp_127_type 1288
+#define _loop0_128_type 1289
+#define _loop0_129_type 1290
+#define _loop0_130_type 1291
+#define _tmp_131_type 1292
+#define _tmp_132_type 1293
+#define _loop0_133_type 1294
+#define _tmp_134_type 1295
+#define _loop0_135_type 1296
+#define _tmp_136_type 1297
+#define _tmp_137_type 1298
+#define _tmp_138_type 1299
+#define _tmp_139_type 1300
+#define _tmp_140_type 1301
+#define _tmp_141_type 1302
+#define _tmp_142_type 1303
+#define _tmp_143_type 1304
+#define _tmp_144_type 1305
+#define _tmp_145_type 1306
+#define _tmp_146_type 1307
+#define _tmp_147_type 1308
+#define _tmp_148_type 1309
+#define _tmp_149_type 1310
+#define _tmp_150_type 1311
+#define _tmp_151_type 1312
+#define _tmp_152_type 1313
+#define _loop1_153_type 1314
+#define _loop1_154_type 1315
+#define _tmp_155_type 1316
+#define _tmp_156_type 1317
 
 static mod_ty file_rule(Parser *p);
 static mod_ty interactive_rule(Parser *p);
@@ -534,6 +535,7 @@ static void *invalid_assignment_rule(Parser *p);
 static expr_ty invalid_ann_assign_target_rule(Parser *p);
 static void *invalid_del_stmt_rule(Parser *p);
 static void *invalid_block_rule(Parser *p);
+static void *invalid_primary_rule(Parser *p);
 static void *invalid_comprehension_rule(Parser *p);
 static void *invalid_dict_comprehension_rule(Parser *p);
 static void *invalid_parameters_rule(Parser *p);
@@ -10275,6 +10277,7 @@ await_primary_rule(Parser *p)
 
 // Left-recursive
 // primary:
+//     | invalid_primary
 //     | primary '.' NAME
 //     | primary genexp
 //     | primary '(' arguments? ')'
@@ -10328,6 +10331,25 @@ primary_raw(Parser *p)
     UNUSED(_start_lineno); // Only used by EXTRA macro
     int _start_col_offset = p->tokens[_mark]->col_offset;
     UNUSED(_start_col_offset); // Only used by EXTRA macro
+    if (p->call_invalid_rules) { // invalid_primary
+        if (p->error_indicator) {
+            D(p->level--);
+            return NULL;
+        }
+        D(fprintf(stderr, "%*c> primary[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "invalid_primary"));
+        void *invalid_primary_var;
+        if (
+            (invalid_primary_var = invalid_primary_rule(p))  // invalid_primary
+        )
+        {
+            D(fprintf(stderr, "%*c+ primary[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "invalid_primary"));
+            _res = invalid_primary_var;
+            goto done;
+        }
+        p->mark = _mark;
+        D(fprintf(stderr, "%*c%s primary[%d-%d]: %s failed!\n", p->level, ' ',
+                  p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "invalid_primary"));
+    }
     { // primary '.' NAME
         if (p->error_indicator) {
             D(p->level--);
@@ -15028,6 +15050,51 @@ invalid_block_rule(Parser *p)
     return _res;
 }
 
+// Left-recursive
+// invalid_primary: primary '{'
+static void *
+invalid_primary_rule(Parser *p)
+{
+    D(p->level++);
+    if (p->error_indicator) {
+        D(p->level--);
+        return NULL;
+    }
+    void * _res = NULL;
+    int _mark = p->mark;
+    { // primary '{'
+        if (p->error_indicator) {
+            D(p->level--);
+            return NULL;
+        }
+        D(fprintf(stderr, "%*c> invalid_primary[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "primary '{'"));
+        Token * a;
+        expr_ty primary_var;
+        if (
+            (primary_var = primary_rule(p))  // primary
+            &&
+            (a = _PyPegen_expect_token(p, 25))  // token='{'
+        )
+        {
+            D(fprintf(stderr, "%*c+ invalid_primary[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "primary '{'"));
+            _res = RAISE_SYNTAX_ERROR_KNOWN_LOCATION ( a , "invalid syntax" );
+            if (_res == NULL && PyErr_Occurred()) {
+                p->error_indicator = 1;
+                D(p->level--);
+                return NULL;
+            }
+            goto done;
+        }
+        p->mark = _mark;
+        D(fprintf(stderr, "%*c%s invalid_primary[%d-%d]: %s failed!\n", p->level, ' ',
+                  p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "primary '{'"));
+    }
+    _res = NULL;
+  done:
+    D(p->level--);
+    return _res;
+}
+
 // invalid_comprehension: ('[' | '(' | '{') starred_expression for_if_clauses
 static void *
 invalid_comprehension_rule(Parser *p)



More information about the Python-checkins mailing list