[Python-checkins] [3.10] gh-93671: Avoid exponential backtracking in deeply nested sequence patterns in match statements (GH-93680) (#93690)

pablogsal webhook-mailer at python.org
Fri Jun 10 14:34:25 EDT 2022


https://github.com/python/cpython/commit/8f36c735b2aa694b140a19d9425bbdf54de5f212
commit: 8f36c735b2aa694b140a19d9425bbdf54de5f212
branch: 3.10
author: Pablo Galindo Salgado <Pablogsal at gmail.com>
committer: pablogsal <Pablogsal at gmail.com>
date: 2022-06-10T19:34:15+01:00
summary:

[3.10] gh-93671: Avoid exponential backtracking in deeply nested sequence patterns in match statements (GH-93680) (#93690)

Co-authored-by: Łukasz Langa <lukasz at langa.pl>.
(cherry picked from commit 53a8b17895e91d08f76a2fb59a555d012cd85ab4)

Co-authored-by: Pablo Galindo Salgado <Pablogsal at gmail.com>

files:
A Misc/NEWS.d/next/Core and Builtins/2022-06-10-12-03-17.gh-issue-93671.idkQqG.rst
M Grammar/python.gram
M Lib/test/test_patma.py
M Parser/parser.c

diff --git a/Grammar/python.gram b/Grammar/python.gram
index 84b9c9b82720b..9f1c826f3fb48 100644
--- a/Grammar/python.gram
+++ b/Grammar/python.gram
@@ -248,7 +248,8 @@ as_pattern[pattern_ty]:
 or_pattern[pattern_ty]:
     | patterns[asdl_pattern_seq*]='|'.closed_pattern+ {
         asdl_seq_LEN(patterns) == 1 ? asdl_seq_GET(patterns, 0) : _PyAST_MatchOr(patterns, EXTRA) }
-closed_pattern[pattern_ty]:
+
+closed_pattern[pattern_ty] (memo):
     | literal_pattern
     | capture_pattern
     | wildcard_pattern
@@ -329,7 +330,8 @@ maybe_sequence_pattern[asdl_seq*]:
 maybe_star_pattern[pattern_ty]:
     | star_pattern
     | pattern
-star_pattern[pattern_ty]:
+
+star_pattern[pattern_ty] (memo):
     | '*' target=pattern_capture_target {
         _PyAST_MatchStar(target->v.Name.id, EXTRA) }
     | '*' wildcard_pattern {
diff --git a/Lib/test/test_patma.py b/Lib/test/test_patma.py
index aa18e29e22548..b8444822c890c 100644
--- a/Lib/test/test_patma.py
+++ b/Lib/test/test_patma.py
@@ -3138,6 +3138,27 @@ def f(command):             # 0
         self.assertListEqual(self._trace(f, "go x"), [1, 2, 3])
         self.assertListEqual(self._trace(f, "spam"), [1, 2, 3])
 
+    def test_parser_deeply_nested_patterns(self):
+        # Deeply nested patterns can cause exponential backtracking when parsing.
+        # See gh-93671 for more information.
+
+        levels = 100
+
+        patterns = [
+            "A" + "(" * levels + ")" * levels,
+            "{1:" * levels + "1" + "}" * levels,
+            "[" * levels + "1" + "]" * levels,
+        ]
+
+        for pattern in patterns:
+            with self.subTest(pattern):
+                code = inspect.cleandoc("""
+                    match None:
+                        case {}:
+                            pass
+                """.format(pattern))
+                compile(code, "<string>", "exec")
+
 
 if __name__ == "__main__":
     """
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-06-10-12-03-17.gh-issue-93671.idkQqG.rst b/Misc/NEWS.d/next/Core and Builtins/2022-06-10-12-03-17.gh-issue-93671.idkQqG.rst
new file mode 100644
index 0000000000000..a775715335951
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2022-06-10-12-03-17.gh-issue-93671.idkQqG.rst	
@@ -0,0 +1,2 @@
+Fix some exponential backtrace case happening with deeply nested sequence
+patterns in match statements. Patch by Pablo Galindo
diff --git a/Parser/parser.c b/Parser/parser.c
index 862950018a312..59638da601a05 100644
--- a/Parser/parser.c
+++ b/Parser/parser.c
@@ -5920,6 +5920,10 @@ closed_pattern_rule(Parser *p)
         return NULL;
     }
     pattern_ty _res = NULL;
+    if (_PyPegen_is_memoized(p, closed_pattern_type, &_res)) {
+        p->level--;
+        return _res;
+    }
     int _mark = p->mark;
     { // literal_pattern
         if (p->error_indicator) {
@@ -6075,6 +6079,7 @@ closed_pattern_rule(Parser *p)
     }
     _res = NULL;
   done:
+    _PyPegen_insert_memo(p, _mark, closed_pattern_type, _res);
     p->level--;
     return _res;
 }
@@ -7598,6 +7603,10 @@ star_pattern_rule(Parser *p)
         return NULL;
     }
     pattern_ty _res = NULL;
+    if (_PyPegen_is_memoized(p, star_pattern_type, &_res)) {
+        p->level--;
+        return _res;
+    }
     int _mark = p->mark;
     if (p->mark == p->fill && _PyPegen_fill_token(p) < 0) {
         p->error_indicator = 1;
@@ -7682,6 +7691,7 @@ star_pattern_rule(Parser *p)
     }
     _res = NULL;
   done:
+    _PyPegen_insert_memo(p, _mark, star_pattern_type, _res);
     p->level--;
     return _res;
 }



More information about the Python-checkins mailing list