[Python-checkins] gh-91616: re module, fix .fullmatch() mismatch when using Atomic Grouping or Possessive Quantifiers (GH-91681)

serhiy-storchaka webhook-mailer at python.org
Tue Apr 19 10:49:56 EDT 2022


https://github.com/python/cpython/commit/e4e8895ae36b44994e3f7b018345ba203ce6b55c
commit: e4e8895ae36b44994e3f7b018345ba203ce6b55c
branch: main
author: Ma Lin <animalize at users.noreply.github.com>
committer: serhiy-storchaka <storchaka at gmail.com>
date: 2022-04-19T17:49:36+03:00
summary:

gh-91616: re module, fix .fullmatch() mismatch when using Atomic Grouping or Possessive Quantifiers (GH-91681)

These jumps should use DO_JUMP0() instead of DO_JUMP():
- JUMP_POSS_REPEAT_1
- JUMP_POSS_REPEAT_2
- JUMP_ATOMIC_GROUP

files:
A Misc/NEWS.d/next/Library/2022-04-17-12-27-25.gh-issue-91616.gSQg69.rst
M Lib/test/test_pathlib.py
M Lib/test/test_re.py
M Modules/_sre/sre_lib.h

diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py
index 66e44479239cf..b8b08bf0ce1bb 100644
--- a/Lib/test/test_pathlib.py
+++ b/Lib/test/test_pathlib.py
@@ -1388,6 +1388,7 @@ class _BasePathTest(object):
     #  |   |-- dirD
     #  |   |   `-- fileD
     #  |   `-- fileC
+    #  |   `-- novel.txt
     #  |-- dirE  # No permissions
     #  |-- fileA
     #  |-- linkA -> fileA
@@ -1412,6 +1413,8 @@ def cleanup():
             f.write(b"this is file B\n")
         with open(join('dirC', 'fileC'), 'wb') as f:
             f.write(b"this is file C\n")
+        with open(join('dirC', 'novel.txt'), 'wb') as f:
+            f.write(b"this is a novel\n")
         with open(join('dirC', 'dirD', 'fileD'), 'wb') as f:
             f.write(b"this is file D\n")
         os.chmod(join('dirE'), 0)
@@ -1679,6 +1682,9 @@ def _check(glob, expected):
         p = P(BASE, "dirC")
         _check(p.rglob("file*"), ["dirC/fileC", "dirC/dirD/fileD"])
         _check(p.rglob("*/*"), ["dirC/dirD/fileD"])
+        # gh-91616, a re module regression
+        _check(p.rglob("*.txt"), ["dirC/novel.txt"])
+        _check(p.rglob("*.*"), ["dirC/novel.txt"])
 
     @os_helper.skip_unless_symlink
     def test_rglob_symlink_loop(self):
@@ -1689,7 +1695,8 @@ def test_rglob_symlink_loop(self):
         expect = {'brokenLink',
                   'dirA', 'dirA/linkC',
                   'dirB', 'dirB/fileB', 'dirB/linkD',
-                  'dirC', 'dirC/dirD', 'dirC/dirD/fileD', 'dirC/fileC',
+                  'dirC', 'dirC/dirD', 'dirC/dirD/fileD',
+                  'dirC/fileC', 'dirC/novel.txt',
                   'dirE',
                   'fileA',
                   'linkA',
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 7bb8bfa8336e8..781bfd6ea2eda 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -2242,6 +2242,10 @@ def test_fullmatch_possessive_quantifiers(self):
         self.assertIsNone(re.fullmatch(r'a*+', 'ab'))
         self.assertIsNone(re.fullmatch(r'a?+', 'ab'))
         self.assertIsNone(re.fullmatch(r'a{1,3}+', 'ab'))
+        self.assertTrue(re.fullmatch(r'a++b', 'ab'))
+        self.assertTrue(re.fullmatch(r'a*+b', 'ab'))
+        self.assertTrue(re.fullmatch(r'a?+b', 'ab'))
+        self.assertTrue(re.fullmatch(r'a{1,3}+b', 'ab'))
 
         self.assertTrue(re.fullmatch(r'(?:ab)++', 'ab'))
         self.assertTrue(re.fullmatch(r'(?:ab)*+', 'ab'))
@@ -2251,6 +2255,10 @@ def test_fullmatch_possessive_quantifiers(self):
         self.assertIsNone(re.fullmatch(r'(?:ab)*+', 'abc'))
         self.assertIsNone(re.fullmatch(r'(?:ab)?+', 'abc'))
         self.assertIsNone(re.fullmatch(r'(?:ab){1,3}+', 'abc'))
+        self.assertTrue(re.fullmatch(r'(?:ab)++c', 'abc'))
+        self.assertTrue(re.fullmatch(r'(?:ab)*+c', 'abc'))
+        self.assertTrue(re.fullmatch(r'(?:ab)?+c', 'abc'))
+        self.assertTrue(re.fullmatch(r'(?:ab){1,3}+c', 'abc'))
 
     def test_findall_possessive_quantifiers(self):
         self.assertEqual(re.findall(r'a++', 'aab'), ['aa'])
@@ -2286,6 +2294,10 @@ def test_fullmatch_atomic_grouping(self):
         self.assertIsNone(re.fullmatch(r'(?>a*)', 'ab'))
         self.assertIsNone(re.fullmatch(r'(?>a?)', 'ab'))
         self.assertIsNone(re.fullmatch(r'(?>a{1,3})', 'ab'))
+        self.assertTrue(re.fullmatch(r'(?>a+)b', 'ab'))
+        self.assertTrue(re.fullmatch(r'(?>a*)b', 'ab'))
+        self.assertTrue(re.fullmatch(r'(?>a?)b', 'ab'))
+        self.assertTrue(re.fullmatch(r'(?>a{1,3})b', 'ab'))
 
         self.assertTrue(re.fullmatch(r'(?>(?:ab)+)', 'ab'))
         self.assertTrue(re.fullmatch(r'(?>(?:ab)*)', 'ab'))
@@ -2295,6 +2307,10 @@ def test_fullmatch_atomic_grouping(self):
         self.assertIsNone(re.fullmatch(r'(?>(?:ab)*)', 'abc'))
         self.assertIsNone(re.fullmatch(r'(?>(?:ab)?)', 'abc'))
         self.assertIsNone(re.fullmatch(r'(?>(?:ab){1,3})', 'abc'))
+        self.assertTrue(re.fullmatch(r'(?>(?:ab)+)c', 'abc'))
+        self.assertTrue(re.fullmatch(r'(?>(?:ab)*)c', 'abc'))
+        self.assertTrue(re.fullmatch(r'(?>(?:ab)?)c', 'abc'))
+        self.assertTrue(re.fullmatch(r'(?>(?:ab){1,3})c', 'abc'))
 
     def test_findall_atomic_grouping(self):
         self.assertEqual(re.findall(r'(?>a+)', 'aab'), ['aa'])
@@ -2307,6 +2323,10 @@ def test_findall_atomic_grouping(self):
         self.assertEqual(re.findall(r'(?>(?:ab)?)', 'ababc'), ['ab', 'ab', '', ''])
         self.assertEqual(re.findall(r'(?>(?:ab){1,3})', 'ababc'), ['abab'])
 
+    def test_bug_gh91616(self):
+        self.assertTrue(re.fullmatch(r'(?s:(?>.*?\.).*)\Z', "a.txt")) # reproducer
+        self.assertTrue(re.fullmatch(r'(?s:(?=(?P<g0>.*?\.))(?P=g0).*)\Z', "a.txt"))
+
 
 def get_debug_out(pat):
     with captured_stdout() as out:
diff --git a/Misc/NEWS.d/next/Library/2022-04-17-12-27-25.gh-issue-91616.gSQg69.rst b/Misc/NEWS.d/next/Library/2022-04-17-12-27-25.gh-issue-91616.gSQg69.rst
new file mode 100644
index 0000000000000..8f147237aed6b
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-04-17-12-27-25.gh-issue-91616.gSQg69.rst
@@ -0,0 +1,2 @@
+:mod:`re` module, fix :meth:`~re.Pattern.fullmatch` mismatch when using Atomic
+Grouping or Possessive Quantifiers.
diff --git a/Modules/_sre/sre_lib.h b/Modules/_sre/sre_lib.h
index db624aa896d6a..efd6fdeccce3f 100644
--- a/Modules/_sre/sre_lib.h
+++ b/Modules/_sre/sre_lib.h
@@ -1259,8 +1259,8 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
             /* Check for minimum required matches. */
             while (ctx->count < (Py_ssize_t)pattern[1]) {
                 /* not enough matches */
-                DO_JUMP(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
-                        &pattern[3]);
+                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
+                         &pattern[3]);
                 if (ret) {
                     RETURN_ON_ERROR(ret);
                     ctx->count++;
@@ -1306,8 +1306,8 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
 
                 /* We have not reached the maximin matches, so try to
                    match once more. */
-                DO_JUMP(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
-                        &pattern[3]);
+                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
+                         &pattern[3]);
 
                 /* Check to see if the last attempted match
                    succeeded. */
@@ -1348,15 +1348,15 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
             TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
 
             /* Set the global Input pointer to this context's Input
-            pointer */
+               pointer */
             state->ptr = ptr;
 
             /* Evaluate the Atomic Group in a new context, terminating
                when the end of the group, represented by a SUCCESS op
                code, is reached. */
             /* Group Pattern begins at an offset of 1 code. */
-            DO_JUMP(JUMP_ATOMIC_GROUP, jump_atomic_group,
-                    &pattern[1]);
+            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
+                     &pattern[1]);
 
             /* Test Exit Condition */
             RETURN_ON_ERROR(ret);



More information about the Python-checkins mailing list