[issue20998] fullmatch isn't matching correctly under re.IGNORECASE

Serhiy Storchaka report at bugs.python.org
Sun Apr 13 17:28:32 CEST 2014


Serhiy Storchaka added the comment:

> After stepping through the code for that regex that fails, I concluded
> that the condition shouldn't depend on ctx->match_all at that point
> after all.

The tests pass without this check, but I'm not sure that it is unneeded. At 
least, without this check the code is not equivalent to the code before adding 
support for fullmatch(). So I prefer to leave it as is.

> I thought I'd initialised it in all the places it's used.
> 
> I admit that I find the code a little hard to follow at times... :-(

Indeed, it is initialized in Modules/_sre.c, and it is always 0. Perhaps it 
would be more consistent to get rid of the match_all field in the SRE_STATE 
structure and pass it as an argument.

----------
Added file: http://bugs.python.org/file34799/issue20998_2.patch

_______________________________________
Python tracker <report at bugs.python.org>
<http://bugs.python.org/issue20998>
_______________________________________
-------------- next part --------------
diff -r f86504da2fcc Lib/test/test_re.py
--- a/Lib/test/test_re.py	Sun Apr 13 17:08:51 2014 +0300
+++ b/Lib/test/test_re.py	Sun Apr 13 18:23:53 2014 +0300
@@ -1223,6 +1223,11 @@
             pat.scanner(string='abracadabra', pos=3, endpos=10).search().span(),
             (7, 9))
 
+    def test_bug_20998(self):
+        # Issue #20998: Fullmatch of repeated single character pattern
+        # with ignore case.
+        self.assertEqual(re.fullmatch('[a-c]+', 'ABC', re.I).span(), (0, 3))
+
 
 class PatternReprTests(unittest.TestCase):
     def check(self, pattern, expected):
diff -r f86504da2fcc Modules/_sre.c
--- a/Modules/_sre.c	Sun Apr 13 17:08:51 2014 +0300
+++ b/Modules/_sre.c	Sun Apr 13 18:23:53 2014 +0300
@@ -505,14 +505,14 @@
 }
 
 LOCAL(Py_ssize_t)
-sre_match(SRE_STATE* state, SRE_CODE* pattern)
+sre_match(SRE_STATE* state, SRE_CODE* pattern, int match_all)
 {
     if (state->charsize == 1)
-        return sre_ucs1_match(state, pattern);
+        return sre_ucs1_match(state, pattern, match_all);
     if (state->charsize == 2)
-        return sre_ucs2_match(state, pattern);
+        return sre_ucs2_match(state, pattern, match_all);
     assert(state->charsize == 4);
-    return sre_ucs4_match(state, pattern);
+    return sre_ucs4_match(state, pattern, match_all);
 }
 
 LOCAL(Py_ssize_t)
@@ -576,7 +576,7 @@
 
     TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
 
-    status = sre_match(&state, PatternObject_GetCode(self));
+    status = sre_match(&state, PatternObject_GetCode(self), 0);
 
     TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
     if (PyErr_Occurred())
@@ -609,12 +609,11 @@
     if (!string)
         return NULL;
 
-    state.match_all = 1;
     state.ptr = state.start;
 
     TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
 
-    status = sre_match(&state, PatternObject_GetCode(self));
+    status = sre_match(&state, PatternObject_GetCode(self), 1);
 
     TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
     if (PyErr_Occurred())
@@ -2572,7 +2571,7 @@
 
     state->ptr = state->start;
 
-    status = sre_match(state, PatternObject_GetCode(self->pattern));
+    status = sre_match(state, PatternObject_GetCode(self->pattern), 0);
     if (PyErr_Occurred())
         return NULL;
 
diff -r f86504da2fcc Modules/sre.h
--- a/Modules/sre.h	Sun Apr 13 17:08:51 2014 +0300
+++ b/Modules/sre.h	Sun Apr 13 18:23:53 2014 +0300
@@ -86,7 +86,6 @@
     SRE_REPEAT *repeat;
     /* hooks */
     SRE_TOLOWER_HOOK lower;
-    int match_all;
 } SRE_STATE;
 
 typedef struct {
diff -r f86504da2fcc Modules/sre_lib.h
--- a/Modules/sre_lib.h	Sun Apr 13 17:08:51 2014 +0300
+++ b/Modules/sre_lib.h	Sun Apr 13 18:23:53 2014 +0300
@@ -173,7 +173,7 @@
     }
 }
 
-LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, SRE_CODE* pattern);
+LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int match_all);
 
 LOCAL(Py_ssize_t)
 SRE(count)(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
@@ -259,7 +259,7 @@
         /* repeated single character pattern */
         TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
         while ((SRE_CHAR*) state->ptr < end) {
-            i = SRE(match)(state, pattern);
+            i = SRE(match)(state, pattern, 0);
             if (i < 0)
                 return i;
             if (!i)
@@ -490,7 +490,7 @@
 /* check if string matches the given pattern.  returns <0 for
    error, 0 for failure, and 1 for success */
 LOCAL(Py_ssize_t)
-SRE(match)(SRE_STATE* state, SRE_CODE* pattern)
+SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int match_all)
 {
     SRE_CHAR* end = (SRE_CHAR *)state->end;
     Py_ssize_t alloc_pos, ctx_pos = -1;
@@ -507,7 +507,7 @@
     ctx->last_ctx_pos = -1;
     ctx->jump = JUMP_NONE;
     ctx->pattern = pattern;
-    ctx->match_all = state->match_all;
+    ctx->match_all = match_all;
     ctx_pos = alloc_pos;
 
 entrance:
@@ -824,7 +824,7 @@
             }
 
             if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS &&
-                (!ctx->match_all || ctx->ptr == state->end)) {
+                (!match_all || ctx->ptr == state->end)) {
                 /* tail is empty.  we're finished */
                 state->ptr = ctx->ptr;
                 RETURN_SUCCESS;
@@ -1269,7 +1269,7 @@
                     state->ptr = ptr - (prefix_len - prefix_skip - 1);
                     if (flags & SRE_INFO_LITERAL)
                         return 1; /* we got all of it */
-                    status = SRE(match)(state, pattern + 2*prefix_skip);
+                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
                     if (status != 0)
                         return status;
                     /* close but no cigar -- try again */
@@ -1302,7 +1302,7 @@
             state->ptr = ++ptr;
             if (flags & SRE_INFO_LITERAL)
                 return 1; /* we got all of it */
-            status = SRE(match)(state, pattern + 2);
+            status = SRE(match)(state, pattern + 2, 0);
             if (status != 0)
                 break;
         }
@@ -1317,7 +1317,7 @@
             TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
             state->start = ptr;
             state->ptr = ptr;
-            status = SRE(match)(state, pattern);
+            status = SRE(match)(state, pattern, 0);
             if (status != 0)
                 break;
             ptr++;
@@ -1327,7 +1327,7 @@
         while (ptr <= end) {
             TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
             state->start = state->ptr = ptr++;
-            status = SRE(match)(state, pattern);
+            status = SRE(match)(state, pattern, 0);
             if (status != 0)
                 break;
         }


More information about the Python-bugs-list mailing list