[Python-checkins] cpython (3.3): Issue #16688: Fix backreferences did make case-insensitive regex fail on

serhiy.storchaka python-checkins at python.org
Sat Dec 29 22:45:32 CET 2012


http://hg.python.org/cpython/rev/44a4f9289faa
changeset:   81149:44a4f9289faa
branch:      3.3
parent:      81145:e9cbe583156f
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Sat Dec 29 23:38:48 2012 +0200
summary:
  Issue #16688: Fix backreferences making case-insensitive regexes fail on non-ASCII strings.
Patch by Matthew Barnett.

files:
  Lib/test/test_re.py |   5 +++++
  Misc/ACKS           |   1 +
  Misc/NEWS           |   3 +++
  Modules/_sre.c      |  11 ++++++-----
  4 files changed, 15 insertions(+), 5 deletions(-)


diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -968,6 +968,11 @@
         self.assertEqual(r, s)
         self.assertEqual(n, size + 1)
 
+    def test_bug_16688(self):
+        # Issue 16688: Backreferences make case-insensitive regex fail on
+        # non-ASCII strings.
+        self.assertEqual(re.findall(r"(?i)(a)\1", "aa \u0100"), ['a'])
+        self.assertEqual(re.match(r"(?s).{1,3}", "\u0100\u0100").span(), (0, 2))
 
 def run_re_tests():
     from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
diff --git a/Misc/ACKS b/Misc/ACKS
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -70,6 +70,7 @@
 Nick Barnes
 Quentin Barnes
 David Barnett
+Matthew Barnett
 Richard Barran
 Cesar Eduardo Barros
 Des Barry
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -124,6 +124,9 @@
 Library
 -------
 
+- Issue #16688: Fix backreferences making case-insensitive regexes fail on
+  non-ASCII strings. Patch by Matthew Barnett.
+
 - Issue #16485: Fix file descriptor not being closed if file header patching
   fails on closing of aifc file.
 
diff --git a/Modules/_sre.c b/Modules/_sre.c
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -492,7 +492,7 @@
     Py_ssize_t i;
 
     /* adjust end */
-    if (maxcount < end - ptr && maxcount != 65535)
+    if (maxcount < (end - ptr) / state->charsize && maxcount != 65535)
         end = ptr + maxcount*state->charsize;
 
     switch (pattern[0]) {
@@ -583,7 +583,7 @@
     Py_ssize_t i;
 
     /* check minimal length */
-    if (pattern[3] && (end - ptr) < pattern[3])
+    if (pattern[3] && (end - ptr)/state->charsize < pattern[3])
         return 0;
 
     /* check known prefix */
@@ -801,7 +801,7 @@
         /* <INFO> <1=skip> <2=flags> <3=min> ... */
         if (ctx->pattern[3] && (end - ctx->ptr)/state->charsize < ctx->pattern[3]) {
             TRACE(("reject (got %d chars, need %d)\n",
-                   (end - ctx->ptr), ctx->pattern[3]));
+                   (end - ctx->ptr)/state->charsize, ctx->pattern[3]));
             RETURN_FAILURE;
         }
         ctx->pattern += ctx->pattern[1] + 1;
@@ -1329,9 +1329,10 @@
                         RETURN_FAILURE;
                     while (p < e) {
                         if (ctx->ptr >= end ||
-                            state->lower(SRE_CHARGET(state, ctx->ptr, 0)) != state->lower(*p))
+                            state->lower(SRE_CHARGET(state, ctx->ptr, 0)) !=
+                            state->lower(SRE_CHARGET(state, p, 0)))
                             RETURN_FAILURE;
-                        p++;
+                        p += state->charsize;
                         ctx->ptr += state->charsize;
                     }
                 }

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list