[Python-checkins] cpython (3.3): Issue #16688: Fix backreferences did make case-insensitive regex fail on
serhiy.storchaka
python-checkins at python.org
Sat Dec 29 22:45:32 CET 2012
http://hg.python.org/cpython/rev/44a4f9289faa
changeset: 81149:44a4f9289faa
branch: 3.3
parent: 81145:e9cbe583156f
user: Serhiy Storchaka <storchaka at gmail.com>
date: Sat Dec 29 23:38:48 2012 +0200
summary:
Issue #16688: Fix backreferences making case-insensitive regexes fail on non-ASCII strings.
Patch by Matthew Barnett.
files:
Lib/test/test_re.py | 5 +++++
Misc/ACKS | 1 +
Misc/NEWS | 3 +++
Modules/_sre.c | 11 ++++++-----
4 files changed, 15 insertions(+), 5 deletions(-)
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -968,6 +968,11 @@
self.assertEqual(r, s)
self.assertEqual(n, size + 1)
+ def test_bug_16688(self):
+ # Issue 16688: Backreferences make case-insensitive regex fail on
+ # non-ASCII strings.
+ self.assertEqual(re.findall(r"(?i)(a)\1", "aa \u0100"), ['a'])
+ self.assertEqual(re.match(r"(?s).{1,3}", "\u0100\u0100").span(), (0, 2))
def run_re_tests():
from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
diff --git a/Misc/ACKS b/Misc/ACKS
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -70,6 +70,7 @@
Nick Barnes
Quentin Barnes
David Barnett
+Matthew Barnett
Richard Barran
Cesar Eduardo Barros
Des Barry
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -124,6 +124,9 @@
Library
-------
+- Issue #16688: Fix backreferences did make case-insensitive regex fail on
+ non-ASCII strings. Patch by Matthew Barnett.
+
- Issue #16485: Fix file descriptor not being closed if file header patching
fails on closing of aifc file.
diff --git a/Modules/_sre.c b/Modules/_sre.c
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -492,7 +492,7 @@
Py_ssize_t i;
/* adjust end */
- if (maxcount < end - ptr && maxcount != 65535)
+ if (maxcount < (end - ptr) / state->charsize && maxcount != 65535)
end = ptr + maxcount*state->charsize;
switch (pattern[0]) {
@@ -583,7 +583,7 @@
Py_ssize_t i;
/* check minimal length */
- if (pattern[3] && (end - ptr) < pattern[3])
+ if (pattern[3] && (end - ptr)/state->charsize < pattern[3])
return 0;
/* check known prefix */
@@ -801,7 +801,7 @@
/* <INFO> <1=skip> <2=flags> <3=min> ... */
if (ctx->pattern[3] && (end - ctx->ptr)/state->charsize < ctx->pattern[3]) {
TRACE(("reject (got %d chars, need %d)\n",
- (end - ctx->ptr), ctx->pattern[3]));
+ (end - ctx->ptr)/state->charsize, ctx->pattern[3]));
RETURN_FAILURE;
}
ctx->pattern += ctx->pattern[1] + 1;
@@ -1329,9 +1329,10 @@
RETURN_FAILURE;
while (p < e) {
if (ctx->ptr >= end ||
- state->lower(SRE_CHARGET(state, ctx->ptr, 0)) != state->lower(*p))
+ state->lower(SRE_CHARGET(state, ctx->ptr, 0)) !=
+ state->lower(SRE_CHARGET(state, p, 0)))
RETURN_FAILURE;
- p++;
+ p += state->charsize;
ctx->ptr += state->charsize;
}
}
--
Repository URL: http://hg.python.org/cpython
More information about the Python-checkins mailing list