[Python-checkins] cpython (merge 3.2 -> 3.3): Issue #13169: The maximal repetition number in a regular expression has been increased

serhiy.storchaka python-checkins at python.org
Sat Feb 16 15:59:45 CET 2013


http://hg.python.org/cpython/rev/b78c321ee9a5
changeset:   82221:b78c321ee9a5
branch:      3.3
parent:      82217:34cfe145b286
parent:      82220:472a7c652cbd
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Sat Feb 16 16:54:33 2013 +0200
summary:
  Issue #13169: The maximal repetition number in a regular expression has been
increased from 65534 to 2147483647 (on 32-bit platforms) or 4294967294 (on
64-bit platforms).

files:
  Lib/sre_compile.py   |   1 +
  Lib/sre_constants.py |   4 ---
  Lib/sre_parse.py     |   9 ++++++-
  Lib/test/test_re.py  |  34 +++++++++++++++++++++++++++++++-
  Misc/NEWS            |   4 +++
  Modules/_sre.c       |  18 +++++++++++-----
  Modules/sre.h        |   5 ++++
  7 files changed, 62 insertions(+), 13 deletions(-)


diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -13,6 +13,7 @@
 import _sre, sys
 import sre_parse
 from sre_constants import *
+from _sre import MAXREPEAT
 
 assert _sre.MAGIC == MAGIC, "SRE module mismatch"
 
diff --git a/Lib/sre_constants.py b/Lib/sre_constants.py
--- a/Lib/sre_constants.py
+++ b/Lib/sre_constants.py
@@ -15,10 +15,6 @@
 
 MAGIC = 20031017
 
-# max code word in this release
-
-MAXREPEAT = 65535
-
 # SRE standard exception (access as sre.error)
 # should this really be here?
 
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -15,6 +15,7 @@
 import sys
 
 from sre_constants import *
+from _sre import MAXREPEAT
 
 SPECIAL_CHARS = ".\\[{()*+?^$|"
 REPEAT_CHARS = "*+?{"
@@ -537,10 +538,14 @@
                     continue
                 if lo:
                     min = int(lo)
+                    if min >= MAXREPEAT:
+                        raise OverflowError("the repetition number is too large")
                 if hi:
                     max = int(hi)
-                if max < min:
-                    raise error("bad repeat interval")
+                    if max >= MAXREPEAT:
+                        raise OverflowError("the repetition number is too large")
+                    if max < min:
+                        raise error("bad repeat interval")
             else:
                 raise error("not supported")
             # figure out which item to repeat
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -1,4 +1,5 @@
-from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G
+from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \
+        cpython_only
 import io
 import re
 from re import Scanner
@@ -980,6 +981,37 @@
         self.assertEqual(re.findall(r"(?i)(a)\1", "aa \u0100"), ['a'])
         self.assertEqual(re.match(r"(?s).{1,3}", "\u0100\u0100").span(), (0, 2))
 
+    def test_repeat_minmax_overflow(self):
+        # Issue #13169
+        string = "x" * 100000
+        self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
+        self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
+        self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
+        self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
+        self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
+        self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
+        # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
+        self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128)
+        self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128)
+        self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
+        self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))
+
+    @cpython_only
+    def test_repeat_minmax_overflow_maxrepeat(self):
+        try:
+            from _sre import MAXREPEAT
+        except ImportError:
+            self.skipTest('requires _sre.MAXREPEAT constant')
+        string = "x" * 100000
+        self.assertIsNone(re.match(r".{%d}" % (MAXREPEAT - 1), string))
+        self.assertEqual(re.match(r".{,%d}" % (MAXREPEAT - 1), string).span(),
+                         (0, 100000))
+        self.assertIsNone(re.match(r".{%d,}?" % (MAXREPEAT - 1), string))
+        self.assertRaises(OverflowError, re.compile, r".{%d}" % MAXREPEAT)
+        self.assertRaises(OverflowError, re.compile, r".{,%d}" % MAXREPEAT)
+        self.assertRaises(OverflowError, re.compile, r".{%d,}?" % MAXREPEAT)
+
+
 def run_re_tests():
     from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
     if verbose:
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -178,6 +178,10 @@
 Library
 -------
 
+- Issue #13169: The maximal repetition number in a regular expression has been
+  increased from 65534 to 2147483647 (on 32-bit platform) or 4294967294 (on
+  64-bit).
+
 - Issue #17143: Fix a missing import in the trace module.  Initial patch by
   Berker Peksag.
 
diff --git a/Modules/_sre.c b/Modules/_sre.c
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -492,7 +492,7 @@
     Py_ssize_t i;
 
     /* adjust end */
-    if (maxcount < (end - ptr) / state->charsize && maxcount != 65535)
+    if (maxcount < (end - ptr) / state->charsize && maxcount != SRE_MAXREPEAT)
         end = ptr + maxcount*state->charsize;
 
     switch (pattern[0]) {
@@ -1109,7 +1109,7 @@
             } else {
                 /* general case */
                 LASTMARK_SAVE();
-                while ((Py_ssize_t)ctx->pattern[2] == 65535
+                while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT
                        || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
                     state->ptr = ctx->ptr;
                     DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
@@ -1195,7 +1195,7 @@
             }
 
             if ((ctx->count < ctx->u.rep->pattern[2] ||
-                ctx->u.rep->pattern[2] == 65535) &&
+                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
                 state->ptr != ctx->u.rep->last_ptr) {
                 /* we may have enough matches, but if we can
                    match another item, do so */
@@ -1273,7 +1273,7 @@
             LASTMARK_RESTORE();
 
             if (ctx->count >= ctx->u.rep->pattern[2]
-                && ctx->u.rep->pattern[2] != 65535)
+                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT)
                 RETURN_FAILURE;
 
             ctx->u.rep->count = ctx->count;
@@ -3037,7 +3037,7 @@
                 GET_ARG; max = arg;
                 if (min > max)
                     FAIL;
-                if (max > 65535)
+                if (max > SRE_MAXREPEAT)
                     FAIL;
                 if (!_validate_inner(code, code+skip-4, groups))
                     FAIL;
@@ -3056,7 +3056,7 @@
                 GET_ARG; max = arg;
                 if (min > max)
                     FAIL;
-                if (max > 65535)
+                if (max > SRE_MAXREPEAT)
                     FAIL;
                 if (!_validate_inner(code, code+skip-3, groups))
                     FAIL;
@@ -3942,6 +3942,12 @@
         Py_DECREF(x);
     }
 
+    x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
+    if (x) {
+        PyDict_SetItemString(d, "MAXREPEAT", x);
+        Py_DECREF(x);
+    }
+
     x = PyUnicode_FromString(copyright);
     if (x) {
         PyDict_SetItemString(d, "copyright", x);
diff --git a/Modules/sre.h b/Modules/sre.h
--- a/Modules/sre.h
+++ b/Modules/sre.h
@@ -16,6 +16,11 @@
 /* size of a code word (must be unsigned short or larger, and
    large enough to hold a UCS4 character) */
 #define SRE_CODE Py_UCS4
+#if SIZEOF_SIZE_T > 4
+# define SRE_MAXREPEAT (~(SRE_CODE)0)
+#else
+# define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX + 1u)
+#endif
 
 typedef struct {
     PyObject_VAR_HEAD

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list