[Python-checkins] cpython (merge 3.6 -> default): Issue #25953: re.sub() now raises an error for invalid numerical group

serhiy.storchaka python-checkins at python.org
Sun Oct 23 05:12:31 EDT 2016


https://hg.python.org/cpython/rev/15e3695affa2
changeset:   104651:15e3695affa2
parent:      104649:e5e05ac07aee
parent:      104650:cea983246919
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Sun Oct 23 12:12:05 2016 +0300
summary:
  Issue #25953: re.sub() now raises an error for an invalid numerical group
reference in the replacement template even if the pattern is not found in
the string.  The error message for an invalid group reference now includes
the group index and the position of the reference.
Based on a patch by SilentGhost.

files:
  Lib/sre_parse.py    |  18 +++++++------
  Lib/test/test_re.py |  43 ++++++++++++++++----------------
  Misc/NEWS           |   6 ++++
  3 files changed, 38 insertions(+), 29 deletions(-)


diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -395,7 +395,7 @@
                                        len(escape))
                 state.checklookbehindgroup(group, source)
                 return GROUPREF, group
-            raise source.error("invalid group reference", len(escape))
+            raise source.error("invalid group reference %d" % group, len(escape) - 1)
         if len(escape) == 2:
             if c in ASCIILETTERS:
                 raise source.error("bad escape %s" % escape, len(escape))
@@ -725,8 +725,8 @@
                             raise source.error("bad group number",
                                                len(condname) + 1)
                         if condgroup >= MAXGROUPS:
-                            raise source.error("invalid group reference",
-                                               len(condname) + 1)
+                            msg = "invalid group reference %d" % condgroup
+                            raise source.error(msg, len(condname) + 1)
                     state.checklookbehindgroup(condgroup, source)
                 elif char in FLAGS or char == "-":
                     # flags
@@ -883,7 +883,9 @@
     literals = []
     literal = []
     lappend = literal.append
-    def addgroup(index):
+    def addgroup(index, pos):
+        if index > pattern.groups:
+            raise s.error("invalid group reference %d" % index, pos)
         if literal:
             literals.append(''.join(literal))
             del literal[:]
@@ -916,9 +918,9 @@
                         raise s.error("bad character in group name %r" % name,
                                       len(name) + 1) from None
                     if index >= MAXGROUPS:
-                        raise s.error("invalid group reference",
+                        raise s.error("invalid group reference %d" % index,
                                       len(name) + 1)
-                addgroup(index)
+                addgroup(index, len(name) + 1)
             elif c == "0":
                 if s.next in OCTDIGITS:
                     this += sget()
@@ -939,7 +941,7 @@
                                           'range 0-0o377' % this, len(this))
                         lappend(chr(c))
                 if not isoctal:
-                    addgroup(int(this[1:]))
+                    addgroup(int(this[1:]), len(this) - 1)
             else:
                 try:
                     this = chr(ESCAPES[this][1])
@@ -966,5 +968,5 @@
         for index, group in groups:
             literals[index] = g(group) or empty
     except IndexError:
-        raise error("invalid group reference")
+        raise error("invalid group reference %d" % index)
     return empty.join(literals)
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -5,7 +5,6 @@
 import re
 from re import Scanner
 import sre_compile
-import sre_constants
 import sys
 import string
 import traceback
@@ -186,18 +185,19 @@
                                 r'octal escape value \777 outside of '
                                 r'range 0-0o377', 0)
 
-        self.checkTemplateError('x', r'\1', 'x', 'invalid group reference')
-        self.checkTemplateError('x', r'\8', 'x', 'invalid group reference')
-        self.checkTemplateError('x', r'\9', 'x', 'invalid group reference')
-        self.checkTemplateError('x', r'\11', 'x', 'invalid group reference')
-        self.checkTemplateError('x', r'\18', 'x', 'invalid group reference')
-        self.checkTemplateError('x', r'\1a', 'x', 'invalid group reference')
-        self.checkTemplateError('x', r'\90', 'x', 'invalid group reference')
-        self.checkTemplateError('x', r'\99', 'x', 'invalid group reference')
-        self.checkTemplateError('x', r'\118', 'x', 'invalid group reference') # r'\11' + '8'
-        self.checkTemplateError('x', r'\11a', 'x', 'invalid group reference')
-        self.checkTemplateError('x', r'\181', 'x', 'invalid group reference') # r'\18' + '1'
-        self.checkTemplateError('x', r'\800', 'x', 'invalid group reference') # r'\80' + '0'
+        self.checkTemplateError('x', r'\1', 'x', 'invalid group reference 1', 1)
+        self.checkTemplateError('x', r'\8', 'x', 'invalid group reference 8', 1)
+        self.checkTemplateError('x', r'\9', 'x', 'invalid group reference 9', 1)
+        self.checkTemplateError('x', r'\11', 'x', 'invalid group reference 11', 1)
+        self.checkTemplateError('x', r'\18', 'x', 'invalid group reference 18', 1)
+        self.checkTemplateError('x', r'\1a', 'x', 'invalid group reference 1', 1)
+        self.checkTemplateError('x', r'\90', 'x', 'invalid group reference 90', 1)
+        self.checkTemplateError('x', r'\99', 'x', 'invalid group reference 99', 1)
+        self.checkTemplateError('x', r'\118', 'x', 'invalid group reference 11', 1)
+        self.checkTemplateError('x', r'\11a', 'x', 'invalid group reference 11', 1)
+        self.checkTemplateError('x', r'\181', 'x', 'invalid group reference 18', 1)
+        self.checkTemplateError('x', r'\800', 'x', 'invalid group reference 80', 1)
+        self.checkTemplateError('x', r'\8', '', 'invalid group reference 8', 1)
 
         # in python2.3 (etc), these loop endlessly in sre_parser.py
         self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
@@ -271,9 +271,9 @@
         self.checkTemplateError('(?P<a>x)', r'\g<1a1>', 'xx',
                                 "bad character in group name '1a1'", 3)
         self.checkTemplateError('(?P<a>x)', r'\g<2>', 'xx',
-                                'invalid group reference')
+                                'invalid group reference 2', 3)
         self.checkTemplateError('(?P<a>x)', r'\2', 'xx',
-                                'invalid group reference')
+                                'invalid group reference 2', 1)
         with self.assertRaisesRegex(IndexError, "unknown group name 'ab'"):
             re.sub('(?P<a>x)', r'\g<ab>', 'xx')
         self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '')
@@ -558,10 +558,11 @@
                                'two branches', 10)
 
     def test_re_groupref_overflow(self):
-        self.checkTemplateError('()', r'\g<%s>' % sre_constants.MAXGROUPS, 'xx',
-                                'invalid group reference', 3)
-        self.checkPatternError(r'(?P<a>)(?(%d))' % sre_constants.MAXGROUPS,
-                               'invalid group reference', 10)
+        from sre_constants import MAXGROUPS
+        self.checkTemplateError('()', r'\g<%s>' % MAXGROUPS, 'xx',
+                                'invalid group reference %d' % MAXGROUPS, 3)
+        self.checkPatternError(r'(?P<a>)(?(%d))' % MAXGROUPS,
+                               'invalid group reference %d' % MAXGROUPS, 10)
 
     def test_re_groupref(self):
         self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
@@ -1007,7 +1008,7 @@
         self.checkPatternError(r"\567",
                                r'octal escape value \567 outside of '
                                r'range 0-0o377', 0)
-        self.checkPatternError(r"\911", 'invalid group reference', 0)
+        self.checkPatternError(r"\911", 'invalid group reference 91', 1)
         self.checkPatternError(r"\x1", r'incomplete escape \x1', 0)
         self.checkPatternError(r"\x1z", r'incomplete escape \x1', 0)
         self.checkPatternError(r"\u123", r'incomplete escape \u123', 0)
@@ -1061,7 +1062,7 @@
         self.checkPatternError(br"\567",
                                r'octal escape value \567 outside of '
                                r'range 0-0o377', 0)
-        self.checkPatternError(br"\911", 'invalid group reference', 0)
+        self.checkPatternError(br"\911", 'invalid group reference 91', 1)
         self.checkPatternError(br"\x1", r'incomplete escape \x1', 0)
         self.checkPatternError(br"\x1z", r'incomplete escape \x1', 0)
 
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -16,6 +16,12 @@
 - Issue #23782: Fixed possible memory leak in _PyTraceback_Add() and exception
   loss in PyTraceBack_Here().
 
+- Issue #25953: re.sub() now raises an error for invalid numerical group
+  reference in replacement template even if the pattern is not found in
+  the string.  Error message for invalid group reference now includes the
+  group index and the position of the reference.
+  Based on patch by SilentGhost.
+
 - Issue #28183: Optimize and cleanup dict iteration.
 
 - Issue #26081: Added C implementation of asyncio.Future.

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list