[Python-3000-checkins] r65185 - in python/branches/py3k/Lib: sre_parse.py test/re_tests.py test/test_re.py

antoine.pitrou python-3000-checkins at python.org
Tue Jul 22 19:53:23 CEST 2008


Author: antoine.pitrou
Date: Tue Jul 22 19:53:22 2008
New Revision: 65185

Log:
#3231: re.compile fails with some bytes patterns (fixes the handling of escape sequences in bytes patterns in sre_parse.py)



Modified:
   python/branches/py3k/Lib/sre_parse.py
   python/branches/py3k/Lib/test/re_tests.py
   python/branches/py3k/Lib/test/test_re.py

Modified: python/branches/py3k/Lib/sre_parse.py
==============================================================================
--- python/branches/py3k/Lib/sre_parse.py	(original)
+++ python/branches/py3k/Lib/sre_parse.py	Tue Jul 22 19:53:22 2008
@@ -200,7 +200,7 @@
             except IndexError:
                 raise error("bogus escape (end of line)")
             if isinstance(self.string, bytes):
-                char = chr(c)
+                c = chr(c)
             char = char + c
         self.index = self.index + len(char)
         self.next = char

Modified: python/branches/py3k/Lib/test/re_tests.py
==============================================================================
--- python/branches/py3k/Lib/test/re_tests.py	(original)
+++ python/branches/py3k/Lib/test/re_tests.py	Tue Jul 22 19:53:22 2008
@@ -661,12 +661,8 @@
     ('^([ab]*?)(?<!(a))c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
 ]
 
-try:
-    u = eval("u'\N{LATIN CAPITAL LETTER A WITH DIAERESIS}'")
-except SyntaxError:
-    pass
-else:
-    tests.extend([
+u = '\N{LATIN CAPITAL LETTER A WITH DIAERESIS}'
+tests.extend([
     # bug 410271: \b broken under locales
     (r'\b.\b', 'a', SUCCEED, 'found', 'a'),
     (r'(?u)\b.\b', u, SUCCEED, 'found', u),

Modified: python/branches/py3k/Lib/test/test_re.py
==============================================================================
--- python/branches/py3k/Lib/test/test_re.py	(original)
+++ python/branches/py3k/Lib/test/test_re.py	Tue Jul 22 19:53:22 2008
@@ -732,23 +732,25 @@
                 else:
                     print('=== Failed incorrectly', t)
 
-                # Try the match on a unicode string, and check that it
-                # still succeeds.
+                # Try the match with both pattern and string converted to
+                # bytes, and check that it still succeeds.
                 try:
-                    result = obj.search(str(s, "latin-1"))
-                    if result is None:
-                        print('=== Fails on unicode match', t)
-                except NameError:
-                    continue # 1.5.2
-                except TypeError:
-                    continue # unicode test case
-
-                # Try the match on a unicode pattern, and check that it
-                # still succeeds.
-                obj=re.compile(str(pattern, "latin-1"))
-                result = obj.search(s)
-                if result is None:
-                    print('=== Fails on unicode pattern match', t)
+                    bpat = bytes(pattern, "ascii")
+                    bs = bytes(s, "ascii")
+                except UnicodeEncodeError:
+                    # skip non-ascii tests
+                    pass
+                else:
+                    try:
+                        bpat = re.compile(bpat)
+                    except Exception:
+                        print('=== Fails on bytes pattern compile', t)
+                        if verbose:
+                            traceback.print_exc(file=sys.stdout)
+                    else:
+                        bytes_result = bpat.search(bs)
+                        if bytes_result is None:
+                            print('=== Fails on bytes pattern match', t)
 
                 # Try the match with the search area limited to the extent
                 # of the match and see if it still succeeds.  \B will
@@ -771,10 +773,11 @@
 
                 # Try the match with LOCALE enabled, and check that it
                 # still succeeds.
-                obj = re.compile(pattern, re.LOCALE)
-                result = obj.search(s)
-                if result is None:
-                    print('=== Fails on locale-sensitive match', t)
+                if '(?u)' not in pattern:
+                    obj = re.compile(pattern, re.LOCALE)
+                    result = obj.search(s)
+                    if result is None:
+                        print('=== Fails on locale-sensitive match', t)
 
                 # Try the match with UNICODE locale enabled, and check
                 # that it still succeeds.


More information about the Python-3000-checkins mailing list