[pypy-svn] r51892 - in pypy/dist/pypy/rlib/parsing: . test

Wed Feb 27 09:31:36 CET 2008

Author: jared.grubb
Date: Wed Feb 27 09:31:36 2008
New Revision: 51892

Modified:
   pypy/dist/pypy/rlib/parsing/deterministic.py
   pypy/dist/pypy/rlib/parsing/regexparse.py
   pypy/dist/pypy/rlib/parsing/test/test_pcre_regtest.py
   pypy/dist/pypy/rlib/parsing/test/test_regexparse.py
Log:
rlib.parsing.regexparse: add \d\D\w\W\s\S char classes; make []x] a valid char class that tests for ']' or 'x'; add \e escape option
rlib.parsing.deterministic: remove escaping of ] that I put in last time
rlib.parsing.test.test_regexparse: add tests for new regex features
rlib.parsing.test.test_pcre_regtest: supports testing MANY more of the PCRE tests... still not 100%, but getting there!


Modified: pypy/dist/pypy/rlib/parsing/deterministic.py
==============================================================================

--- pypy/dist/pypy/rlib/parsing/deterministic.py	(original)
+++ pypy/dist/pypy/rlib/parsing/deterministic.py	Wed Feb 27 09:31:36 2008
@@ -34,7 +34,7 @@
 def make_nice_charset_repr(chars):
     # Compress the letters & digits
     letters = set(chars) & set("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
-    therest = set(chars) - letters - set(['-',']'])
+    therest = set(chars) - letters - set(['-'])
     charranges = compress_char_set(letters)
     result = []
     for a, num in charranges:
@@ -49,8 +49,6 @@
     # Handle the special chars that MUST get escaped
     if '-' in chars:
         result += ['\\-']
-    if ']' in chars:
-        result += ['\\]']
     return "".join(result)
 
 class LexerError(Exception):

Modified: pypy/dist/pypy/rlib/parsing/regexparse.py
==============================================================================
--- pypy/dist/pypy/rlib/parsing/regexparse.py	(original)
+++ pypy/dist/pypy/rlib/parsing/regexparse.py	Wed Feb 27 09:31:36 2008
@@ -11,6 +11,7 @@
 ESCAPES = {
     "\\a": "\a",
     "\\b": "\b",
+    "\\e": "\x1b",
     "\\f": "\f",
     "\\n": "\n",
     "\\r": "\r",
@@ -20,7 +21,7 @@
 }
 
 for i in range(256):
-    if chr(i) not in 'x01234567':
+    if chr(i) not in 'x01234567sSwWdD':
         # 'x' and numbers are reserved for hexadecimal/octal escapes
         escaped = "\\" + chr(i)
         if escaped not in ESCAPES:
@@ -120,6 +121,8 @@
 primary:
     ['('] regex [')']
   | range
+  | cc = charclass
+    return {reduce(operator.or_, [regex.RangeExpression(a, chr(ord(a) + b - 1)) for a, b in compress_char_set(cc)])}
   | c = char
     return {regex.StringExpression(c)}
   | '.'
@@ -133,7 +136,7 @@
 
 QUOTEDCHAR:
     `(\\x[0-9a-fA-F]{2})|(\\[0-3]?[0-7][0-7])|(\\c.)|(\\.)`;
-
+    
 CHAR:
     `[^\*\+\(\)\[\]\{\}\|\.\-\?\,\^]`;
 
@@ -150,11 +153,15 @@
   | subrange;
 
 subrange:
+    ']'
     l = rangeelement+
+    return {reduce(operator.or_, [set(["]"])] + l)}
+  | l = rangeelement+
     return {reduce(operator.or_, l)};
 
 rangeelement:
-    c1 = char
+    charclass
+  | c1 = char
     '-'
     c2 = char
     return {set([chr(i) for i in range(ord(c1), ord(c2) + 1)])}
@@ -174,6 +181,25 @@
     ','
     return {n1};
 
+charclass:
+    '\' 'd'
+    return { set([chr(c) for c in range(ord('0'), ord('9')+1)]) }
+  | '\' 
+    's'
+    return { set(['\t', '\n', '\f', '\r', ' ']) }
+  | '\' 
+    'w'
+    return { set([chr(c) for c in range(ord('a'), ord('z')+1)] + [chr(c) for c in range(ord('A'), ord('Z')+1)] + [chr(c) for c in range(ord('0'), ord('9')+1)] + ['_']) }
+  | '\' 
+    'D'
+    return { set([chr(c) for c in range(256)]) - set([chr(c) for c in range(ord('0'), ord('9')+1)]) }
+  | '\' 
+    'S'
+    return { set([chr(c) for c in range(256)]) - set(['\t', '\n', '\f', '\r', ' ']) }
+  | '\' 
+    'W'
+    return { set([chr(c) for c in range(256)]) - set([chr(c) for c in range(ord('a'), ord('z')+1)] + [chr(c) for c in range(ord('A'), ord('Z')+1)] + [chr(c) for c in range(ord('0'), ord('9')+1)] + ['_'])};
+
 NUM:
     c = `0|([1-9][0-9]*)`
     return {int(c)};
@@ -684,6 +710,17 @@
                     self._pos = _choice3
                 _choice4 = self._pos
                 try:
+                    _call_status = self._charclass()
+                    _result = _call_status.result
+                    _error = self._combine_errors(_error, _call_status.error)
+                    cc = _result
+                    _result = (reduce(operator.or_, [regex.RangeExpression(a, chr(ord(a) + b - 1)) for a, b in compress_char_set(cc)]))
+                    break
+                except BacktrackException, _exc:
+                    _error = self._combine_errors(_error, _exc.error)
+                    self._pos = _choice4
+                _choice5 = self._pos
+                try:
                     _call_status = self._char()
                     _result = _call_status.result
                     _error = self._combine_errors(_error, _call_status.error)
@@ -692,15 +729,15 @@
                     break
                 except BacktrackException, _exc:
                     _error = self._combine_errors(_error, _exc.error)
-                    self._pos = _choice4
-                _choice5 = self._pos
+                    self._pos = _choice5
+                _choice6 = self._pos
                 try:
                     _result = self.__chars__('.')
                     _result = (regex.RangeExpression(chr(0), chr(255)))
                     break
                 except BacktrackException, _exc:
                     _error = self._combine_errors(_error, _exc.error)
-                    self._pos = _choice5
+                    self._pos = _choice6
                     raise BacktrackException(_error)
                 _result = self.__chars__('.')
                 _result = (regex.RangeExpression(chr(0), chr(255)))
@@ -1041,25 +1078,79 @@
         try:
             _result = None
             _error = None
-            _all0 = []
-            _call_status = self._rangeelement()
-            _result = _call_status.result
-            _error = _call_status.error
-            _all0.append(_result)
             while 1:
-                _choice1 = self._pos
+                _choice0 = self._pos
                 try:
+                    _result = self.__chars__(']')
+                    _all1 = []
                     _call_status = self._rangeelement()
                     _result = _call_status.result
-                    _error = self._combine_errors(_error, _call_status.error)
-                    _all0.append(_result)
+                    _error = _call_status.error
+                    _all1.append(_result)
+                    while 1:
+                        _choice2 = self._pos
+                        try:
+                            _call_status = self._rangeelement()
+                            _result = _call_status.result
+                            _error = self._combine_errors(_error, _call_status.error)
+                            _all1.append(_result)
+                        except BacktrackException, _exc:
+                            _error = self._combine_errors(_error, _exc.error)
+                            self._pos = _choice2
+                            break
+                    _result = _all1
+                    l = _result
+                    _result = (reduce(operator.or_, [set(["]"])] + l))
+                    break
                 except BacktrackException, _exc:
                     _error = self._combine_errors(_error, _exc.error)
-                    self._pos = _choice1
+                    self._pos = _choice0
+                _choice3 = self._pos
+                try:
+                    _all4 = []
+                    _call_status = self._rangeelement()
+                    _result = _call_status.result
+                    _error = self._combine_errors(_error, _call_status.error)
+                    _all4.append(_result)
+                    while 1:
+                        _choice5 = self._pos
+                        try:
+                            _call_status = self._rangeelement()
+                            _result = _call_status.result
+                            _error = self._combine_errors(_error, _call_status.error)
+                            _all4.append(_result)
+                        except BacktrackException, _exc:
+                            _error = self._combine_errors(_error, _exc.error)
+                            self._pos = _choice5
+                            break
+                    _result = _all4
+                    l = _result
+                    _result = (reduce(operator.or_, l))
                     break
-            _result = _all0
-            l = _result
-            _result = (reduce(operator.or_, l))
+                except BacktrackException, _exc:
+                    _error = self._combine_errors(_error, _exc.error)
+                    self._pos = _choice3
+                    raise BacktrackException(_error)
+                _all6 = []
+                _call_status = self._rangeelement()
+                _result = _call_status.result
+                _error = self._combine_errors(_error, _call_status.error)
+                _all6.append(_result)
+                while 1:
+                    _choice7 = self._pos
+                    try:
+                        _call_status = self._rangeelement()
+                        _result = _call_status.result
+                        _error = self._combine_errors(_error, _call_status.error)
+                        _all6.append(_result)
+                    except BacktrackException, _exc:
+                        _error = self._combine_errors(_error, _exc.error)
+                        self._pos = _choice7
+                        break
+                _result = _all6
+                l = _result
+                _result = (reduce(operator.or_, l))
+                break
             if _status.status == _status.LEFTRECURSION:
                 if _status.result is not None:
                     if _status.pos >= self._pos:
@@ -1115,9 +1206,18 @@
             while 1:
                 _choice0 = self._pos
                 try:
-                    _call_status = self._char()
+                    _call_status = self._charclass()
                     _result = _call_status.result
                     _error = _call_status.error
+                    break
+                except BacktrackException, _exc:
+                    _error = self._combine_errors(_error, _exc.error)
+                    self._pos = _choice0
+                _choice1 = self._pos
+                try:
+                    _call_status = self._char()
+                    _result = _call_status.result
+                    _error = self._combine_errors(_error, _call_status.error)
                     c1 = _result
                     _result = self.__chars__('-')
                     _call_status = self._char()
@@ -1128,8 +1228,8 @@
                     break
                 except BacktrackException, _exc:
                     _error = self._combine_errors(_error, _exc.error)
-                    self._pos = _choice0
-                _choice1 = self._pos
+                    self._pos = _choice1
+                _choice2 = self._pos
                 try:
                     _call_status = self._char()
                     _result = _call_status.result
@@ -1139,7 +1239,7 @@
                     break
                 except BacktrackException, _exc:
                     _error = self._combine_errors(_error, _exc.error)
-                    self._pos = _choice1
+                    self._pos = _choice2
                     raise BacktrackException(_error)
                 _call_status = self._char()
                 _result = _call_status.result
@@ -1316,6 +1416,97 @@
             _status.error = _error
             _status.status = _status.ERROR
             raise BacktrackException(_error)
+    def charclass(self):
+        return self._charclass().result
+    def _charclass(self):
+        _key = self._pos
+        _status = self._dict_charclass.get(_key, None)
+        if _status is None:
+            _status = self._dict_charclass[_key] = Status()
+        else:
+            _statusstatus = _status.status
+            if _statusstatus == _status.NORMAL:
+                self._pos = _status.pos
+                return _status
+            elif _statusstatus == _status.ERROR:
+                raise BacktrackException(_status.error)
+        _startingpos = self._pos
+        try:
+            _result = None
+            _error = None
+            while 1:
+                _choice0 = self._pos
+                try:
+                    _result = self.__chars__('\\')
+                    _result = self.__chars__('d')
+                    _result = ( set([chr(c) for c in range(ord('0'), ord('9')+1)]) )
+                    break
+                except BacktrackException, _exc:
+                    _error = _exc.error
+                    self._pos = _choice0
+                _choice1 = self._pos
+                try:
+                    _result = self.__chars__('\\')
+                    _result = self.__chars__('s')
+                    _result = ( set(['\t', '\n', '\f', '\r', ' ']) )
+                    break
+                except BacktrackException, _exc:
+                    _error = self._combine_errors(_error, _exc.error)
+                    self._pos = _choice1
+                _choice2 = self._pos
+                try:
+                    _result = self.__chars__('\\')
+                    _result = self.__chars__('w')
+                    _result = ( set([chr(c) for c in range(ord('a'), ord('z')+1)] + [chr(c) for c in range(ord('A'), ord('Z')+1)] + [chr(c) for c in range(ord('0'), ord('9')+1)] + ['_']) )
+                    break
+                except BacktrackException, _exc:
+                    _error = self._combine_errors(_error, _exc.error)
+                    self._pos = _choice2
+                _choice3 = self._pos
+                try:
+                    _result = self.__chars__('\\')
+                    _result = self.__chars__('D')
+                    _result = ( set([chr(c) for c in range(256)]) - set([chr(c) for c in range(ord('0'), ord('9')+1)]) )
+                    break
+                except BacktrackException, _exc:
+                    _error = self._combine_errors(_error, _exc.error)
+                    self._pos = _choice3
+                _choice4 = self._pos
+                try:
+                    _result = self.__chars__('\\')
+                    _result = self.__chars__('S')
+                    _result = ( set([chr(c) for c in range(256)]) - set(['\t', '\n', '\f', '\r', ' ']) )
+                    break
+                except BacktrackException, _exc:
+                    _error = self._combine_errors(_error, _exc.error)
+                    self._pos = _choice4
+                _choice5 = self._pos
+                try:
+                    _result = self.__chars__('\\')
+                    _result = self.__chars__('W')
+                    _result = ( set([chr(c) for c in range(256)]) - set([chr(c) for c in range(ord('a'), ord('z')+1)] + [chr(c) for c in range(ord('A'), ord('Z')+1)] + [chr(c) for c in range(ord('0'), ord('9')+1)] + ['_']))
+                    break
+                except BacktrackException, _exc:
+                    _error = self._combine_errors(_error, _exc.error)
+                    self._pos = _choice5
+                    raise BacktrackException(_error)
+                _result = self.__chars__('\\')
+                _result = self.__chars__('W')
+                _result = ( set([chr(c) for c in range(256)]) - set([chr(c) for c in range(ord('a'), ord('z')+1)] + [chr(c) for c in range(ord('A'), ord('Z')+1)] + [chr(c) for c in range(ord('0'), ord('9')+1)] + ['_']))
+                break
+            assert _status.status != _status.LEFTRECURSION
+            _status.status = _status.NORMAL
+            _status.pos = self._pos
+            _status.result = _result
+            _status.error = _error
+            return _status
+        except BacktrackException, _exc:
+            _status.pos = -1
+            _status.result = None
+            _error = self._combine_errors(_error, _exc.error)
+            _status.error = _error
+            _status.status = _status.ERROR
+            raise BacktrackException(_error)
     def NUM(self):
         return self._NUM().result
     def _NUM(self):
@@ -1366,6 +1557,7 @@
         self._dict_rangeelement = {}
         self._dict_numrange = {}
         self._dict_clippednumrange = {}
+        self._dict_charclass = {}
         self._dict_NUM = {}
         self._pos = 0
         self._inputstream = inputstream
@@ -1663,6 +1855,13 @@
 
 
 
+
+
+
+
+
+
+
 def test_generate():
     f = py.magic.autopath()
     oldcontent = f.read()

Modified: pypy/dist/pypy/rlib/parsing/test/test_pcre_regtest.py
==============================================================================
--- pypy/dist/pypy/rlib/parsing/test/test_pcre_regtest.py	(original)
+++ pypy/dist/pypy/rlib/parsing/test/test_pcre_regtest.py	Wed Feb 27 09:31:36 2008
@@ -6,7 +6,7 @@
 # files with pypy?)
 
 import py
-from pypy.rlib.parsing.regexparse import make_runner, unescape, RegexParser
+from pypy.rlib.parsing.regexparse import make_runner, unescape
 import string
 import re
 
@@ -25,66 +25,75 @@
         
     return test
     
-def get_definition_line(tests, results):
-    """Gets a test definition line, formatted per the PCRE spec."""
-    delim = None
-    test = ''
-    result = ''
-    
-    # A line is marked by a start-delimeter and an end-delimeter.
-    # The delimeter is non-alphanumeric
-    # If a backslash follows the delimiter, then the backslash should
-    #   be appended to the end. (Otherwise, \ + delim would not be a
-    #   delim anymore!)
-    while 1:
-        test += get_simult_lines(tests, results)
+def create_regex_iterator(tests, results):
+    """Gets a test definition line, formatted per the PCRE spec. This is a 
+    generator that returns each regex test."""
+    while tests:
+        delim = None
+        regex = ''
     
-        if delim is None:
-            delim = test[0]
-            assert delim in (set(string.printable) - set(string.letters) - set(string.digits))
-            test_re = re.compile(r'%(delim)s(([^%(delim)s]|\\%(delim)s)*([^\\]))%(delim)s(\\?)(.*)' % {'delim': delim})
-        
-        matches = test_re.findall(test)
-        if matches:
-            break
-
-    assert len(matches)==1
-    test = matches[0][0]
+        # A line is marked by a start-delimeter and an end-delimeter.
+        # The delimeter is non-alphanumeric
+        # If a backslash follows the delimiter, then the backslash should
+        #   be appended to the end. (Otherwise, \ + delim would not be a
+        #   delim anymore!)
+        while 1:
+            regex += get_simult_lines(tests, results)
     
-    # Add the backslash, if we gotta
-    test += matches[0][-2]
-    flags = matches[0][-1]
+            if delim is None:
+                delim = regex[0]
+                assert delim in (set(string.printable) - set(string.letters) - set(string.digits))
+                test_re = re.compile(r'%(delim)s(([^%(delim)s]|\\%(delim)s)*([^\\]))%(delim)s(\\?)(.*)' % {'delim': delim})
+                # last two groups are an optional backslash and optional flags
+            
+            matches = test_re.findall(regex)
+            if matches:
+                break
 
-    return test, flags
+        assert len(matches)==1
     
-def get_test_result(tests, results):
-    """Gets the expected return from the regular expression"""
+        regex = matches[0][0]
+        regex += matches[0][-2] # Add the backslash, if we gotta
+        flags = matches[0][-1] # Get the flags for the regex
+
+        yield regex, flags
+
+def create_result_iterator(tests, results):
+    """Gets the expected return sets for each regular expression."""
     # Second line is the test to run against the regex
     # '    TEXT'
-    test = get_simult_lines(tests, results)
-    if not test:
-        return None, None
-    if not test.startswith('    '):
-        raise Exception("Input & output match, but I don't understand. (Got %r)" % test)
-    test = unescape(test[4:])
-    
-    # Third line in the OUTPUT is the result, either:
-    # ' 0: ...' for a match
-    # 'No match' for no match
-    result = unescape(results.pop(0))
-    if result == 'No match':
-        pass
-    elif result.startswith(' 0: '):
-        # Now we need to eat any further lines like:
-        # ' 1: ....' a subgroup match
-        while results[0]:
-            if results[0][2] == ':':
-                results.pop(0)
-            else:
-                break
-    else:
-        raise Exception("Lost sync in output.")
-    return test, result
+    while 1:
+        test = get_simult_lines(tests, results)
+        if not test:
+            raise StopIteration
+        if not test.startswith('    '):
+            raise Exception("Input & output match, but I don't understand. (Got %r)" % test)
+        if test.endswith('\\'): # Tests that end in \ expect the \ to be chopped off
+            assert not test.endswith('\\\\')    # make sure there are no \\ at end
+            test = test[:-1]
+        test = unescape(test[4:])
+    
+        # Third line in the OUTPUT is the result, either:
+        # ' 0: ...' for a match (but this is ONLY escaped by \x__ types)
+        # 'No match' for no match
+        result = results.pop(0)
+        result = re.sub(r'\\x([0-9a-fA-F]{2})', lambda m: chr(int(m.group(1),16)), result)
+        if result == 'No match':
+            pass
+        elif result.startswith(' 0:'):
+            # Now we need to eat any further lines like:
+            # ' 1: ....' a subgroup match
+            while results[0]:
+                if results[0][2] == ':':
+                    results.pop(0)
+                else:
+                    break
+        else:
+            raise Exception("Lost sync in output.")
+        yield test, result
+    
+class SkipException(Exception):
+    pass
     
 def test_file():
     """Open the PCRE tests and run them."""
@@ -95,60 +104,62 @@
                            'i': lambda s: s.upper()
                          }
     
+    regex_set = create_regex_iterator(tests, results)    
     import pdb
-    while tests:
-        # First line is a test, in the form:
-        # '/regex expression/FLAGS'
-        regex, regex_flags = get_definition_line(tests, results)
-
-        # Handle the flags:
+    for regex, regex_flags in regex_set:
         try:
-            text_prepare = regex_flag_mapping[regex_flags]
-        except KeyError:
-            print "UNKNOWN FLAGS: %s" % regex_flags
-            continue
-        
-        print '%r' % regex
+            print '%r' % regex
 
-        skipped = any([op in regex for op in ['*?', '??', '+?', '}?']])        
-        if skipped:
-            print "  SKIPPED (cant do non-greedy operators)"
-            # now burn all the tests for this regex
-            while 1:
-                test, result = get_test_result(tests, results)
-                if not test:
-                    break   # A blank line means we have nothing to do
-            continue
+            # Create an iterator to grab the test/results for this regex
+            result_set = create_result_iterator(tests, results)
+
+            # Handle the flags:
+            if regex_flags in regex_flag_mapping:
+                text_prepare = regex_flag_mapping[regex_flags]
+            elif 'x' in regex_flags:
+                raise SkipException("Cant do extended PRCE expressions")            
+            else:
+                print "UNKNOWN FLAGS: %s" % regex_flags
+                continue
+        
+            skipped = any([op in regex for op in ['*?', '??', '+?', '}?', '(?']])        
+            if skipped:
+                raise SkipException("Cant do non-greedy operators or '(?' constructions)")
                 
-        regex_to_use = text_prepare(regex)
+            regex_to_use = text_prepare(regex)
         
-        anchor_left = regex_to_use.startswith('^')
-        anchor_right = regex_to_use.endswith('$') and not regex_to_use.endswith('\\$')
-        if anchor_left:
-            regex_to_use = regex_to_use[1:]   # chop the ^ if it's there
-        if anchor_right:
-            regex_to_use = regex_to_use[:-1]  # chop the $ if it's there
+            anchor_left = regex_to_use.startswith('^')
+            anchor_right = regex_to_use.endswith('$') and not regex_to_use.endswith('\\$')
+            if anchor_left:
+                regex_to_use = regex_to_use[1:]   # chop the ^ if it's there
+            if anchor_right:
+                regex_to_use = regex_to_use[:-1]  # chop the $ if it's there
         
+            if not regex_to_use:
+                raise SkipException("Cant do blank regex")
+        except SkipException, e:
+            print "  SKIPPED (%s)" % e.message
+            # now burn all the tests for this regex
+            for _ in result_set:
+                pass
+            continue
+            
         # Finally, we make the pypy regex runner
         runner = make_runner(regex_to_use)
-
+        
         # Now run the test expressions against the Regex
-        while 1:
-            test, result = get_test_result(tests, results)
-            if not test:
-                break   # A blank line means we have nothing to do
-                
+        for test, result in result_set:
             # Create possible subsequences that we should test
             if anchor_left:
-                subseq_gen = [0]
+                start_range = [0]
             else:
-                subseq_gen = (start for start in range(0, len(test)))
+                start_range = range(0, len(test))
             
             if anchor_right:
-                subseq_gen = ( (start, len(test)) for start in subseq_gen )
+                subseq_gen = ( (start, len(test)) for start in start_range )
             else:
                 # Go backwards to simulate greediness
-                subseq_gen = ( (start, end) for start in subseq_gen for end in range(len(test)+1, start+1, -1) )
+                subseq_gen = ( (start, end) for start in start_range for end in range(len(test)+1, start, -1) )
 
             # Search the possibilities for a match...
             for start, end in subseq_gen:
@@ -162,11 +173,11 @@
                 if matched:
                     print "  FALSE MATCH: regex==%r test==%r" % (regex, test)
                 else:
-                    print "  pass       : regex==%r test==%r" % (regex, test)
+                    print "  pass:        regex==%r test==%r" % (regex, test)
             elif result.startswith(' 0: '):
                 if not matched:
                     print "  MISSED:      regex==%r test==%r" % (regex, test)
                 elif not attempt==text_prepare(result[4:]):
                     print "  BAD MATCH:   regex==%r test==%r found==%r expect==%r" % (regex, test, attempt, result[4:])
                 else:
-                    print "  pass       : regex==%r test==%r" % (regex, test)
+                    print "  pass:        regex==%r test==%r" % (regex, test)

Modified: pypy/dist/pypy/rlib/parsing/test/test_regexparse.py
==============================================================================
--- pypy/dist/pypy/rlib/parsing/test/test_regexparse.py	(original)
+++ pypy/dist/pypy/rlib/parsing/test/test_regexparse.py	Wed Feb 27 09:31:36 2008
@@ -33,6 +33,11 @@
     assert r.recognize("aaaaaa")
     assert not r.recognize("a")
     assert not r.recognize("aabb")
+    r = make_runner("(\\x61a)*")
+    assert r.recognize("aa")
+    assert r.recognize("aaaaaa")
+    assert not r.recognize("a")
+    assert not r.recognize("aabb")
 
 def test_range():
     r = make_runner("[A-Z]")
@@ -165,3 +170,64 @@
     assert r.recognize("-0.912E+0001")
     assert not r.recognize("-0.a912E+0001")
     assert r.recognize("5")
+    
+def test_charclass():
+    r = make_runner(r"\d")
+    assert r.recognize('0')
+    assert r.recognize('5')
+    assert r.recognize('9')
+    assert not r.recognize('d')
+    r = make_runner(r"\d{2,}")
+    assert r.recognize('09')
+    assert r.recognize('158')
+    assert not r.recognize('1')
+    r = make_runner(r"\D")
+    assert r.recognize('d')
+    assert r.recognize('\n')
+    assert not r.recognize('0')
+    assert not r.recognize('1234')
+    r = make_runner(r"\s\S")
+    assert r.recognize(' d')
+    assert r.recognize('\t9')
+    assert not r.recognize('d ')
+    assert not r.recognize('99')
+    assert not r.recognize('\r\r')
+    r = make_runner(r"\w+")
+    assert r.recognize('word')
+    assert r.recognize('variable_name')
+    assert r.recognize('abc123')
+    assert not r.recognize('word\n')
+    assert not r.recognize('hey hey')
+    r = make_runner(r"\w\W\w")
+    assert r.recognize('9 9')
+    assert r.recognize('_\fx')
+    assert not r.recognize('\n\r\t')
+
+def test_charclass_in_range():
+    r = make_runner(r"[\de]")
+    assert r.recognize('0')
+    assert r.recognize('5')
+    assert r.recognize('9')
+    assert r.recognize('e')
+    assert not r.recognize('d')
+    r = make_runner(r"[\de]{2,}")
+    assert r.recognize('09')
+    assert r.recognize('158')
+    assert r.recognize('3eee')
+    assert not r.recognize('1')
+    assert not r.recognize('ddee')
+    r = make_runner(r"[\D5]")
+    assert r.recognize('d')
+    assert r.recognize('\n')
+    assert r.recognize('5')
+    assert not r.recognize('0')
+    r = make_runner(r"[\s][\S]")
+    assert r.recognize(' d')
+    assert r.recognize('\t9')
+    assert not r.recognize('d ')
+    assert not r.recognize('99')
+    assert not r.recognize('\r\r')
+    r = make_runner(r"[\w]+\W[\w]+")
+    assert r.recognize('hey hey')
+    assert not r.recognize('word')
+    assert not r.recognize('variable_name')