[pypy-svn] r51892 - in pypy/dist/pypy/rlib/parsing: . test
jared.grubb at codespeak.net
jared.grubb at codespeak.net
Wed Feb 27 09:31:36 CET 2008
Author: jared.grubb
Date: Wed Feb 27 09:31:36 2008
New Revision: 51892
Modified:
pypy/dist/pypy/rlib/parsing/deterministic.py
pypy/dist/pypy/rlib/parsing/regexparse.py
pypy/dist/pypy/rlib/parsing/test/test_pcre_regtest.py
pypy/dist/pypy/rlib/parsing/test/test_regexparse.py
Log:
rlib.parsing.regexparse: add \d\D\w\W\s\S char classes; make []x] a valid char class that tests for ']' or 'x'; add \e escape option
rlib.parsing.deterministic: remove the escaping of ']' that I added in the previous commit
rlib.parsing.test.test_regexparse: add tests for new regex features
rlib.parsing.test.test_pcre_regtest: support testing MANY more of the PCRE tests (still not 100%, but getting there!)
Modified: pypy/dist/pypy/rlib/parsing/deterministic.py
==============================================================================
--- pypy/dist/pypy/rlib/parsing/deterministic.py (original)
+++ pypy/dist/pypy/rlib/parsing/deterministic.py Wed Feb 27 09:31:36 2008
@@ -34,7 +34,7 @@
def make_nice_charset_repr(chars):
# Compress the letters & digits
letters = set(chars) & set("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
- therest = set(chars) - letters - set(['-',']'])
+ therest = set(chars) - letters - set(['-'])
charranges = compress_char_set(letters)
result = []
for a, num in charranges:
@@ -49,8 +49,6 @@
# Handle the special chars that MUST get escaped
if '-' in chars:
result += ['\\-']
- if ']' in chars:
- result += ['\\]']
return "".join(result)
class LexerError(Exception):
Modified: pypy/dist/pypy/rlib/parsing/regexparse.py
==============================================================================
--- pypy/dist/pypy/rlib/parsing/regexparse.py (original)
+++ pypy/dist/pypy/rlib/parsing/regexparse.py Wed Feb 27 09:31:36 2008
@@ -11,6 +11,7 @@
ESCAPES = {
"\\a": "\a",
"\\b": "\b",
+ "\\e": "\x1b",
"\\f": "\f",
"\\n": "\n",
"\\r": "\r",
@@ -20,7 +21,7 @@
}
for i in range(256):
- if chr(i) not in 'x01234567':
+ if chr(i) not in 'x01234567sSwWdD':
# 'x' and numbers are reserved for hexadecimal/octal escapes
escaped = "\\" + chr(i)
if escaped not in ESCAPES:
@@ -120,6 +121,8 @@
primary:
['('] regex [')']
| range
+ | cc = charclass
+ return {reduce(operator.or_, [regex.RangeExpression(a, chr(ord(a) + b - 1)) for a, b in compress_char_set(cc)])}
| c = char
return {regex.StringExpression(c)}
| '.'
@@ -133,7 +136,7 @@
QUOTEDCHAR:
`(\\x[0-9a-fA-F]{2})|(\\[0-3]?[0-7][0-7])|(\\c.)|(\\.)`;
-
+
CHAR:
`[^\*\+\(\)\[\]\{\}\|\.\-\?\,\^]`;
@@ -150,11 +153,15 @@
| subrange;
subrange:
+ ']'
l = rangeelement+
+ return {reduce(operator.or_, [set(["]"])] + l)}
+ | l = rangeelement+
return {reduce(operator.or_, l)};
rangeelement:
- c1 = char
+ charclass
+ | c1 = char
'-'
c2 = char
return {set([chr(i) for i in range(ord(c1), ord(c2) + 1)])}
@@ -174,6 +181,25 @@
','
return {n1};
+charclass:
+ '\' 'd'
+ return { set([chr(c) for c in range(ord('0'), ord('9')+1)]) }
+ | '\'
+ 's'
+ return { set(['\t', '\n', '\f', '\r', ' ']) }
+ | '\'
+ 'w'
+ return { set([chr(c) for c in range(ord('a'), ord('z')+1)] + [chr(c) for c in range(ord('A'), ord('Z')+1)] + [chr(c) for c in range(ord('0'), ord('9')+1)] + ['_']) }
+ | '\'
+ 'D'
+ return { set([chr(c) for c in range(256)]) - set([chr(c) for c in range(ord('0'), ord('9')+1)]) }
+ | '\'
+ 'S'
+ return { set([chr(c) for c in range(256)]) - set(['\t', '\n', '\f', '\r', ' ']) }
+ | '\'
+ 'W'
+ return { set([chr(c) for c in range(256)]) - set([chr(c) for c in range(ord('a'), ord('z')+1)] + [chr(c) for c in range(ord('A'), ord('Z')+1)] + [chr(c) for c in range(ord('0'), ord('9')+1)] + ['_'])};
+
NUM:
c = `0|([1-9][0-9]*)`
return {int(c)};
@@ -684,6 +710,17 @@
self._pos = _choice3
_choice4 = self._pos
try:
+ _call_status = self._charclass()
+ _result = _call_status.result
+ _error = self._combine_errors(_error, _call_status.error)
+ cc = _result
+ _result = (reduce(operator.or_, [regex.RangeExpression(a, chr(ord(a) + b - 1)) for a, b in compress_char_set(cc)]))
+ break
+ except BacktrackException, _exc:
+ _error = self._combine_errors(_error, _exc.error)
+ self._pos = _choice4
+ _choice5 = self._pos
+ try:
_call_status = self._char()
_result = _call_status.result
_error = self._combine_errors(_error, _call_status.error)
@@ -692,15 +729,15 @@
break
except BacktrackException, _exc:
_error = self._combine_errors(_error, _exc.error)
- self._pos = _choice4
- _choice5 = self._pos
+ self._pos = _choice5
+ _choice6 = self._pos
try:
_result = self.__chars__('.')
_result = (regex.RangeExpression(chr(0), chr(255)))
break
except BacktrackException, _exc:
_error = self._combine_errors(_error, _exc.error)
- self._pos = _choice5
+ self._pos = _choice6
raise BacktrackException(_error)
_result = self.__chars__('.')
_result = (regex.RangeExpression(chr(0), chr(255)))
@@ -1041,25 +1078,79 @@
try:
_result = None
_error = None
- _all0 = []
- _call_status = self._rangeelement()
- _result = _call_status.result
- _error = _call_status.error
- _all0.append(_result)
while 1:
- _choice1 = self._pos
+ _choice0 = self._pos
try:
+ _result = self.__chars__(']')
+ _all1 = []
_call_status = self._rangeelement()
_result = _call_status.result
- _error = self._combine_errors(_error, _call_status.error)
- _all0.append(_result)
+ _error = _call_status.error
+ _all1.append(_result)
+ while 1:
+ _choice2 = self._pos
+ try:
+ _call_status = self._rangeelement()
+ _result = _call_status.result
+ _error = self._combine_errors(_error, _call_status.error)
+ _all1.append(_result)
+ except BacktrackException, _exc:
+ _error = self._combine_errors(_error, _exc.error)
+ self._pos = _choice2
+ break
+ _result = _all1
+ l = _result
+ _result = (reduce(operator.or_, [set(["]"])] + l))
+ break
except BacktrackException, _exc:
_error = self._combine_errors(_error, _exc.error)
- self._pos = _choice1
+ self._pos = _choice0
+ _choice3 = self._pos
+ try:
+ _all4 = []
+ _call_status = self._rangeelement()
+ _result = _call_status.result
+ _error = self._combine_errors(_error, _call_status.error)
+ _all4.append(_result)
+ while 1:
+ _choice5 = self._pos
+ try:
+ _call_status = self._rangeelement()
+ _result = _call_status.result
+ _error = self._combine_errors(_error, _call_status.error)
+ _all4.append(_result)
+ except BacktrackException, _exc:
+ _error = self._combine_errors(_error, _exc.error)
+ self._pos = _choice5
+ break
+ _result = _all4
+ l = _result
+ _result = (reduce(operator.or_, l))
break
- _result = _all0
- l = _result
- _result = (reduce(operator.or_, l))
+ except BacktrackException, _exc:
+ _error = self._combine_errors(_error, _exc.error)
+ self._pos = _choice3
+ raise BacktrackException(_error)
+ _all6 = []
+ _call_status = self._rangeelement()
+ _result = _call_status.result
+ _error = self._combine_errors(_error, _call_status.error)
+ _all6.append(_result)
+ while 1:
+ _choice7 = self._pos
+ try:
+ _call_status = self._rangeelement()
+ _result = _call_status.result
+ _error = self._combine_errors(_error, _call_status.error)
+ _all6.append(_result)
+ except BacktrackException, _exc:
+ _error = self._combine_errors(_error, _exc.error)
+ self._pos = _choice7
+ break
+ _result = _all6
+ l = _result
+ _result = (reduce(operator.or_, l))
+ break
if _status.status == _status.LEFTRECURSION:
if _status.result is not None:
if _status.pos >= self._pos:
@@ -1115,9 +1206,18 @@
while 1:
_choice0 = self._pos
try:
- _call_status = self._char()
+ _call_status = self._charclass()
_result = _call_status.result
_error = _call_status.error
+ break
+ except BacktrackException, _exc:
+ _error = self._combine_errors(_error, _exc.error)
+ self._pos = _choice0
+ _choice1 = self._pos
+ try:
+ _call_status = self._char()
+ _result = _call_status.result
+ _error = self._combine_errors(_error, _call_status.error)
c1 = _result
_result = self.__chars__('-')
_call_status = self._char()
@@ -1128,8 +1228,8 @@
break
except BacktrackException, _exc:
_error = self._combine_errors(_error, _exc.error)
- self._pos = _choice0
- _choice1 = self._pos
+ self._pos = _choice1
+ _choice2 = self._pos
try:
_call_status = self._char()
_result = _call_status.result
@@ -1139,7 +1239,7 @@
break
except BacktrackException, _exc:
_error = self._combine_errors(_error, _exc.error)
- self._pos = _choice1
+ self._pos = _choice2
raise BacktrackException(_error)
_call_status = self._char()
_result = _call_status.result
@@ -1316,6 +1416,97 @@
_status.error = _error
_status.status = _status.ERROR
raise BacktrackException(_error)
+ def charclass(self):
+ return self._charclass().result
+ def _charclass(self):
+ _key = self._pos
+ _status = self._dict_charclass.get(_key, None)
+ if _status is None:
+ _status = self._dict_charclass[_key] = Status()
+ else:
+ _statusstatus = _status.status
+ if _statusstatus == _status.NORMAL:
+ self._pos = _status.pos
+ return _status
+ elif _statusstatus == _status.ERROR:
+ raise BacktrackException(_status.error)
+ _startingpos = self._pos
+ try:
+ _result = None
+ _error = None
+ while 1:
+ _choice0 = self._pos
+ try:
+ _result = self.__chars__('\\')
+ _result = self.__chars__('d')
+ _result = ( set([chr(c) for c in range(ord('0'), ord('9')+1)]) )
+ break
+ except BacktrackException, _exc:
+ _error = _exc.error
+ self._pos = _choice0
+ _choice1 = self._pos
+ try:
+ _result = self.__chars__('\\')
+ _result = self.__chars__('s')
+ _result = ( set(['\t', '\n', '\f', '\r', ' ']) )
+ break
+ except BacktrackException, _exc:
+ _error = self._combine_errors(_error, _exc.error)
+ self._pos = _choice1
+ _choice2 = self._pos
+ try:
+ _result = self.__chars__('\\')
+ _result = self.__chars__('w')
+ _result = ( set([chr(c) for c in range(ord('a'), ord('z')+1)] + [chr(c) for c in range(ord('A'), ord('Z')+1)] + [chr(c) for c in range(ord('0'), ord('9')+1)] + ['_']) )
+ break
+ except BacktrackException, _exc:
+ _error = self._combine_errors(_error, _exc.error)
+ self._pos = _choice2
+ _choice3 = self._pos
+ try:
+ _result = self.__chars__('\\')
+ _result = self.__chars__('D')
+ _result = ( set([chr(c) for c in range(256)]) - set([chr(c) for c in range(ord('0'), ord('9')+1)]) )
+ break
+ except BacktrackException, _exc:
+ _error = self._combine_errors(_error, _exc.error)
+ self._pos = _choice3
+ _choice4 = self._pos
+ try:
+ _result = self.__chars__('\\')
+ _result = self.__chars__('S')
+ _result = ( set([chr(c) for c in range(256)]) - set(['\t', '\n', '\f', '\r', ' ']) )
+ break
+ except BacktrackException, _exc:
+ _error = self._combine_errors(_error, _exc.error)
+ self._pos = _choice4
+ _choice5 = self._pos
+ try:
+ _result = self.__chars__('\\')
+ _result = self.__chars__('W')
+ _result = ( set([chr(c) for c in range(256)]) - set([chr(c) for c in range(ord('a'), ord('z')+1)] + [chr(c) for c in range(ord('A'), ord('Z')+1)] + [chr(c) for c in range(ord('0'), ord('9')+1)] + ['_']))
+ break
+ except BacktrackException, _exc:
+ _error = self._combine_errors(_error, _exc.error)
+ self._pos = _choice5
+ raise BacktrackException(_error)
+ _result = self.__chars__('\\')
+ _result = self.__chars__('W')
+ _result = ( set([chr(c) for c in range(256)]) - set([chr(c) for c in range(ord('a'), ord('z')+1)] + [chr(c) for c in range(ord('A'), ord('Z')+1)] + [chr(c) for c in range(ord('0'), ord('9')+1)] + ['_']))
+ break
+ assert _status.status != _status.LEFTRECURSION
+ _status.status = _status.NORMAL
+ _status.pos = self._pos
+ _status.result = _result
+ _status.error = _error
+ return _status
+ except BacktrackException, _exc:
+ _status.pos = -1
+ _status.result = None
+ _error = self._combine_errors(_error, _exc.error)
+ _status.error = _error
+ _status.status = _status.ERROR
+ raise BacktrackException(_error)
def NUM(self):
return self._NUM().result
def _NUM(self):
@@ -1366,6 +1557,7 @@
self._dict_rangeelement = {}
self._dict_numrange = {}
self._dict_clippednumrange = {}
+ self._dict_charclass = {}
self._dict_NUM = {}
self._pos = 0
self._inputstream = inputstream
@@ -1663,6 +1855,13 @@
+
+
+
+
+
+
+
def test_generate():
f = py.magic.autopath()
oldcontent = f.read()
Modified: pypy/dist/pypy/rlib/parsing/test/test_pcre_regtest.py
==============================================================================
--- pypy/dist/pypy/rlib/parsing/test/test_pcre_regtest.py (original)
+++ pypy/dist/pypy/rlib/parsing/test/test_pcre_regtest.py Wed Feb 27 09:31:36 2008
@@ -6,7 +6,7 @@
# files with pypy?)
import py
-from pypy.rlib.parsing.regexparse import make_runner, unescape, RegexParser
+from pypy.rlib.parsing.regexparse import make_runner, unescape
import string
import re
@@ -25,66 +25,75 @@
return test
-def get_definition_line(tests, results):
- """Gets a test definition line, formatted per the PCRE spec."""
- delim = None
- test = ''
- result = ''
-
- # A line is marked by a start-delimeter and an end-delimeter.
- # The delimeter is non-alphanumeric
- # If a backslash follows the delimiter, then the backslash should
- # be appended to the end. (Otherwise, \ + delim would not be a
- # delim anymore!)
- while 1:
- test += get_simult_lines(tests, results)
+def create_regex_iterator(tests, results):
+ """Gets a test definition line, formatted per the PCRE spec. This is a
+ generator that returns each regex test."""
+ while tests:
+ delim = None
+ regex = ''
- if delim is None:
- delim = test[0]
- assert delim in (set(string.printable) - set(string.letters) - set(string.digits))
- test_re = re.compile(r'%(delim)s(([^%(delim)s]|\\%(delim)s)*([^\\]))%(delim)s(\\?)(.*)' % {'delim': delim})
-
- matches = test_re.findall(test)
- if matches:
- break
-
- assert len(matches)==1
- test = matches[0][0]
+ # A line is marked by a start-delimeter and an end-delimeter.
+ # The delimeter is non-alphanumeric
+ # If a backslash follows the delimiter, then the backslash should
+ # be appended to the end. (Otherwise, \ + delim would not be a
+ # delim anymore!)
+ while 1:
+ regex += get_simult_lines(tests, results)
- # Add the backslash, if we gotta
- test += matches[0][-2]
- flags = matches[0][-1]
+ if delim is None:
+ delim = regex[0]
+ assert delim in (set(string.printable) - set(string.letters) - set(string.digits))
+ test_re = re.compile(r'%(delim)s(([^%(delim)s]|\\%(delim)s)*([^\\]))%(delim)s(\\?)(.*)' % {'delim': delim})
+ # last two groups are an optional backslash and optional flags
+
+ matches = test_re.findall(regex)
+ if matches:
+ break
- return test, flags
+ assert len(matches)==1
-def get_test_result(tests, results):
- """Gets the expected return from the regular expression"""
+ regex = matches[0][0]
+ regex += matches[0][-2] # Add the backslash, if we gotta
+ flags = matches[0][-1] # Get the flags for the regex
+
+ yield regex, flags
+
+def create_result_iterator(tests, results):
+ """Gets the expected return sets for each regular expression."""
# Second line is the test to run against the regex
# ' TEXT'
- test = get_simult_lines(tests, results)
- if not test:
- return None, None
- if not test.startswith(' '):
- raise Exception("Input & output match, but I don't understand. (Got %r)" % test)
- test = unescape(test[4:])
-
- # Third line in the OUTPUT is the result, either:
- # ' 0: ...' for a match
- # 'No match' for no match
- result = unescape(results.pop(0))
- if result == 'No match':
- pass
- elif result.startswith(' 0: '):
- # Now we need to eat any further lines like:
- # ' 1: ....' a subgroup match
- while results[0]:
- if results[0][2] == ':':
- results.pop(0)
- else:
- break
- else:
- raise Exception("Lost sync in output.")
- return test, result
+ while 1:
+ test = get_simult_lines(tests, results)
+ if not test:
+ raise StopIteration
+ if not test.startswith(' '):
+ raise Exception("Input & output match, but I don't understand. (Got %r)" % test)
+ if test.endswith('\\'): # Tests that end in \ expect the \ to be chopped off
+ assert not test.endswith('\\\\') # make sure there are no \\ at end
+ test = test[:-1]
+ test = unescape(test[4:])
+
+ # Third line in the OUTPUT is the result, either:
+ # ' 0: ...' for a match (but this is ONLY escaped by \x__ types)
+ # 'No match' for no match
+ result = results.pop(0)
+ result = re.sub(r'\\x([0-9a-fA-F]{2})', lambda m: chr(int(m.group(1),16)), result)
+ if result == 'No match':
+ pass
+ elif result.startswith(' 0:'):
+ # Now we need to eat any further lines like:
+ # ' 1: ....' a subgroup match
+ while results[0]:
+ if results[0][2] == ':':
+ results.pop(0)
+ else:
+ break
+ else:
+ raise Exception("Lost sync in output.")
+ yield test, result
+
+class SkipException(Exception):
+ pass
def test_file():
"""Open the PCRE tests and run them."""
@@ -95,60 +104,62 @@
'i': lambda s: s.upper()
}
+ regex_set = create_regex_iterator(tests, results)
import pdb
- while tests:
- # First line is a test, in the form:
- # '/regex expression/FLAGS'
- regex, regex_flags = get_definition_line(tests, results)
-
- # Handle the flags:
+ for regex, regex_flags in regex_set:
try:
- text_prepare = regex_flag_mapping[regex_flags]
- except KeyError:
- print "UNKNOWN FLAGS: %s" % regex_flags
- continue
-
- print '%r' % regex
+ print '%r' % regex
- skipped = any([op in regex for op in ['*?', '??', '+?', '}?']])
- if skipped:
- print " SKIPPED (cant do non-greedy operators)"
- # now burn all the tests for this regex
- while 1:
- test, result = get_test_result(tests, results)
- if not test:
- break # A blank line means we have nothing to do
- continue
+ # Create an iterator to grab the test/results for this regex
+ result_set = create_result_iterator(tests, results)
+
+ # Handle the flags:
+ if regex_flags in regex_flag_mapping:
+ text_prepare = regex_flag_mapping[regex_flags]
+ elif 'x' in regex_flags:
+ raise SkipException("Cant do extended PRCE expressions")
+ else:
+ print "UNKNOWN FLAGS: %s" % regex_flags
+ continue
+
+ skipped = any([op in regex for op in ['*?', '??', '+?', '}?', '(?']])
+ if skipped:
+ raise SkipException("Cant do non-greedy operators or '(?' constructions)")
- regex_to_use = text_prepare(regex)
+ regex_to_use = text_prepare(regex)
- anchor_left = regex_to_use.startswith('^')
- anchor_right = regex_to_use.endswith('$') and not regex_to_use.endswith('\\$')
- if anchor_left:
- regex_to_use = regex_to_use[1:] # chop the ^ if it's there
- if anchor_right:
- regex_to_use = regex_to_use[:-1] # chop the $ if it's there
+ anchor_left = regex_to_use.startswith('^')
+ anchor_right = regex_to_use.endswith('$') and not regex_to_use.endswith('\\$')
+ if anchor_left:
+ regex_to_use = regex_to_use[1:] # chop the ^ if it's there
+ if anchor_right:
+ regex_to_use = regex_to_use[:-1] # chop the $ if it's there
+ if not regex_to_use:
+ raise SkipException("Cant do blank regex")
+ except SkipException, e:
+ print " SKIPPED (%s)" % e.message
+ # now burn all the tests for this regex
+ for _ in result_set:
+ pass
+ continue
+
# Finally, we make the pypy regex runner
runner = make_runner(regex_to_use)
-
+
# Now run the test expressions against the Regex
- while 1:
- test, result = get_test_result(tests, results)
- if not test:
- break # A blank line means we have nothing to do
-
+ for test, result in result_set:
# Create possible subsequences that we should test
if anchor_left:
- subseq_gen = [0]
+ start_range = [0]
else:
- subseq_gen = (start for start in range(0, len(test)))
+ start_range = range(0, len(test))
if anchor_right:
- subseq_gen = ( (start, len(test)) for start in subseq_gen )
+ subseq_gen = ( (start, len(test)) for start in start_range )
else:
# Go backwards to simulate greediness
- subseq_gen = ( (start, end) for start in subseq_gen for end in range(len(test)+1, start+1, -1) )
+ subseq_gen = ( (start, end) for start in start_range for end in range(len(test)+1, start, -1) )
# Search the possibilities for a match...
for start, end in subseq_gen:
@@ -162,11 +173,11 @@
if matched:
print " FALSE MATCH: regex==%r test==%r" % (regex, test)
else:
- print " pass : regex==%r test==%r" % (regex, test)
+ print " pass: regex==%r test==%r" % (regex, test)
elif result.startswith(' 0: '):
if not matched:
print " MISSED: regex==%r test==%r" % (regex, test)
elif not attempt==text_prepare(result[4:]):
print " BAD MATCH: regex==%r test==%r found==%r expect==%r" % (regex, test, attempt, result[4:])
else:
- print " pass : regex==%r test==%r" % (regex, test)
+ print " pass: regex==%r test==%r" % (regex, test)
Modified: pypy/dist/pypy/rlib/parsing/test/test_regexparse.py
==============================================================================
--- pypy/dist/pypy/rlib/parsing/test/test_regexparse.py (original)
+++ pypy/dist/pypy/rlib/parsing/test/test_regexparse.py Wed Feb 27 09:31:36 2008
@@ -33,6 +33,11 @@
assert r.recognize("aaaaaa")
assert not r.recognize("a")
assert not r.recognize("aabb")
+ r = make_runner("(\\x61a)*")
+ assert r.recognize("aa")
+ assert r.recognize("aaaaaa")
+ assert not r.recognize("a")
+ assert not r.recognize("aabb")
def test_range():
r = make_runner("[A-Z]")
@@ -165,3 +170,64 @@
assert r.recognize("-0.912E+0001")
assert not r.recognize("-0.a912E+0001")
assert r.recognize("5")
+
+def test_charclass():
+ r = make_runner(r"\d")
+ assert r.recognize('0')
+ assert r.recognize('5')
+ assert r.recognize('9')
+ assert not r.recognize('d')
+ r = make_runner(r"\d{2,}")
+ assert r.recognize('09')
+ assert r.recognize('158')
+ assert not r.recognize('1')
+ r = make_runner(r"\D")
+ assert r.recognize('d')
+ assert r.recognize('\n')
+ assert not r.recognize('0')
+ assert not r.recognize('1234')
+ r = make_runner(r"\s\S")
+ assert r.recognize(' d')
+ assert r.recognize('\t9')
+ assert not r.recognize('d ')
+ assert not r.recognize('99')
+ assert not r.recognize('\r\r')
+ r = make_runner(r"\w+")
+ assert r.recognize('word')
+ assert r.recognize('variable_name')
+ assert r.recognize('abc123')
+ assert not r.recognize('word\n')
+ assert not r.recognize('hey hey')
+ r = make_runner(r"\w\W\w")
+ assert r.recognize('9 9')
+ assert r.recognize('_\fx')
+ assert not r.recognize('\n\r\t')
+
+def test_charclass_in_range():
+ r = make_runner(r"[\de]")
+ assert r.recognize('0')
+ assert r.recognize('5')
+ assert r.recognize('9')
+ assert r.recognize('e')
+ assert not r.recognize('d')
+ r = make_runner(r"[\de]{2,}")
+ assert r.recognize('09')
+ assert r.recognize('158')
+ assert r.recognize('3eee')
+ assert not r.recognize('1')
+ assert not r.recognize('ddee')
+ r = make_runner(r"[\D5]")
+ assert r.recognize('d')
+ assert r.recognize('\n')
+ assert r.recognize('5')
+ assert not r.recognize('0')
+ r = make_runner(r"[\s][\S]")
+ assert r.recognize(' d')
+ assert r.recognize('\t9')
+ assert not r.recognize('d ')
+ assert not r.recognize('99')
+ assert not r.recognize('\r\r')
+ r = make_runner(r"[\w]+\W[\w]+")
+ assert r.recognize('hey hey')
+ assert not r.recognize('word')
+ assert not r.recognize('variable_name')
More information about the Pypy-commit
mailing list