[pypy-svn] r15933 - in pypy/dist/pypy/module/_sre: . test
nik at codespeak.net
nik at codespeak.net
Wed Aug 10 19:17:31 CEST 2005
Author: nik
Date: Wed Aug 10 19:17:29 2005
New Revision: 15933
Modified:
pypy/dist/pypy/module/_sre/__init__.py
pypy/dist/pypy/module/_sre/app_sre.py
pypy/dist/pypy/module/_sre/interp_sre.py
pypy/dist/pypy/module/_sre/test/test_app_sre.py
Log:
Moved more functions from app-level to interp-level. Added interp-level tests.
Modified: pypy/dist/pypy/module/_sre/__init__.py
==============================================================================
--- pypy/dist/pypy/module/_sre/__init__.py (original)
+++ pypy/dist/pypy/module/_sre/__init__.py Wed Aug 10 19:17:29 2005
@@ -21,5 +21,8 @@
'_is_digit': 'interp_sre._is_digit',
'_is_space': 'interp_sre._is_space',
'_is_word': 'interp_sre._is_word',
+ '_is_uni_word': 'interp_sre._is_uni_word',
+ '_is_loc_word': 'interp_sre._is_loc_word',
'_is_linebreak': 'interp_sre._is_linebreak',
+ '_is_uni_linebreak': 'interp_sre._is_uni_linebreak',
}
Modified: pypy/dist/pypy/module/_sre/app_sre.py
==============================================================================
--- pypy/dist/pypy/module/_sre/app_sre.py (original)
+++ pypy/dist/pypy/module/_sre/app_sre.py Wed Aug 10 19:17:29 2005
@@ -1170,13 +1170,13 @@
def at_non_boundary(self, ctx):
return not ctx.at_boundary(_sre._is_word)
def at_loc_boundary(self, ctx):
- return ctx.at_boundary(_is_loc_word)
+ return ctx.at_boundary(_sre._is_loc_word)
def at_loc_non_boundary(self, ctx):
- return not ctx.at_boundary(_is_loc_word)
+ return not ctx.at_boundary(_sre._is_loc_word)
def at_uni_boundary(self, ctx):
- return ctx.at_boundary(_is_uni_word)
+ return ctx.at_boundary(_sre._is_uni_word)
def at_uni_non_boundary(self, ctx):
- return not ctx.at_boundary(_is_uni_word)
+ return not ctx.at_boundary(_sre._is_uni_word)
def unknown(self, ctx):
return False
@@ -1202,9 +1202,9 @@
def category_not_linebreak(self, ctx):
return not _sre._is_linebreak(ctx.peek_char())
def category_loc_word(self, ctx):
- return _is_loc_word(ctx.peek_char())
+ return _sre._is_loc_word(ctx.peek_char())
def category_loc_not_word(self, ctx):
- return not _is_loc_word(ctx.peek_char())
+ return not _sre._is_loc_word(ctx.peek_char())
def category_uni_digit(self, ctx):
return ctx.peek_char().isdigit()
def category_uni_not_digit(self, ctx):
@@ -1214,28 +1214,19 @@
def category_uni_not_space(self, ctx):
return not ctx.peek_char().isspace()
def category_uni_word(self, ctx):
- return _is_uni_word(ctx.peek_char())
+ return _sre._is_uni_word(ctx.peek_char())
def category_uni_not_word(self, ctx):
- return not _is_uni_word(ctx.peek_char())
+ return not _sre._is_uni_word(ctx.peek_char())
def category_uni_linebreak(self, ctx):
- return ord(ctx.peek_char()) in _uni_linebreaks
+ return _sre._is_uni_linebreak(ctx.peek_char())
def category_uni_not_linebreak(self, ctx):
- return ord(ctx.peek_char()) not in _uni_linebreaks
+ return not _sre._is_uni_linebreak(ctx.peek_char())
def unknown(self, ctx):
return False
_ChcodeDispatcher.build_dispatch_table(CHCODES, "")
-def _is_loc_word(char):
- return (not (ord(char) & ~255) and char.isalnum()) or char == '_'
-
-def _is_uni_word(char):
- return char.isalnum() or char == '_'
-
-# Static list of all unicode codepoints reported by Py_UNICODE_ISLINEBREAK.
-_uni_linebreaks = [10, 13, 28, 29, 30, 133, 8232, 8233]
-
def _log(message):
if 0:
print message
Modified: pypy/dist/pypy/module/_sre/interp_sre.py
==============================================================================
--- pypy/dist/pypy/module/_sre/interp_sre.py (original)
+++ pypy/dist/pypy/module/_sre/interp_sre.py Wed Aug 10 19:17:29 2005
@@ -1,6 +1,6 @@
from pypy.interpreter.baseobjspace import ObjSpace
-_ascii_char_info = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
+ascii_char_info = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
@@ -8,19 +8,45 @@
0, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 ]
-_linebreak = ord("\n")
+linebreak = ord("\n")
+underline = ord("_")
+
+# Static list of all unicode codepoints reported by Py_UNICODE_ISLINEBREAK.
+# Using a dict as a poor man's set.
+uni_linebreaks = {10: True, 13: True, 28: True, 29: True, 30: True, 133: True,
+ 8232: True, 8233: True}
def _is_digit(space, w_char):
code = space.int_w(space.ord(w_char))
- return space.newbool(code < 128 and _ascii_char_info[code] & 1)
+ return space.newbool(code < 128 and ascii_char_info[code] & 1)
def _is_space(space, w_char):
code = space.int_w(space.ord(w_char))
- return space.newbool(code < 128 and _ascii_char_info[code] & 2)
+ return space.newbool(code < 128 and ascii_char_info[code] & 2)
def _is_word(space, w_char):
code = space.int_w(space.ord(w_char))
- return space.newbool(code < 128 and _ascii_char_info[code] & 16)
+ return space.newbool(code < 128 and ascii_char_info[code] & 16)
+
+def _is_uni_word(space, w_char):
+ code = space.int_w(space.ord(w_char))
+ w_unichar = space.newunicode([code])
+ isalnum = space.is_true(space.call_method(w_unichar, "isalnum"))
+ return space.newbool(isalnum or code == underline)
+
+def _is_loc_word(space, w_char):
+ code = space.int_w(space.ord(w_char))
+ if code > 255:
+ return space.newbool(False)
+ # Need to use this new w_char_not_uni from here on, because this one is
+ # guaranteed to be not unicode.
+ w_char_not_uni = space.wrap(chr(code))
+ isalnum = space.is_true(space.call_method(w_char_not_uni, "isalnum"))
+ return space.newbool(isalnum or code == underline)
def _is_linebreak(space, w_char):
- return space.newbool(space.int_w(space.ord(w_char)) == _linebreak)
+ return space.newbool(space.int_w(space.ord(w_char)) == linebreak)
+
+def _is_uni_linebreak(space, w_char):
+ code = space.int_w(space.ord(w_char))
+ return space.newbool(uni_linebreaks.has_key(code))
Modified: pypy/dist/pypy/module/_sre/test/test_app_sre.py
==============================================================================
--- pypy/dist/pypy/module/_sre/test/test_app_sre.py (original)
+++ pypy/dist/pypy/module/_sre/test/test_app_sre.py Wed Aug 10 19:17:29 2005
@@ -303,6 +303,7 @@
LOWER_PI = u"\u03c0"
INDIAN_DIGIT = u"\u0966"
EM_SPACE = u"\u2001"
+ LOWER_AE = "\xe4"
assert re.search(r"bla\d\s\w", "bla3 b")
assert re.search(r"b\d", u"b%s" % INDIAN_DIGIT, re.UNICODE)
assert not re.search(r"b\D", u"b%s" % INDIAN_DIGIT, re.UNICODE)
@@ -310,6 +311,7 @@
assert not re.search(r"b\S", u"b%s" % EM_SPACE, re.UNICODE)
assert re.search(r"b\w", u"b%s" % LOWER_PI, re.UNICODE)
assert not re.search(r"b\W", u"b%s" % LOWER_PI, re.UNICODE)
+ assert re.search(r"b\w", "b%s" % LOWER_AE, re.UNICODE)
def test_search_simple_any(self):
import re
@@ -657,9 +659,11 @@
opcodes2 = s.encode_literal("b") \
+ [s.OPCODES["category"], s.CHCODES["category_loc_not_word"], s.OPCODES["success"]]
s.assert_no_match(opcodes1, "b\xFC")
+ s.assert_no_match(opcodes1, u"b\u00FC")
s.assert_match(opcodes2, "b\xFC")
locale.setlocale(locale.LC_ALL, "de_DE")
s.assert_match(opcodes1, "b\xFC")
+ s.assert_no_match(opcodes1, u"b\u00FC")
s.assert_no_match(opcodes2, "b\xFC")
s.void_locale()
except locale.Error:
More information about the Pypy-commit
mailing list