[pypy-svn] r16683 - pypy/dist/pypy/module/_sre
nik at codespeak.net
nik at codespeak.net
Fri Aug 26 20:26:31 CEST 2005
Author: nik
Date: Fri Aug 26 20:26:28 2005
New Revision: 16683
Modified:
pypy/dist/pypy/module/_sre/__init__.py
pypy/dist/pypy/module/_sre/app_sre.py
pypy/dist/pypy/module/_sre/interp_sre.py
Log:
Converted and enabled fast_search. Refactored getlower to be more efficient
on interp-level. Removed unneeded imports from app-level.
Modified: pypy/dist/pypy/module/_sre/__init__.py
==============================================================================
--- pypy/dist/pypy/module/_sre/__init__.py (original)
+++ pypy/dist/pypy/module/_sre/__init__.py Fri Aug 26 20:26:28 2005
@@ -17,7 +17,7 @@
}
interpleveldefs = {
- 'getlower': 'interp_sre.getlower',
+ 'getlower': 'interp_sre.w_getlower',
'_State': 'interp_sre.make_state',
'_match': 'interp_sre.w_match',
'_search': 'interp_sre.w_search',
Modified: pypy/dist/pypy/module/_sre/app_sre.py
==============================================================================
--- pypy/dist/pypy/module/_sre/app_sre.py (original)
+++ pypy/dist/pypy/module/_sre/app_sre.py Fri Aug 26 20:26:28 2005
@@ -7,12 +7,8 @@
copyrighted by: Copyright (c) 1997-2001 by Secret Labs AB
"""
-import array, operator, sys
-from sre_constants import ATCODES, OPCODES, CHCODES, MAXREPEAT
-from sre_constants import SRE_INFO_PREFIX, SRE_INFO_LITERAL
-from sre_constants import SRE_FLAG_UNICODE, SRE_FLAG_LOCALE
+import sys
import _sre
-from _sre import CODESIZE
def compile(pattern, flags, code, groups=0, groupindex={}, indexgroup=[None]):
@@ -289,45 +285,3 @@
def __deepcopy__():
raise TypeError, "cannot copy this pattern object"
-
-
-def fast_search(state, pattern_codes):
- """Skips forward in a string as fast as possible using information from
- an optimization info block."""
- # pattern starts with a known prefix
- # <5=length> <6=skip> <7=prefix data> <overlap data>
- flags = pattern_codes[2]
- prefix_len = pattern_codes[5]
- prefix_skip = pattern_codes[6] # don't really know what this is good for
- prefix = pattern_codes[7:7 + prefix_len]
- overlap = pattern_codes[7 + prefix_len - 1:pattern_codes[1] + 1]
- pattern_codes = pattern_codes[pattern_codes[1] + 1:]
- i = 0
- string_position = state.string_position
- while string_position < state.end:
- while True:
- if ord(state.string[string_position]) != prefix[i]:
- if i == 0:
- break
- else:
- i = overlap[i]
- else:
- i += 1
- if i == prefix_len:
- # found a potential match
- state.start = string_position + 1 - prefix_len
- state.string_position = string_position + 1 \
- - prefix_len + prefix_skip
- if flags & SRE_INFO_LITERAL:
- return True # matched all of pure literal pattern
- if _sre._match(state, pattern_codes[2 * prefix_skip:]):
- return True
- i = overlap[i]
- break
- string_position += 1
- return False
-
-
-def _log(message):
- if 0:
- print message
Modified: pypy/dist/pypy/module/_sre/interp_sre.py
==============================================================================
--- pypy/dist/pypy/module/_sre/interp_sre.py (original)
+++ pypy/dist/pypy/module/_sre/interp_sre.py Fri Aug 26 20:26:28 2005
@@ -11,6 +11,7 @@
#### Exposed functions
# XXX can we import those safely from sre_constants?
+SRE_INFO_PREFIX = 1
SRE_INFO_LITERAL = 2
SRE_FLAG_LOCALE = 4 # honour system locale
SRE_FLAG_UNICODE = 32 # use unicode locale
@@ -18,16 +19,17 @@
OPCODE_LITERAL = 19
MAXREPEAT = 65535
-def getlower(space, w_char_ord, w_flags):
- char_ord = space.int_w(w_char_ord)
- flags = space.int_w(w_flags)
+def w_getlower(space, w_char_ord, w_flags):
+ return space.wrap(getlower(space, space.int_w(w_char_ord), space.int_w(w_flags)))
+
+def getlower(space, char_ord, flags):
if (char_ord < 128) or (flags & SRE_FLAG_UNICODE) \
or (flags & SRE_FLAG_LOCALE and char_ord < 256):
w_uni_char = space.newunicode([char_ord])
w_lowered = space.call_method(w_uni_char, "lower")
- return space.ord(w_lowered)
+ return space.int_w(space.ord(w_lowered))
else:
- return space.wrap(char_ord)
+ return char_ord
#### Core classes
@@ -102,9 +104,7 @@
self.marks_stack.pop()
def lower(self, char_ord):
- # XXX this is ugly
- space = self.space
- return space.int_w(getlower(space, space.wrap(char_ord), space.wrap(self.flags)))
+ return getlower(self.space, char_ord, self.flags)
def interp_attrproperty_int(name, cls):
"NOT_RPYTHON: initialization-time only"
@@ -236,9 +236,8 @@
if pattern_codes[0] == OPCODE_INFO:
# optimization info block
# <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>
- # XXX fast_search temporarily disabled
- #if pattern_codes[2] & SRE_INFO_PREFIX and pattern_codes[5] > 1:
- # return state.fast_search(pattern_codes)
+ if pattern_codes[2] & SRE_INFO_PREFIX and pattern_codes[5] > 1:
+ return fast_search(space, state, pattern_codes)
flags = pattern_codes[2]
pattern_codes = pattern_codes[pattern_codes[1] + 1:]
@@ -251,6 +250,44 @@
string_position += 1
return False
+def fast_search(space, state, pattern_codes):
+ """Skips forward in a string as fast as possible using information from
+ an optimization info block."""
+ # pattern starts with a known prefix
+ # <5=length> <6=skip> <7=prefix data> <overlap data>
+ flags = pattern_codes[2]
+ prefix_len = pattern_codes[5]
+ prefix_skip = pattern_codes[6] # don't really know what this is good for
+ prefix = pattern_codes[7:7 + prefix_len]
+ overlap = pattern_codes[7 + prefix_len - 1:pattern_codes[1] + 1]
+ pattern_codes = pattern_codes[pattern_codes[1] + 1:]
+ i = 0
+ string_position = state.string_position
+ while string_position < state.end:
+ while True:
+ char_ord = space.int_w(space.ord(
+ space.getitem(state.w_string, space.wrap(string_position))))
+ if char_ord != prefix[i]:
+ if i == 0:
+ break
+ else:
+ i = overlap[i]
+ else:
+ i += 1
+ if i == prefix_len:
+ # found a potential match
+ state.start = string_position + 1 - prefix_len
+ state.string_position = string_position + 1 \
+ - prefix_len + prefix_skip
+ if flags & SRE_INFO_LITERAL:
+ return True # matched all of pure literal pattern
+ if match(space, state, pattern_codes[2 * prefix_skip:]):
+ return True
+ i = overlap[i]
+ break
+ string_position += 1
+ return False
+
def w_match(space, w_state, w_pattern_codes):
pattern_codes = [space.int_w(code) for code
in space.unpackiterable(w_pattern_codes)]
More information about the Pypy-commit
mailing list