[Python-checkins] CVS: python/dist/src/Lib sre.py,1.25,1.25.2.1 sre_compile.py,1.31,1.31.2.1 sre_constants.py,1.21,1.21.2.1 sre_parse.py,1.37,1.37.2.1
Guido van Rossum
gvanrossum@users.sourceforge.net
Wed, 13 Jun 2001 08:15:04 -0700
Update of /cvsroot/python/python/dist/src/Lib
In directory usw-pr-cvs1:/tmp/cvs-serv31452/Lib
Modified Files:
Tag: release20-maint
sre.py sre_compile.py sre_constants.py sre_parse.py
Log Message:
Bring SRE up to date with Python 2.1
Index: sre.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sre.py,v
retrieving revision 1.25
retrieving revision 1.25.2.1
diff -C2 -r1.25 -r1.25.2.1
*** sre.py 2000/09/21 17:03:24 1.25
--- sre.py 2001/06/13 15:15:02 1.25.2.1
***************
*** 4,8 ****
# re-compatible interface for the sre matching engine
#
! # Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
#
# This version of the SRE library can be redistributed under CNRI's
--- 4,8 ----
# re-compatible interface for the sre matching engine
#
! # Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.
#
# This version of the SRE library can be redistributed under CNRI's
***************
*** 15,35 ****
#
- # FIXME: change all FIXME's to XXX ;-)
-
import sre_compile
import sre_parse
import string
# flags
! I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE
! L = LOCALE = sre_compile.SRE_FLAG_LOCALE
! M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE
! S = DOTALL = sre_compile.SRE_FLAG_DOTALL
! X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE
!
! # sre extensions (may or may not be in 1.6/2.0 final)
! T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE
! U = UNICODE = sre_compile.SRE_FLAG_UNICODE
# sre exception
--- 15,43 ----
#
import sre_compile
import sre_parse
+ # public symbols
+ __all__ = [ "match", "search", "sub", "subn", "split", "findall",
+ "compile", "purge", "template", "escape", "I", "L", "M", "S", "X",
+ "U", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE",
+ "UNICODE", "error" ]
+
+ __version__ = "2.1b2"
+
+ # this module works under 1.5.2 and later. don't use string methods
import string
# flags
! I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE # ignore case
! L = LOCALE = sre_compile.SRE_FLAG_LOCALE # assume current 8-bit locale
! U = UNICODE = sre_compile.SRE_FLAG_UNICODE # assume unicode locale
! M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE # make anchors look for newline
! S = DOTALL = sre_compile.SRE_FLAG_DOTALL # make dot match newline
! X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE # ignore whitespace and comments
!
! # sre extensions (experimental, don't rely on these)
! T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE # disable backtracking
! DEBUG = sre_compile.SRE_FLAG_DEBUG # dump pattern after compilation
# sre exception
***************
*** 39,72 ****
# public interface
- # FIXME: add docstrings
-
def match(pattern, string, flags=0):
return _compile(pattern, flags).match(string)
def search(pattern, string, flags=0):
return _compile(pattern, flags).search(string)
def sub(pattern, repl, string, count=0):
return _compile(pattern, 0).sub(repl, string, count)
def subn(pattern, repl, string, count=0):
return _compile(pattern, 0).subn(repl, string, count)
def split(pattern, string, maxsplit=0):
return _compile(pattern, 0).split(string, maxsplit)
def findall(pattern, string, maxsplit=0):
return _compile(pattern, 0).findall(string, maxsplit)
def compile(pattern, flags=0):
return _compile(pattern, flags)
def purge():
_cache.clear()
def template(pattern, flags=0):
return _compile(pattern, flags|T)
def escape(pattern):
s = list(pattern)
for i in range(len(pattern)):
--- 47,104 ----
# public interface
def match(pattern, string, flags=0):
+ """Try to apply the pattern at the start of the string, returning
+ a match object, or None if no match was found."""
return _compile(pattern, flags).match(string)
def search(pattern, string, flags=0):
+ """Scan through string looking for a match to the pattern, returning
+ a match object, or None if no match was found."""
return _compile(pattern, flags).search(string)
def sub(pattern, repl, string, count=0):
+ """Return the string obtained by replacing the leftmost
+ non-overlapping occurrences of the pattern in string by the
+ replacement repl"""
return _compile(pattern, 0).sub(repl, string, count)
def subn(pattern, repl, string, count=0):
+ """Return a 2-tuple containing (new_string, number).
+ new_string is the string obtained by replacing the leftmost
+ non-overlapping occurrences of the pattern in the source
+ string by the replacement repl. number is the number of
+ substitutions that were made."""
return _compile(pattern, 0).subn(repl, string, count)
def split(pattern, string, maxsplit=0):
+ """Split the source string by the occurrences of the pattern,
+ returning a list containing the resulting substrings."""
return _compile(pattern, 0).split(string, maxsplit)
def findall(pattern, string, maxsplit=0):
+ """Return a list of all non-overlapping matches in the string.
+
+ If one or more groups are present in the pattern, return a
+ list of groups; this will be a list of tuples if the pattern
+ has more than one group.
+
+ Empty matches are included in the result."""
return _compile(pattern, 0).findall(string, maxsplit)
def compile(pattern, flags=0):
+ "Compile a regular expression pattern, returning a pattern object."
return _compile(pattern, flags)
def purge():
+ "Clear the regular expression cache"
_cache.clear()
+ _cache_repl.clear()
def template(pattern, flags=0):
+ "Compile a template pattern, returning a pattern object"
return _compile(pattern, flags|T)
def escape(pattern):
+ "Escape all non-alphanumeric characters in pattern."
s = list(pattern)
for i in range(len(pattern)):
***************
*** 83,86 ****
--- 115,120 ----
_cache = {}
+ _cache_repl = {}
+
_MAXCACHE = 100
***************
*** 106,109 ****
--- 140,158 ----
return p
+ def _compile_repl(*key):
+ # internal: compile replacement pattern
+ p = _cache_repl.get(key)
+ if p is not None:
+ return p
+ repl, pattern = key
+ try:
+ p = sre_parse.parse_template(repl, pattern)
+ except error, v:
+ raise error, v # invalid expression
+ if len(_cache_repl) >= _MAXCACHE:
+ _cache_repl.clear()
+ _cache_repl[key] = p
+ return p
+
def _expand(pattern, match, template):
# internal: match.expand implementation hook
***************
*** 120,124 ****
filter = template
else:
! template = sre_parse.parse_template(template, pattern)
def filter(match, template=template):
return sre_parse.expand_template(template, match)
--- 169,173 ----
filter = template
else:
! template = _compile_repl(template, pattern)
def filter(match, template=template):
return sre_parse.expand_template(template, match)
***************
*** 159,163 ****
append(string[i:b])
if g and b != e:
! extend(m.groups())
i = e
n = n + 1
--- 208,212 ----
append(string[i:b])
if g and b != e:
! extend(list(m.groups()))
i = e
n = n + 1
***************
*** 205,209 ****
action = self.lexicon[m.lastindex][1]
if callable(action):
! self.match = match
action = action(self, m.group())
if action is not None:
--- 254,258 ----
action = self.lexicon[m.lastindex][1]
if callable(action):
! self.match = m
action = action(self, m.group())
if action is not None:
Index: sre_compile.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sre_compile.py,v
retrieving revision 1.31
retrieving revision 1.31.2.1
diff -C2 -r1.31 -r1.31.2.1
*** sre_compile.py 2000/10/07 17:38:22 1.31
--- sre_compile.py 2001/06/13 15:15:02 1.31.2.1
***************
*** 4,8 ****
# convert template to internal format
#
! # Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
#
# See the sre.py file for information on usage and redistribution.
--- 4,8 ----
# convert template to internal format
#
! # Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
#
# See the sre.py file for information on usage and redistribution.
***************
*** 13,16 ****
--- 13,18 ----
from sre_constants import *
+ assert _sre.MAGIC == MAGIC, "SRE module mismatch"
+
MAXCODE = 65535
***************
*** 22,28 ****
if flags & SRE_FLAG_IGNORECASE:
emit(OPCODES[OP_IGNORE[op]])
else:
emit(OPCODES[op])
! emit(av)
elif op is IN:
if flags & SRE_FLAG_IGNORECASE:
--- 24,31 ----
if flags & SRE_FLAG_IGNORECASE:
emit(OPCODES[OP_IGNORE[op]])
+ emit(_sre.getlower(av, flags))
else:
emit(OPCODES[op])
! emit(av)
elif op is IN:
if flags & SRE_FLAG_IGNORECASE:
***************
*** 103,109 ****
emit(OPCODES[op])
if flags & SRE_FLAG_MULTILINE:
! emit(ATCODES[AT_MULTILINE.get(av, av)])
! else:
! emit(ATCODES[av])
elif op is BRANCH:
emit(OPCODES[op])
--- 106,115 ----
emit(OPCODES[op])
if flags & SRE_FLAG_MULTILINE:
! av = AT_MULTILINE.get(av, av)
! if flags & SRE_FLAG_LOCALE:
! av = AT_LOCALE.get(av, av)
! elif flags & SRE_FLAG_UNICODE:
! av = AT_UNICODE.get(av, av)
! emit(ATCODES[av])
elif op is BRANCH:
emit(OPCODES[op])
***************
*** 122,130 ****
emit(OPCODES[op])
if flags & SRE_FLAG_LOCALE:
! emit(CHCODES[CH_LOCALE[av]])
elif flags & SRE_FLAG_UNICODE:
! emit(CHCODES[CH_UNICODE[av]])
! else:
! emit(CHCODES[av])
elif op is GROUPREF:
if flags & SRE_FLAG_IGNORECASE:
--- 128,135 ----
emit(OPCODES[op])
if flags & SRE_FLAG_LOCALE:
! av = CH_LOCALE[av]
elif flags & SRE_FLAG_UNICODE:
! av = CH_UNICODE[av]
! emit(CHCODES[av])
elif op is GROUPREF:
if flags & SRE_FLAG_IGNORECASE:
***************
*** 177,181 ****
charmap[i] = 1
elif op is CATEGORY:
! # FIXME: could append to charmap tail
return charset # cannot compress
except IndexError:
--- 182,186 ----
charmap[i] = 1
elif op is CATEGORY:
! # XXX: could append to charmap tail
return charset # cannot compress
except IndexError:
***************
*** 365,369 ****
# print code
! # FIXME: <fl> get rid of this limitation!
assert p.pattern.groups <= 100,\
"sorry, but this version only supports 100 named groups"
--- 370,374 ----
# print code
! # XXX: <fl> get rid of this limitation!
assert p.pattern.groups <= 100,\
"sorry, but this version only supports 100 named groups"
Index: sre_constants.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sre_constants.py,v
retrieving revision 1.21
retrieving revision 1.21.2.1
diff -C2 -r1.21 -r1.21.2.1
*** sre_constants.py 2000/10/07 17:38:22 1.21
--- sre_constants.py 2001/06/13 15:15:02 1.21.2.1
***************
*** 5,15 ****
# run this script to update the _sre include files!
#
! # Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
#
# See the sre.py file for information on usage and redistribution.
#
MAXREPEAT = 65535
# should this really be here?
--- 5,22 ----
# run this script to update the _sre include files!
#
! # Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.
#
# See the sre.py file for information on usage and redistribution.
#
+ # update when constants are added or removed
+
+ MAGIC = 20010320
+
+ # max code word in this release
+
MAXREPEAT = 65535
+ # SRE standard exception (access as sre.error)
# should this really be here?
***************
*** 55,62 ****
--- 62,75 ----
AT_BEGINNING = "at_beginning"
AT_BEGINNING_LINE = "at_beginning_line"
+ AT_BEGINNING_STRING = "at_beginning_string"
AT_BOUNDARY = "at_boundary"
AT_NON_BOUNDARY = "at_non_boundary"
AT_END = "at_end"
AT_END_LINE = "at_end_line"
+ AT_END_STRING = "at_end_string"
+ AT_LOC_BOUNDARY = "at_loc_boundary"
+ AT_LOC_NON_BOUNDARY = "at_loc_non_boundary"
+ AT_UNI_BOUNDARY = "at_uni_boundary"
+ AT_UNI_NON_BOUNDARY = "at_uni_non_boundary"
# categories
***************
*** 110,115 ****
ATCODES = [
! AT_BEGINNING, AT_BEGINNING_LINE, AT_BOUNDARY,
! AT_NON_BOUNDARY, AT_END, AT_END_LINE
]
--- 123,130 ----
ATCODES = [
! AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY,
! AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING,
! AT_LOC_BOUNDARY, AT_LOC_NON_BOUNDARY, AT_UNI_BOUNDARY,
! AT_UNI_NON_BOUNDARY
]
***************
*** 149,152 ****
--- 164,177 ----
}
+ AT_LOCALE = {
+ AT_BOUNDARY: AT_LOC_BOUNDARY,
+ AT_NON_BOUNDARY: AT_LOC_NON_BOUNDARY
+ }
+
+ AT_UNICODE = {
+ AT_BOUNDARY: AT_UNI_BOUNDARY,
+ AT_NON_BOUNDARY: AT_UNI_NON_BOUNDARY
+ }
+
CH_LOCALE = {
CATEGORY_DIGIT: CATEGORY_DIGIT,
***************
*** 179,182 ****
--- 204,208 ----
SRE_FLAG_UNICODE = 32 # use unicode locale
SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments
+ SRE_FLAG_DEBUG = 128 # debugging
# flags for INFO primitive
***************
*** 202,206 ****
* to change anything in here, edit sre_constants.py and run it.
*
! * Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
*
* See the _sre.c file for information on usage and redistribution.
--- 228,232 ----
* to change anything in here, edit sre_constants.py and run it.
*
! * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
*
* See the _sre.c file for information on usage and redistribution.
***************
*** 208,211 ****
--- 234,239 ----
""")
+
+ f.write("#define SRE_MAGIC %d\n" % MAGIC)
dump(f, OPCODES, "SRE_OP")
Index: sre_parse.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sre_parse.py,v
retrieving revision 1.37
retrieving revision 1.37.2.1
diff -C2 -r1.37 -r1.37.2.1
*** sre_parse.py 2000/10/07 17:38:22 1.37
--- sre_parse.py 2001/06/13 15:15:02 1.37.2.1
***************
*** 4,12 ****
# convert re-style regular expression to sre pattern
#
! # Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
#
# See the sre.py file for information on usage and redistribution.
#
import string, sys
--- 4,15 ----
# convert re-style regular expression to sre pattern
#
! # Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.
#
# See the sre.py file for information on usage and redistribution.
#
+ # XXX: show string offset and offending character for all errors
+
+ # this module works under 1.5.2 and later. don't use string methods
import string, sys
***************
*** 24,39 ****
ESCAPES = {
! r"\a": (LITERAL, 7),
! r"\b": (LITERAL, 8),
! r"\f": (LITERAL, 12),
! r"\n": (LITERAL, 10),
! r"\r": (LITERAL, 13),
! r"\t": (LITERAL, 9),
! r"\v": (LITERAL, 11),
r"\\": (LITERAL, ord("\\"))
}
CATEGORIES = {
! r"\A": (AT, AT_BEGINNING), # start of string
r"\b": (AT, AT_BOUNDARY),
r"\B": (AT, AT_NON_BOUNDARY),
--- 27,42 ----
ESCAPES = {
! r"\a": (LITERAL, ord("\a")),
! r"\b": (LITERAL, ord("\b")),
! r"\f": (LITERAL, ord("\f")),
! r"\n": (LITERAL, ord("\n")),
! r"\r": (LITERAL, ord("\r")),
! r"\t": (LITERAL, ord("\t")),
! r"\v": (LITERAL, ord("\v")),
r"\\": (LITERAL, ord("\\"))
}
CATEGORIES = {
! r"\A": (AT, AT_BEGINNING_STRING), # start of string
r"\b": (AT, AT_BOUNDARY),
r"\B": (AT, AT_NON_BOUNDARY),
***************
*** 44,48 ****
r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
! r"\Z": (AT, AT_END), # end of string
}
--- 47,51 ----
r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
! r"\Z": (AT, AT_END_STRING), # end of string
}
***************
*** 59,74 ****
}
class Pattern:
# master pattern object. keeps track of global attributes
def __init__(self):
self.flags = 0
self.groups = 1
self.groupdict = {}
! def getgroup(self, name=None):
gid = self.groups
self.groups = gid + 1
if name:
self.groupdict[name] = gid
return gid
class SubPattern:
--- 62,90 ----
}
+ # figure out best way to convert hex/octal numbers to integers
+ try:
+ int("10", 8)
+ atoi = int # 2.0 and later
+ except TypeError:
+ atoi = string.atoi # 1.5.2
+
class Pattern:
# master pattern object. keeps track of global attributes
def __init__(self):
self.flags = 0
+ self.open = []
self.groups = 1
self.groupdict = {}
! def opengroup(self, name=None):
gid = self.groups
self.groups = gid + 1
if name:
self.groupdict[name] = gid
+ self.open.append(gid)
return gid
+ def closegroup(self, gid):
+ self.open.remove(gid)
+ def checkgroup(self, gid):
+ return gid < self.groups and gid not in self.open
class SubPattern:
***************
*** 209,213 ****
# check if the escape string represents a valid group
try:
! gid = int(escape[1:])
if gid and gid < groups:
return gid
--- 225,229 ----
# check if the escape string represents a valid group
try:
! gid = atoi(escape[1:])
if gid and gid < groups:
return gid
***************
*** 232,236 ****
if len(escape) != 2:
raise error, "bogus escape: %s" % repr("\\" + escape)
! return LITERAL, int(escape, 16) & 0xff
elif str(escape[1:2]) in OCTDIGITS:
# octal escape (up to three digits)
--- 248,252 ----
if len(escape) != 2:
raise error, "bogus escape: %s" % repr("\\" + escape)
! return LITERAL, atoi(escape, 16) & 0xff
elif str(escape[1:2]) in OCTDIGITS:
# octal escape (up to three digits)
***************
*** 238,242 ****
escape = escape + source.get()
escape = escape[1:]
! return LITERAL, int(escape, 8) & 0xff
if len(escape) == 2:
return LITERAL, ord(escape[1])
--- 254,258 ----
escape = escape + source.get()
escape = escape[1:]
! return LITERAL, atoi(escape, 8) & 0xff
if len(escape) == 2:
return LITERAL, ord(escape[1])
***************
*** 260,269 ****
if len(escape) != 4:
raise ValueError
! return LITERAL, int(escape[2:], 16) & 0xff
elif escape[1:2] == "0":
# octal escape
while source.next in OCTDIGITS and len(escape) < 4:
escape = escape + source.get()
! return LITERAL, int(escape[1:], 8) & 0xff
elif escape[1:2] in DIGITS:
# octal escape *or* decimal group reference (sigh)
--- 276,285 ----
if len(escape) != 4:
raise ValueError
! return LITERAL, atoi(escape[2:], 16) & 0xff
elif escape[1:2] == "0":
# octal escape
while source.next in OCTDIGITS and len(escape) < 4:
escape = escape + source.get()
! return LITERAL, atoi(escape[1:], 8) & 0xff
elif escape[1:2] in DIGITS:
# octal escape *or* decimal group reference (sigh)
***************
*** 275,282 ****
# got three octal digits; this is an octal escape
escape = escape + source.get()
! return LITERAL, int(escape[1:], 8) & 0xff
# got at least one decimal digit; this is a group reference
group = _group(escape, state.groups)
if group:
return GROUPREF, group
raise ValueError
--- 291,300 ----
# got three octal digits; this is an octal escape
escape = escape + source.get()
! return LITERAL, atoi(escape[1:], 8) & 0xff
# got at least one decimal digit; this is a group reference
group = _group(escape, state.groups)
if group:
+ if not state.checkgroup(group):
+ raise error, "cannot refer to open group"
return GROUPREF, group
raise ValueError
***************
*** 403,411 ****
code2 = LITERAL, ord(this)
if code1[0] != LITERAL or code2[0] != LITERAL:
! raise error, "illegal range"
lo = code1[1]
hi = code2[1]
if hi < lo:
! raise error, "illegal range"
set.append((RANGE, (lo, hi)))
else:
--- 421,429 ----
code2 = LITERAL, ord(this)
if code1[0] != LITERAL or code2[0] != LITERAL:
! raise error, "bad character range"
lo = code1[1]
hi = code2[1]
if hi < lo:
! raise error, "bad character range"
set.append((RANGE, (lo, hi)))
else:
***************
*** 414,418 ****
set.append(code1)
! # FIXME: <fl> move set optimization to compiler!
if len(set)==1 and set[0][0] is LITERAL:
subpattern.append(set[0]) # optimization
--- 432,436 ----
set.append(code1)
! # XXX: <fl> should move set optimization to compiler!
if len(set)==1 and set[0][0] is LITERAL:
subpattern.append(set[0]) # optimization
***************
*** 420,424 ****
subpattern.append((NOT_LITERAL, set[1][1])) # optimization
else:
! # FIXME: <fl> add charmap optimization
subpattern.append((IN, set))
--- 438,442 ----
subpattern.append((NOT_LITERAL, set[1][1])) # optimization
else:
! # XXX: <fl> should add charmap optimization here
subpattern.append((IN, set))
***************
*** 429,432 ****
--- 447,451 ----
elif this == "*":
min, max = 0, MAXREPEAT
+
elif this == "+":
min, max = 1, MAXREPEAT
***************
*** 447,454 ****
continue
if lo:
! min = int(lo)
if hi:
! max = int(hi)
! # FIXME: <fl> check that hi >= lo!
else:
raise error, "not supported"
--- 466,474 ----
continue
if lo:
! min = atoi(lo)
if hi:
! max = atoi(hi)
! if max < min:
! raise error, "bad repeat interval"
else:
raise error, "not supported"
***************
*** 457,461 ****
--- 477,485 ----
item = subpattern[-1:]
else:
+ item = None
+ if not item or (len(item) == 1 and item[0][0] == AT):
raise error, "nothing to repeat"
+ if item[0][0] in (MIN_REPEAT, MAX_REPEAT):
+ raise error, "multiple repeat"
if source.match("?"):
subpattern[-1] = (MIN_REPEAT, (min, max, item))
***************
*** 486,490 ****
group = 1
if not isname(name):
! raise error, "illegal character in group name"
elif source.match("="):
# named backreference
--- 510,514 ----
group = 1
if not isname(name):
! raise error, "bad character in group name"
elif source.match("="):
# named backreference
***************
*** 498,502 ****
name = name + char
if not isname(name):
! raise error, "illegal character in group name"
gid = state.groupdict.get(name)
if gid is None:
--- 522,526 ----
name = name + char
if not isname(name):
! raise error, "bad character in group name"
gid = state.groupdict.get(name)
if gid is None:
***************
*** 540,543 ****
--- 564,569 ----
else:
# flags
+ if not FLAGS.has_key(source.next):
+ raise error, "unexpected end of pattern"
while FLAGS.has_key(source.next):
state.flags = state.flags | FLAGS[source.get()]
***************
*** 548,560 ****
group = None
else:
! group = state.getgroup(name)
p = _parse_sub(source, state)
if not source.match(")"):
raise error, "unbalanced parenthesis"
subpattern.append((SUBPATTERN, (group, p)))
else:
while 1:
char = source.get()
! if char is None or char == ")":
break
raise error, "unknown extension"
--- 574,590 ----
group = None
else:
! group = state.opengroup(name)
p = _parse_sub(source, state)
if not source.match(")"):
raise error, "unbalanced parenthesis"
+ if group is not None:
+ state.closegroup(group)
subpattern.append((SUBPATTERN, (group, p)))
else:
while 1:
char = source.get()
! if char is None:
! raise error, "unexpected end of pattern"
! if char == ")":
break
raise error, "unknown extension"
***************
*** 583,586 ****
--- 613,617 ----
pattern = Pattern()
pattern.flags = flags
+ pattern.str = str
p = _parse_sub(source, pattern, 0)
***************
*** 592,596 ****
raise error, "bogus characters at end of regular expression"
! # p.dump()
if not (flags & SRE_FLAG_VERBOSE) and p.pattern.flags & SRE_FLAG_VERBOSE:
--- 623,628 ----
raise error, "bogus characters at end of regular expression"
! if flags & SRE_FLAG_DEBUG:
! p.dump()
if not (flags & SRE_FLAG_VERBOSE) and p.pattern.flags & SRE_FLAG_VERBOSE:
***************
*** 607,610 ****
--- 639,652 ----
p = []
a = p.append
+ def literal(literal, p=p):
+ if p and p[-1][0] is LITERAL:
+ p[-1] = LITERAL, p[-1][1] + literal
+ else:
+ p.append((LITERAL, literal))
+ sep = source[:0]
+ if type(sep) is type(""):
+ char = chr
+ else:
+ char = unichr
while 1:
this = s.get()
***************
*** 626,633 ****
raise error, "bad group name"
try:
! index = int(name)
except ValueError:
if not isname(name):
! raise error, "illegal character in group name"
try:
index = pattern.groupindex[name]
--- 668,675 ----
raise error, "bad group name"
try:
! index = atoi(name)
except ValueError:
if not isname(name):
! raise error, "bad character in group name"
try:
index = pattern.groupindex[name]
***************
*** 642,646 ****
if (s.next not in DIGITS or
not _group(this + s.next, pattern.groups+1)):
! code = MARK, int(group)
break
elif s.next in OCTDIGITS:
--- 684,688 ----
if (s.next not in DIGITS or
not _group(this + s.next, pattern.groups+1)):
! code = MARK, group
break
elif s.next in OCTDIGITS:
***************
*** 650,682 ****
if not code:
this = this[1:]
! code = LITERAL, int(this[-6:], 8) & 0xff
! a(code)
else:
try:
! a(ESCAPES[this])
except KeyError:
! for c in this:
! a((LITERAL, ord(c)))
else:
! a((LITERAL, ord(this)))
! return p
def expand_template(template, match):
! # FIXME: <fl> this is sooooo slow. drop in the slicelist
! # code instead
! p = []
! a = p.append
sep = match.string[:0]
! if type(sep) is type(""):
! char = chr
! else:
! char = unichr
! for c, s in template:
! if c is LITERAL:
! a(char(s))
! elif c is MARK:
! s = match.group(s)
if s is None:
! raise error, "empty group"
! a(s)
! return string.join(p, sep)
--- 692,732 ----
if not code:
this = this[1:]
! code = LITERAL, char(atoi(this[-6:], 8) & 0xff)
! if code[0] is LITERAL:
! literal(code[1])
! else:
! a(code)
else:
try:
! this = char(ESCAPES[this][1])
except KeyError:
! pass
! literal(this)
else:
! literal(this)
! # convert template to groups and literals lists
! i = 0
! groups = []
! literals = []
! for c, s in p:
! if c is MARK:
! groups.append((i, s))
! literals.append(None)
! else:
! literals.append(s)
! i = i + 1
! return groups, literals
def expand_template(template, match):
! g = match.group
sep = match.string[:0]
! groups, literals = template
! literals = literals[:]
! try:
! for index, group in groups:
! literals[index] = s = g(group)
if s is None:
! raise IndexError
! except IndexError:
! raise error, "empty group"
! return string.join(literals, sep)