Update of /cvsroot/python/python/dist/src/Lib
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv22850
Modified Files:
sre_parse.py
Log Message:
Simple Optimizations:
* Factor constant expressions out of loops.
* Presize a list being grown to a known length.
Index: sre_parse.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sre_parse.py,v
retrieving revision 1.59
retrieving revision 1.60
diff -C2 -d -r1.59 -r1.60
*** sre_parse.py 18 Jan 2004 20:29:54 -0000 1.59
--- sre_parse.py 26 Mar 2004 23:24:00 -0000 1.60
***************
*** 104,107 ****
--- 104,108 ----
def dump(self, level=0):
nl = 1
+ seqtypes = type(()), type([])
for op, av in self.data:
print level*" " + op,; nl = 0
***************
*** 119,123 ****
a.dump(level+1); nl = 1
i = i + 1
! elif type(av) in (type(()), type([])):
for a in av:
if isinstance(a, SubPattern):
--- 120,124 ----
a.dump(level+1); nl = 1
i = i + 1
! elif type(av) in seqtypes:
for a in av:
if isinstance(a, SubPattern):
***************
*** 150,153 ****
--- 151,156 ----
return self.width
lo = hi = 0L
+ UNITCODES = (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY)
+ REPEATCODES = (MIN_REPEAT, MAX_REPEAT)
for op, av in self.data:
if op is BRANCH:
***************
*** 168,176 ****
lo = lo + i
hi = hi + j
! elif op in (MIN_REPEAT, MAX_REPEAT):
i, j = av[2].getwidth()
lo = lo + long(i) * av[0]
hi = hi + long(j) * av[1]
! elif op in (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY):
lo = lo + 1
hi = hi + 1
--- 171,179 ----
lo = lo + i
hi = hi + j
! elif op in REPEATCODES:
i, j = av[2].getwidth()
lo = lo + long(i) * av[0]
hi = hi + long(j) * av[1]
! elif op in UNITCODES:
lo = lo + 1
hi = hi + 1
***************
*** 314,324 ****
items = []
while 1:
! items.append(_parse(source, state))
! if source.match("|"):
continue
if not nested:
break
! if not source.next or source.match(")", 0):
break
else:
--- 317,329 ----
items = []
+ itemsappend = items.append
+ sourcematch = source.match
while 1:
! itemsappend(_parse(source, state))
! if sourcematch("|"):
continue
if not nested:
break
! if not source.next or sourcematch(")", 0):
break
else:
***************
*** 329,332 ****
--- 334,338 ----
subpattern = SubPattern(state)
+ subpatternappend = subpattern.append
# check if all items share a common prefix
***************
*** 345,349 ****
for item in items:
del item[0]
! subpattern.append(prefix)
continue # check next one
break
--- 351,355 ----
for item in items:
del item[0]
! subpatternappend(prefix)
continue # check next one
break
***************
*** 357,363 ****
# branch (the compiler may optimize this even more)
set = []
for item in items:
! set.append(item[0])
! subpattern.append((IN, set))
return subpattern
--- 363,370 ----
# branch (the compiler may optimize this even more)
set = []
+ setappend = set.append
for item in items:
! setappend(item[0])
! subpatternappend((IN, set))
return subpattern
***************
*** 381,392 ****
def _parse(source, state):
# parse a simple pattern
-
subpattern = SubPattern(state)
while 1:
! if source.next in ("|", ")"):
break # end of subpattern
! this = source.get()
if this is None:
break # end of pattern
--- 388,408 ----
def _parse(source, state):
# parse a simple pattern
subpattern = SubPattern(state)
+ # precompute constants into local variables
+ subpatternappend = subpattern.append
+ sourceget = source.get
+ sourcematch = source.match
+ _len = len
+ PATTERNENDERS = ("|", ")")
+ ASSERTCHARS = ("=", "!", "<")
+ LOOKBEHINDASSERTCHARS = ("=", "!")
+ REPEATCODES = (MIN_REPEAT, MAX_REPEAT)
+
while 1:
! if source.next in PATTERNENDERS:
break # end of subpattern
! this = sourceget()
if this is None:
break # end of pattern
***************
*** 398,402 ****
if this == "#":
while 1:
! this = source.get()
if this in (None, "\n"):
break
--- 414,418 ----
if this == "#":
while 1:
! this = sourceget()
if this in (None, "\n"):
break
***************
*** 404,420 ****
if this and this[0] not in SPECIAL_CHARS:
! subpattern.append((LITERAL, ord(this)))
elif this == "[":
# character set
set = []
! ## if source.match(":"):
## pass # handle character classes
! if source.match("^"):
! set.append((NEGATE, None))
# check remaining characters
start = set[:]
while 1:
! this = source.get()
if this == "]" and set != start:
break
--- 420,437 ----
if this and this[0] not in SPECIAL_CHARS:
! subpatternappend((LITERAL, ord(this)))
elif this == "[":
# character set
set = []
! setappend = set.append
! ## if sourcematch(":"):
## pass # handle character classes
! if sourcematch("^"):
! setappend((NEGATE, None))
# check remaining characters
start = set[:]
while 1:
! this = sourceget()
if this == "]" and set != start:
break
***************
*** 425,436 ****
else:
raise error, "unexpected end of regular expression"
! if source.match("-"):
# potential range
! this = source.get()
if this == "]":
if code1[0] is IN:
code1 = code1[1][0]
! set.append(code1)
! set.append((LITERAL, ord("-")))
break
elif this:
--- 442,453 ----
else:
raise error, "unexpected end of regular expression"
! if sourcematch("-"):
# potential range
! this = sourceget()
if this == "]":
if code1[0] is IN:
code1 = code1[1][0]
! setappend(code1)
! setappend((LITERAL, ord("-")))
break
elif this:
***************
*** 445,449 ****
if hi < lo:
raise error, "bad character range"
! set.append((RANGE, (lo, hi)))
else:
raise error, "unexpected end of regular expression"
--- 462,466 ----
if hi < lo:
raise error, "bad character range"
! setappend((RANGE, (lo, hi)))
else:
raise error, "unexpected end of regular expression"
***************
*** 451,464 ****
if code1[0] is IN:
code1 = code1[1][0]
! set.append(code1)
# XXX: <fl> should move set optimization to compiler!
! if len(set)==1 and set[0][0] is LITERAL:
! subpattern.append(set[0]) # optimization
! elif len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL:
! subpattern.append((NOT_LITERAL, set[1][1])) # optimization
else:
# XXX: <fl> should add charmap optimization here
! subpattern.append((IN, set))
elif this and this[0] in REPEAT_CHARS:
--- 468,481 ----
if code1[0] is IN:
code1 = code1[1][0]
! setappend(code1)
# XXX: <fl> should move set optimization to compiler!
! if _len(set)==1 and set[0][0] is LITERAL:
! subpatternappend(set[0]) # optimization
! elif _len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL:
! subpatternappend((NOT_LITERAL, set[1][1])) # optimization
else:
# XXX: <fl> should add charmap optimization here
! subpatternappend((IN, set))
elif this and this[0] in REPEAT_CHARS:
***************
*** 477,487 ****
while source.next in DIGITS:
lo = lo + source.get()
! if source.match(","):
while source.next in DIGITS:
! hi = hi + source.get()
else:
hi = lo
! if not source.match("}"):
! subpattern.append((LITERAL, ord(this)))
source.seek(here)
continue
--- 494,504 ----
while source.next in DIGITS:
lo = lo + source.get()
! if sourcematch(","):
while source.next in DIGITS:
! hi = hi + sourceget()
else:
hi = lo
! if not sourcematch("}"):
! subpatternappend((LITERAL, ord(this)))
source.seek(here)
continue
***************
*** 499,507 ****
else:
item = None
! if not item or (len(item) == 1 and item[0][0] == AT):
raise error, "nothing to repeat"
! if item[0][0] in (MIN_REPEAT, MAX_REPEAT):
raise error, "multiple repeat"
! if source.match("?"):
subpattern[-1] = (MIN_REPEAT, (min, max, item))
else:
--- 516,524 ----
else:
item = None
! if not item or (_len(item) == 1 and item[0][0] == AT):
raise error, "nothing to repeat"
! if item[0][0] in REPEATCODES:
raise error, "multiple repeat"
! if sourcematch("?"):
subpattern[-1] = (MIN_REPEAT, (min, max, item))
else:
***************
*** 509,513 ****
elif this == ".":
! subpattern.append((ANY, None))
elif this == "(":
--- 526,530 ----
elif this == ".":
! subpatternappend((ANY, None))
elif this == "(":
***************
*** 515,528 ****
name = None
condgroup = None
! if source.match("?"):
group = 0
# options
! if source.match("P"):
# python extensions
! if source.match("<"):
# named group: skip forward to end of name
name = ""
while 1:
! char = source.get()
if char is None:
raise error, "unterminated name"
--- 532,545 ----
name = None
condgroup = None
! if sourcematch("?"):
group = 0
# options
! if sourcematch("P"):
# python extensions
! if sourcematch("<"):
# named group: skip forward to end of name
name = ""
while 1:
! char = sourceget()
if char is None:
raise error, "unterminated name"
***************
*** 533,541 ****
if not isname(name):
raise error, "bad character in group name"
! elif source.match("="):
# named backreference
name = ""
while 1:
! char = source.get()
if char is None:
raise error, "unterminated name"
--- 550,558 ----
if not isname(name):
raise error, "bad character in group name"
! elif sourcematch("="):
# named backreference
name = ""
while 1:
! char = sourceget()
if char is None:
raise error, "unterminated name"
***************
*** 548,592 ****
if gid is None:
raise error, "unknown group name"
! subpattern.append((GROUPREF, gid))
continue
else:
! char = source.get()
if char is None:
raise error, "unexpected end of pattern"
raise error, "unknown specifier: ?P%s" % char
! elif source.match(":"):
# non-capturing group
group = 2
! elif source.match("#"):
# comment
while 1:
if source.next is None or source.next == ")":
break
! source.get()
! if not source.match(")"):
raise error, "unbalanced parenthesis"
continue
! elif source.next in ("=", "!", "<"):
# lookahead assertions
! char = source.get()
dir = 1
if char == "<":
! if source.next not in ("=", "!"):
raise error, "syntax error"
dir = -1 # lookbehind
! char = source.get()
p = _parse_sub(source, state)
! if not source.match(")"):
raise error, "unbalanced parenthesis"
if char == "=":
! subpattern.append((ASSERT, (dir, p)))
else:
! subpattern.append((ASSERT_NOT, (dir, p)))
continue
! elif source.match("("):
# conditional backreference group
condname = ""
while 1:
! char = source.get()
if char is None:
raise error, "unterminated name"
--- 565,609 ----
if gid is None:
raise error, "unknown group name"
! subpatternappend((GROUPREF, gid))
continue
else:
! char = sourceget()
if char is None:
raise error, "unexpected end of pattern"
raise error, "unknown specifier: ?P%s" % char
! elif sourcematch(":"):
# non-capturing group
group = 2
! elif sourcematch("#"):
# comment
while 1:
if source.next is None or source.next == ")":
break
! sourceget()
! if not sourcematch(")"):
raise error, "unbalanced parenthesis"
continue
! elif source.next in ASSERTCHARS:
# lookahead assertions
! char = sourceget()
dir = 1
if char == "<":
! if source.next not in LOOKBEHINDASSERTCHARS:
raise error, "syntax error"
dir = -1 # lookbehind
! char = sourceget()
p = _parse_sub(source, state)
! if not sourcematch(")"):
raise error, "unbalanced parenthesis"
if char == "=":
! subpatternappend((ASSERT, (dir, p)))
else:
! subpatternappend((ASSERT_NOT, (dir, p)))
continue
! elif sourcematch("("):
# conditional backreference group
condname = ""
while 1:
! char = sourceget()
if char is None:
raise error, "unterminated name"
***************
*** 609,613 ****
raise error, "unexpected end of pattern"
while source.next in FLAGS:
! state.flags = state.flags | FLAGS[source.get()]
if group:
# parse group contents
--- 626,630 ----
raise error, "unexpected end of pattern"
while source.next in FLAGS:
! state.flags = state.flags | FLAGS[sourceget()]
if group:
# parse group contents
***************
*** 621,632 ****
else:
p = _parse_sub(source, state)
! if not source.match(")"):
raise error, "unbalanced parenthesis"
if group is not None:
state.closegroup(group)
! subpattern.append((SUBPATTERN, (group, p)))
else:
while 1:
! char = source.get()
if char is None:
raise error, "unexpected end of pattern"
--- 638,649 ----
else:
p = _parse_sub(source, state)
! if not sourcematch(")"):
raise error, "unbalanced parenthesis"
if group is not None:
state.closegroup(group)
! subpatternappend((SUBPATTERN, (group, p)))
else:
while 1:
! char = sourceget()
if char is None:
raise error, "unexpected end of pattern"
***************
*** 636,640 ****
elif this == "^":
! subpattern.append((AT, AT_BEGINNING))
elif this == "$":
--- 653,657 ----
elif this == "^":
! subpatternappend((AT, AT_BEGINNING))
elif this == "$":
***************
*** 643,647 ****
elif this and this[0] == "\\":
code = _escape(source, this, state)
! subpattern.append(code)
else:
--- 660,664 ----
elif this and this[0] == "\\":
code = _escape(source, this, state)
! subpatternappend(code)
else:
***************
*** 682,692 ****
# group references
s = Tokenizer(source)
p = []
a = p.append
! def literal(literal, p=p):
if p and p[-1][0] is LITERAL:
p[-1] = LITERAL, p[-1][1] + literal
else:
! p.append((LITERAL, literal))
sep = source[:0]
if type(sep) is type(""):
--- 699,710 ----
# group references
s = Tokenizer(source)
+ sget = s.get
p = []
a = p.append
! def literal(literal, p=p, pappend=a):
if p and p[-1][0] is LITERAL:
p[-1] = LITERAL, p[-1][1] + literal
else:
! pappend((LITERAL, literal))
sep = source[:0]
if type(sep) is type(""):
***************
*** 695,699 ****
makechar = unichr
while 1:
! this = s.get()
if this is None:
break # end of replacement string
--- 713,717 ----
makechar = unichr
while 1:
! this = sget()
if this is None:
break # end of replacement string
***************
*** 704,708 ****
if s.match("<"):
while 1:
! char = s.get()
if char is None:
raise error, "unterminated group name"
--- 722,726 ----
if s.match("<"):
while 1:
! char = sget()
if char is None:
raise error, "unterminated group name"
***************
*** 732,736 ****
break
elif s.next in OCTDIGITS:
! this = this + s.get()
else:
break
--- 750,754 ----
break
elif s.next in OCTDIGITS:
! this = this + sget()
else:
break
***************
*** 753,763 ****
i = 0
groups = []
! literals = []
for c, s in p:
if c is MARK:
! groups.append((i, s))
! literals.append(None)
else:
! literals.append(s)
i = i + 1
return groups, literals
--- 771,782 ----
i = 0
groups = []
! groupsappend = groups.append
! literals = [None] * len(p)
for c, s in p:
if c is MARK:
! groupsappend((i, s))
! # literals[i] is already None
else:
! literals[i] = s
i = i + 1
return groups, literals