[Python-checkins] r52995 - in sandbox/trunk/2to3: PatternGrammar.txt fix_apply.py fix_has_key.py patcomp.py pgen2/driver.py pgen2/pgen.py pytree.py
guido.van.rossum
python-checkins at python.org
Mon Dec 11 04:57:44 CET 2006
Author: guido.van.rossum
Date: Mon Dec 11 04:57:36 2006
New Revision: 52995
Modified:
sandbox/trunk/2to3/PatternGrammar.txt
sandbox/trunk/2to3/fix_apply.py
sandbox/trunk/2to3/fix_has_key.py
sandbox/trunk/2to3/patcomp.py
sandbox/trunk/2to3/pgen2/driver.py
sandbox/trunk/2to3/pgen2/pgen.py
sandbox/trunk/2to3/pytree.py
Log:
Use a fixed number instead of sys.maxint.
Change pattern repr.
Add some pattern optimizations.
Allow {N} in pattern syntax.
Refactor parser driver to allow a different source of tokens than tokenize.
Add a filter for tokenize that removes significant whitespace
(so it can be insignificant for patterns).
Close the stream from which the grammar is read if we opened it.
Modified: sandbox/trunk/2to3/PatternGrammar.txt
==============================================================================
--- sandbox/trunk/2to3/PatternGrammar.txt (original)
+++ sandbox/trunk/2to3/PatternGrammar.txt Mon Dec 11 04:57:36 2006
@@ -23,6 +23,6 @@
NegatedUnit: 'not' (STRING | NAME [Details] | '(' Alternatives ')')
-Repeater: '*' | '+' | '{' NUMBER ',' NUMBER '}'
+Repeater: '*' | '+' | '{' NUMBER [',' NUMBER] '}'
Details: '<' Alternatives '>'
Modified: sandbox/trunk/2to3/fix_apply.py
==============================================================================
--- sandbox/trunk/2to3/fix_apply.py (original)
+++ sandbox/trunk/2to3/fix_apply.py Mon Dec 11 04:57:36 2006
@@ -64,7 +64,7 @@
# Tree matching patterns
pat_compile = patcomp.PatternCompiler().compile_pattern
-p_apply = pat_compile("""(
+p_apply = pat_compile("""
power< 'apply'
trailer<
'('
@@ -76,7 +76,7 @@
')'
>
>
-)"""
+"""
)
Modified: sandbox/trunk/2to3/fix_has_key.py
==============================================================================
--- sandbox/trunk/2to3/fix_has_key.py (original)
+++ sandbox/trunk/2to3/fix_has_key.py Mon Dec 11 04:57:36 2006
@@ -55,7 +55,7 @@
# Tree matching pattern
pat_compile = patcomp.PatternCompiler().compile_pattern
-p_has_key_top = pat_compile("""(
+p_has_key_top = pat_compile("""
power<
before=any+
trailer< '.' 'has_key' >
@@ -68,7 +68,7 @@
>
after=any*
>
-)""")
+""")
def fix_has_key(node):
results = {}
Modified: sandbox/trunk/2to3/patcomp.py
==============================================================================
--- sandbox/trunk/2to3/patcomp.py (original)
+++ sandbox/trunk/2to3/patcomp.py Mon Dec 11 04:57:36 2006
@@ -10,9 +10,9 @@
__author__ = "Guido van Rossum <guido at python.org>"
-# Python tokens
-import sys
+# Python imports
import token
+import tokenize
# Fairly local imports
from pgen2 import driver
@@ -35,6 +35,16 @@
setattr(self, name, grammar.symbol2number[name])
+def tokenize_wrapper(input):
+ """Tokenizes a string suppressing significant whitespace."""
+ skip = (token.NEWLINE, token.INDENT, token.DEDENT)
+ tokens = tokenize.generate_tokens(driver.generate_lines(input).next)
+ for quintuple in tokens:
+ type, value, start, end, line_text = quintuple
+ if type not in skip:
+ yield quintuple
+
+
class PatternCompiler(object):
def __init__(self, grammar_file="PatternGrammar.txt"):
@@ -50,7 +60,8 @@
def compile_pattern(self, input, debug=False):
"""Compiles a pattern string to a nested pytree.*Pattern object."""
- root = self.driver.parse_string(input, debug=debug)
+ tokens = tokenize_wrapper(input)
+ root = self.driver.parse_tokens(tokens, debug=debug)
return self.compile_node(root)
def compile_node(self, node):
@@ -58,22 +69,30 @@
This is one big switch on the node type.
"""
- # XXX Leave the optimizations to later
+ # XXX Optimize certain Wildcard-containing-Wildcard patterns
+ # that can be merged
if node.type == self.syms.Matcher:
node = node.children[0] # Avoid unneeded recursion
if node.type == self.syms.Alternatives:
# Skip the odd children since they are just '|' tokens
alts = [self.compile_node(ch) for ch in node.children[::2]]
- return pytree.WildcardPattern([[a] for a in alts], min=1, max=1)
+ if len(alts) == 1:
+ return alts[0]
+ p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1)
+ return p.optimize()
if node.type == self.syms.Alternative:
units = [self.compile_node(ch) for ch in node.children]
- return pytree.WildcardPattern([units], min=1, max=1)
+ if len(units) == 1:
+ return units[0]
+ p = pytree.WildcardPattern([units], min=1, max=1)
+ return p.optimize()
if node.type == self.syms.NegatedUnit:
pattern = self.compile_basic(node.children[1:])
- return pytree.NegatedPattern(pattern)
+ p = pytree.NegatedPattern(pattern)
+ return p.optimize()
assert node.type == self.syms.Unit
@@ -96,19 +115,25 @@
child = children[0]
if child.type == token.STAR:
min = 0
- max = sys.maxint
+ max = pytree.HUGE
elif child.type == token.PLUS:
min = 1
- max = sys.maxint
+ max = pytree.HUGE
+ elif child.type == token.LBRACE:
+ assert children[-1].type == token.RBRACE
+ assert len(children) in (3, 5)
+ min = max = self.get_int(children[1])
+ if len(children) == 5:
+ max = self.get_int(children[3])
else:
- assert len(children) == 5
- assert child.type == token.LBRACE
- min = self.get_int(children[1])
- max = self.get_int(children[3])
- pattern = pytree.WildcardPattern([[pattern]], min=min, max=max)
+ assert False
+ if min != 1 or max != 1:
+ pattern = pattern.optimize()
+ pattern = pytree.WildcardPattern([[pattern]], min=min, max=max)
+
if name is not None:
pattern.name = name
- return pattern
+ return pattern.optimize()
def compile_basic(self, nodes, repeat=None):
# Compile STRING | NAME [Details] | (...) | [...]
@@ -164,11 +189,15 @@
return pytree.Leaf(type, value, context=context)
-def test():
+_SAMPLE = """(a=(power< ('apply' trailer<'(' b=(not STRING) ')'> ) >){1})
+{1,1}"""
+
+
+def _test():
pc = PatternCompiler()
- pat = pc.compile_pattern("a=power< 'apply' trailer<'(' b=(not STRING) ')'> >")
+ pat = pc.compile_pattern(_SAMPLE)
print pat
if __name__ == "__main__":
- test()
+ _test()
Modified: sandbox/trunk/2to3/pgen2/driver.py
==============================================================================
--- sandbox/trunk/2to3/pgen2/driver.py (original)
+++ sandbox/trunk/2to3/pgen2/driver.py Mon Dec 11 04:57:36 2006
@@ -25,6 +25,7 @@
from pgen2 import parse
from pgen2 import grammar
+
class Driver(object):
def __init__(self, grammar, convert=None, logger=None):
@@ -34,15 +35,16 @@
self.logger = logger
self.convert = convert
- def parse_stream_raw(self, stream, debug=False):
- """Parse a stream and return the concrete syntax tree."""
+ def parse_tokens(self, tokens, debug=False):
+ """Parse a series of tokens and return the syntax tree."""
+ # XXX Move the prefix computation into a wrapper around tokenize.
p = parse.Parser(self.grammar, self.convert)
p.setup()
lineno = 1
column = 0
type = value = start = end = line_text = None
prefix = ""
- for quintuple in tokenize.generate_tokens(stream.readline):
+ for quintuple in tokens:
type, value, start, end, line_text = quintuple
if start != (lineno, column):
assert (lineno, column) <= start, ((lineno, column), start)
@@ -80,6 +82,11 @@
raise parse.ParseError("incomplete input", t, v, x)
return p.rootnode
+ def parse_stream_raw(self, stream, debug=False):
+ """Parse a stream and return the syntax tree."""
+ tokens = tokenize.generate_tokens(stream.readline)
+ return self.parse_tokens(tokens, debug)
+
def parse_stream(self, stream, debug=False):
"""Parse a stream and return the syntax tree."""
return self.parse_stream_raw(stream, debug)
@@ -94,9 +101,17 @@
def parse_string(self, text, debug=False):
"""Parse a string and return the syntax tree."""
- from StringIO import StringIO
- stream = StringIO(text)
- return self.parse_stream(stream, debug)
+ tokens = tokenize.generate_tokens(generate_lines(text).next)
+ return self.parse_tokens(tokens, debug)
+
+
+def generate_lines(text):
+ """Generator that behaves like readline without using StringIO."""
+ for line in text.splitlines(True):
+ yield line
+ while True:
+ yield ""
+
def load_grammar(gt="Grammar.txt", gp=None,
save=True, force=False, logger=None):
@@ -120,6 +135,7 @@
g.load(gp)
return g
+
def _newer(a, b):
"""Inquire whether file a was written since file b."""
if not os.path.exists(a):
Modified: sandbox/trunk/2to3/pgen2/pgen.py
==============================================================================
--- sandbox/trunk/2to3/pgen2/pgen.py (original)
+++ sandbox/trunk/2to3/pgen2/pgen.py Mon Dec 11 04:57:36 2006
@@ -14,13 +14,17 @@
class ParserGenerator(object):
def __init__(self, filename, stream=None):
+ close_stream = None
if stream is None:
stream = open(filename)
+ close_stream = stream.close
self.filename = filename
self.stream = stream
self.generator = tokenize.generate_tokens(stream.readline)
self.gettoken() # Initialize lookahead
self.dfas, self.startsymbol = self.parse()
+ if close_stream is not None:
+ close_stream()
self.first = {} # map from symbol name to set of tokens
self.addfirstsets()
Modified: sandbox/trunk/2to3/pytree.py
==============================================================================
--- sandbox/trunk/2to3/pytree.py (original)
+++ sandbox/trunk/2to3/pytree.py Mon Dec 11 04:57:36 2006
@@ -11,7 +11,8 @@
__author__ = "Guido van Rossum <guido at python.org>"
-import sys
+
+HUGE = 0x7FFFFFFF # maximum repeat count, default max
class Base(object):
@@ -247,10 +248,17 @@
return object.__new__(cls, *args, **kwds)
def __repr__(self):
- return "%s(%s)" % (self.__class__.__name__,
- ", ".join("%s=%r" % (x, getattr(self, x))
- for x in ("type", "content", "name")
- if getattr(self, x) is not None))
+ args = [self.type, self.content, self.name]
+ while args and args[-1] is None:
+ del args[-1]
+ return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
+
+ def optimize(self):
+ """A subclass can define this as a hook for optimizations.
+
+ Returns either self or another node with the same effect.
+ """
+ return self
def match(self, node, results=None):
"""Does this pattern exactly match a node?
@@ -410,7 +418,7 @@
except it always uses non-greedy matching.
"""
- def __init__(self, content=None, min=0, max=sys.maxint, name=None):
+ def __init__(self, content=None, min=0, max=HUGE, name=None):
"""Initializer.
Args:
@@ -418,7 +426,7 @@
if absent, matches one node;
if present, each subsequence is an alternative [*]
min: optional minimum number of times to match, default 0
- max: optional maximum number of times to match, default sys.maxint
+ max: optional maximum number of times to match, default HUGE
name: optional name assigned to this match
[*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
@@ -432,7 +440,7 @@
If content is not None, replace the dot with the parenthesized
list of alternatives, e.g. (a b c | d e | f g h)*
"""
- assert 0 <= min <= max <= sys.maxint, (min, max)
+ assert 0 <= min <= max <= HUGE, (min, max)
if content is not None:
content = tuple(map(tuple, content)) # Protect against alterations
# Check sanity of alternatives
@@ -444,6 +452,25 @@
self.max = max
self.name = name
+ def optimize(self):
+ """Optimize certain stacked wildcard patterns."""
+ subpattern = None
+ if (self.content is not None and
+ len(self.content) == 1 and len(self.content[0]) == 1):
+ subpattern = self.content[0][0]
+ if self.min == 1 and self.max == 1:
+ if self.content is None:
+ return NodePattern(name=self.name)
+ if subpattern is not None and self.name == subpattern.name:
+ return subpattern.optimize()
+ if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
+ subpattern.min <= 1 and self.name == subpattern.name):
+ return WildcardPattern(subpattern.content,
+ self.min*subpattern.min,
+ self.max*subpattern.max,
+ subpattern.name)
+ return self
+
def match(self, node, results=None):
"""Does this pattern exactly match a node?"""
return self.match_seq([node], results)
More information about the Python-checkins
mailing list