[Python-checkins] r52942 - sandbox/trunk/2to3/PatternGrammar.txt sandbox/trunk/2to3/fix_apply.py sandbox/trunk/2to3/fix_has_key.py sandbox/trunk/2to3/patcomp.py
guido.van.rossum
python-checkins at python.org
Wed Dec 6 23:58:16 CET 2006
Author: guido.van.rossum
Date: Wed Dec 6 23:58:15 2006
New Revision: 52942
Added:
sandbox/trunk/2to3/PatternGrammar.txt (contents, props changed)
sandbox/trunk/2to3/patcomp.py (contents, props changed)
Modified:
sandbox/trunk/2to3/fix_apply.py
sandbox/trunk/2to3/fix_has_key.py
Log:
Add a pattern compiler, to take away the tedious work of building
matching patterns by hand.
(Still needed: a language to specify replacements, too. One thing at a time.)
Added: sandbox/trunk/2to3/PatternGrammar.txt
==============================================================================
--- (empty file)
+++ sandbox/trunk/2to3/PatternGrammar.txt Wed Dec 6 23:58:15 2006
@@ -0,0 +1,26 @@
+# Copyright 2006 Google, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+# A grammar to describe tree matching patterns.
+# Not shown here:
+# - 'TOKEN' stands for any token (leaf node)
+# - 'any' stands for any node (leaf or interior)
+# With 'any' we can still specify the sub-structure.
+
+# The start symbol is 'Matcher'.
+
+Matcher: Alternatives ENDMARKER
+
+Alternatives: Alternative ('|' Alternative)*
+
+Alternative: Unit+
+
+Unit: [NAME '='] ( STRING [Repeater]
+ | NAME [Details] [Repeater]
+ | '(' Alternatives ')' [Repeater]
+ | '[' Alternatives ']'
+ )
+
+Repeater: '*' | '+' | '{' NUMBER ',' NUMBER '}'
+
+Details: '<' Alternatives '>'
Modified: sandbox/trunk/2to3/fix_apply.py
==============================================================================
--- sandbox/trunk/2to3/fix_apply.py (original)
+++ sandbox/trunk/2to3/fix_apply.py Wed Dec 6 23:58:15 2006
@@ -16,6 +16,7 @@
from pgen2 import driver
import pytree
+import patcomp
logging.basicConfig(level=logging.DEBUG)
@@ -62,12 +63,8 @@
n_doublestar = pytree.Leaf(token.DOUBLESTAR, "**")
# Tree matching patterns
-p_apply = pytree.NodePattern(syms.power,
- (pytree.LeafPattern(token.NAME, "apply"),
- pytree.NodePattern(syms.trailer,
- (pytree.LeafPattern(token.LPAR),
- pytree.NodePattern(name="args"),
- pytree.LeafPattern(token.RPAR)))))
+pat_compile = patcomp.PatternCompiler().compile_pattern
+p_apply = pat_compile("power< 'apply' trailer<'(' args=any ')'> >")
def fix_apply(node):
Modified: sandbox/trunk/2to3/fix_has_key.py
==============================================================================
--- sandbox/trunk/2to3/fix_has_key.py (original)
+++ sandbox/trunk/2to3/fix_has_key.py Wed Dec 6 23:58:15 2006
@@ -16,6 +16,7 @@
from pgen2 import driver
import pytree
+import patcomp
logging.basicConfig(level=logging.DEBUG)
@@ -53,13 +54,9 @@
n_comma = pytree.Leaf(token.COMMA, ",")
# Tree matching patterns
-p_has_key = pytree.NodePattern(syms.trailer,
- (pytree.LeafPattern(token.DOT),
- pytree.LeafPattern(token.NAME, "has_key")))
-p_trailer_args = pytree.NodePattern(syms.trailer,
- (pytree.LeafPattern(token.LPAR),
- pytree.WildcardPattern(name="args"),
- pytree.LeafPattern(token.RPAR)))
+pat_compile = patcomp.PatternCompiler().compile_pattern
+p_has_key = pat_compile("trailer<'.' 'has_key'>")
+p_trailer_args = pat_compile("trailer<'(' args=(any{1,1}) ')'>")
def fix_has_key(node):
Added: sandbox/trunk/2to3/patcomp.py
==============================================================================
--- (empty file)
+++ sandbox/trunk/2to3/patcomp.py Wed Dec 6 23:58:15 2006
@@ -0,0 +1,159 @@
+# Copyright 2006 Google, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Pattern compiler.
+
+The grammar is taken from PatternGrammar.txt.
+
+The compiler compiles a pattern to a pytree.*Pattern instance.
+"""
+
+__author__ = "Guido van Rossum <guido at python.org>"
+
+# Python tokens
+import sys
+import token
+
+# Fairly local imports
+from pgen2 import driver
+from pgen2 import literals
+
+# Really local imports
+import pytree
+
+
+class Symbols(object):
+
+ def __init__(self, grammar):
+ """Initializer.
+
+ Creates an attribute for each grammar symbol (nonterminal),
+ whose value is the symbol's type (an int >= 256).
+ """
+ self._grammar = grammar
+ for name in grammar.symbol2number:
+ setattr(self, name, grammar.symbol2number[name])
+
+
+class PatternCompiler(object):
+
+ def __init__(self, grammar_file="PatternGrammar.txt"):
+ """Initializer.
+
+ Takes an optional alternative filename for the pattern grammar.
+ """
+ self.grammar = driver.load_grammar(grammar_file)
+ self.syms = Symbols(self.grammar)
+ self.pygrammar = driver.load_grammar("Grammar.txt")
+ self.pysyms = Symbols(self.pygrammar)
+ self.driver = driver.Driver(self.grammar, convert=pattern_convert)
+
+ def compile_pattern(self, input, debug=False):
+ """Compiles a pattern string to a nested pytree.*Pattern object."""
+ root = self.driver.parse_string(input, debug=debug)
+ return self.compile_node(root)
+
+ def compile_node(self, node):
+ """Compiles a node, recursively.
+
+ This is one big switch on the node type.
+ """
+ # XXX Leave the optimizations to later
+ if node.type == self.syms.Matcher:
+ node = node.children[0] # Avoid unneeded recursion
+
+ if node.type == self.syms.Alternatives:
+ # Skip the odd children since they are just '|' tokens
+ alts = [self.compile_node(ch) for ch in node.children[::2]]
+ return pytree.WildcardPattern([[a] for a in alts], min=1, max=1)
+
+ if node.type == self.syms.Alternative:
+ units = [self.compile_node(ch) for ch in node.children]
+ return pytree.WildcardPattern([units], min=1, max=1)
+
+ assert node.type == self.syms.Unit
+
+ name = None
+ nodes = node.children
+ if len(nodes) >= 3 and nodes[1].type == token.EQUAL:
+ name = nodes[0].value
+ nodes = nodes[2:]
+ repeat = None
+ if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater:
+ repeat = nodes[-1]
+ nodes = nodes[:-1]
+ # Now we've reduced it to: STRING | NAME [Details] | (...) | [...]
+ assert len(nodes) >= 1
+ node = nodes[0]
+ if node.type == token.STRING:
+ value = literals.evalString(node.value)
+ pattern = pytree.LeafPattern(content=value)
+ elif node.type == token.NAME:
+ value = node.value
+ if value.isupper():
+ pattern = TOKEN_MAP[value]
+ else:
+ if value == "any":
+ type = None
+ elif not value.startswith("_"):
+ type = getattr(self.pysyms, value) # XXX AttributeError
+ if nodes[1:]: # Details present
+ content = [self.compile_node(nodes[1].children[1])]
+ else:
+ content = None
+ pattern = pytree.NodePattern(type, content)
+ elif node.value == "(":
+ pattern = self.compile_node(nodes[1])
+ elif node.value == "[":
+ assert repeat is None
+ subpattern = self.compile_node(nodes[1])
+ pattern = pytree.WildcardPattern([[subpattern]], min=0, max=1)
+ if repeat is not None:
+ assert repeat.type == self.syms.Repeater
+ children = repeat.children
+ child = children[0]
+ if child.type == token.STAR:
+ min = 0
+ max = sys.maxint
+ elif child.type == token.PLUS:
+ min = 1
+ max = sys.maxint
+ else:
+ assert len(children) == 5
+ assert child.type == token.LBRACE
+ min = self.get_int(children[1])
+ max = self.get_int(children[3])
+ pattern = pytree.WildcardPattern([[pattern]], min=min, max=max)
+ if name is not None:
+ pattern.name = name
+ return pattern
+
+ def get_int(self, node):
+ assert node.type == token.NUMBER
+ return int(node.value)
+
+
+# Map named tokens to the type value for a LeafPattern
+TOKEN_MAP = {"NAME": token.NAME,
+ "STRING": token.STRING,
+ "NUMBER": token.NUMBER,
+ "TOKEN": None}
+
+
+def pattern_convert(grammar, raw_node_info):
+ """Converts raw node information to a Node or Leaf instance."""
+ type, value, context, children = raw_node_info
+ if children or type in grammar.number2symbol:
+ return pytree.Node(type, children, context=context)
+ else:
+ return pytree.Leaf(type, value, context=context)
+
+
+def test():
+ pc = PatternCompiler()
+ pat = pc.compile_pattern("a=power< 'apply' trailer<'(' b=any ')'> >")
+ print pat
+
+
+if __name__ == "__main__":
+ test()
More information about the Python-checkins
mailing list