[Python-checkins] r52942 - sandbox/trunk/2to3/PatternGrammar.txt sandbox/trunk/2to3/fix_apply.py sandbox/trunk/2to3/fix_has_key.py sandbox/trunk/2to3/patcomp.py

guido.van.rossum python-checkins at python.org
Wed Dec 6 23:58:16 CET 2006


Author: guido.van.rossum
Date: Wed Dec  6 23:58:15 2006
New Revision: 52942

Added:
   sandbox/trunk/2to3/PatternGrammar.txt   (contents, props changed)
   sandbox/trunk/2to3/patcomp.py   (contents, props changed)
Modified:
   sandbox/trunk/2to3/fix_apply.py
   sandbox/trunk/2to3/fix_has_key.py
Log:
Add a pattern compiler, to take away the tedious work of building
matching patterns by hand.

(Still needed: a language to specify replacements, too.  One thing at a time.)


Added: sandbox/trunk/2to3/PatternGrammar.txt
==============================================================================
--- (empty file)
+++ sandbox/trunk/2to3/PatternGrammar.txt	Wed Dec  6 23:58:15 2006
@@ -0,0 +1,26 @@
+# Copyright 2006 Google, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+# A grammar to describe tree matching patterns.
+# Not shown here:
+# - 'TOKEN' stands for any token (leaf node)
+# - 'any' stands for any node (leaf or interior)
+# With 'any' we can still specify the sub-structure.
+
+# The start symbol is 'Matcher'.
+
+Matcher: Alternatives ENDMARKER
+
+Alternatives: Alternative ('|' Alternative)*
+
+Alternative: Unit+
+
+Unit: [NAME '='] ( STRING [Repeater]
+                 | NAME [Details] [Repeater]
+                 | '(' Alternatives ')' [Repeater]
+                 | '[' Alternatives ']'
+		 )
+
+Repeater: '*' | '+' | '{' NUMBER ',' NUMBER '}'
+
+Details: '<' Alternatives '>'

Modified: sandbox/trunk/2to3/fix_apply.py
==============================================================================
--- sandbox/trunk/2to3/fix_apply.py	(original)
+++ sandbox/trunk/2to3/fix_apply.py	Wed Dec  6 23:58:15 2006
@@ -16,6 +16,7 @@
 from pgen2 import driver
 
 import pytree
+import patcomp
 
 logging.basicConfig(level=logging.DEBUG)
 
@@ -62,12 +63,8 @@
 n_doublestar = pytree.Leaf(token.DOUBLESTAR, "**")
 
 # Tree matching patterns
-p_apply = pytree.NodePattern(syms.power,
-                             (pytree.LeafPattern(token.NAME, "apply"),
-                              pytree.NodePattern(syms.trailer,
-                                                 (pytree.LeafPattern(token.LPAR),
-                                                  pytree.NodePattern(name="args"),
-                                                  pytree.LeafPattern(token.RPAR)))))
+pat_compile = patcomp.PatternCompiler().compile_pattern
+p_apply = pat_compile("power< 'apply' trailer<'(' args=any ')'> >")
 
 
 def fix_apply(node):

Modified: sandbox/trunk/2to3/fix_has_key.py
==============================================================================
--- sandbox/trunk/2to3/fix_has_key.py	(original)
+++ sandbox/trunk/2to3/fix_has_key.py	Wed Dec  6 23:58:15 2006
@@ -16,6 +16,7 @@
 from pgen2 import driver
 
 import pytree
+import patcomp
 
 logging.basicConfig(level=logging.DEBUG)
 
@@ -53,13 +54,9 @@
 n_comma = pytree.Leaf(token.COMMA, ",")
 
 # Tree matching patterns
-p_has_key = pytree.NodePattern(syms.trailer,
-                               (pytree.LeafPattern(token.DOT),
-                                pytree.LeafPattern(token.NAME, "has_key")))
-p_trailer_args = pytree.NodePattern(syms.trailer,
-                                    (pytree.LeafPattern(token.LPAR),
-                                     pytree.WildcardPattern(name="args"),
-                                     pytree.LeafPattern(token.RPAR)))
+pat_compile = patcomp.PatternCompiler().compile_pattern
+p_has_key = pat_compile("trailer<'.' 'has_key'>")
+p_trailer_args = pat_compile("trailer<'(' args=(any{1,1}) ')'>")
 
 
 def fix_has_key(node):

Added: sandbox/trunk/2to3/patcomp.py
==============================================================================
--- (empty file)
+++ sandbox/trunk/2to3/patcomp.py	Wed Dec  6 23:58:15 2006
@@ -0,0 +1,159 @@
+# Copyright 2006 Google, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Pattern compiler.
+
+The grammar is taken from PatternGrammar.txt.
+
+The compiler compiles a pattern to a pytree.*Pattern instance.
+"""
+
+__author__ = "Guido van Rossum <guido at python.org>"
+
+# Python imports
+import sys
+import token
+
+# Fairly local imports
+from pgen2 import driver
+from pgen2 import literals
+
+# Really local imports
+import pytree
+
+
+class Symbols(object):
+
+    def __init__(self, grammar):
+        """Initializer.
+
+        Creates an attribute for each grammar symbol (nonterminal),
+        whose value is the symbol's type (an int >= 256).
+        """
+        self._grammar = grammar
+        for name in grammar.symbol2number:
+            setattr(self, name, grammar.symbol2number[name])
+
+
+class PatternCompiler(object):
+
+    def __init__(self, grammar_file="PatternGrammar.txt"):
+        """Initializer.
+
+        Takes an optional alternative filename for the pattern grammar.
+        """
+        self.grammar = driver.load_grammar(grammar_file)
+        self.syms = Symbols(self.grammar)
+        self.pygrammar = driver.load_grammar("Grammar.txt")
+        self.pysyms = Symbols(self.pygrammar)
+        self.driver = driver.Driver(self.grammar, convert=pattern_convert)
+
+    def compile_pattern(self, input, debug=False):
+        """Compiles a pattern string to a nested pytree.*Pattern object."""
+        root = self.driver.parse_string(input, debug=debug)
+        return self.compile_node(root)
+
+    def compile_node(self, node):
+        """Compiles a node, recursively.
+
+        This is one big switch on the node type.
+        """
+        # XXX Leave the optimizations to later
+        if node.type == self.syms.Matcher:
+            node = node.children[0] # Avoid unneeded recursion
+
+        if node.type == self.syms.Alternatives:
+            # Skip the odd children since they are just '|' tokens
+            alts = [self.compile_node(ch) for ch in node.children[::2]]
+            return pytree.WildcardPattern([[a] for a in alts], min=1, max=1)
+
+        if node.type == self.syms.Alternative:
+            units = [self.compile_node(ch) for ch in node.children]
+            return pytree.WildcardPattern([units], min=1, max=1)
+
+        assert node.type == self.syms.Unit
+
+        name = None
+        nodes = node.children
+        if len(nodes) >= 3 and nodes[1].type == token.EQUAL:
+            name = nodes[0].value
+            nodes = nodes[2:]
+        repeat = None
+        if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater:
+            repeat = nodes[-1]
+            nodes = nodes[:-1]
+        # Now we've reduced it to: STRING | NAME [Details] | (...) | [...]
+        assert len(nodes) >= 1
+        node = nodes[0]
+        if node.type == token.STRING:
+            value = literals.evalString(node.value)
+            pattern = pytree.LeafPattern(content=value)
+        elif node.type == token.NAME:
+            value = node.value
+            if value.isupper():
+                pattern = TOKEN_MAP[value]
+            else:
+                if value == "any":
+                    type = None
+                elif not value.startswith("_"):
+                    type = getattr(self.pysyms, value) # XXX AttributeError
+                if nodes[1:]: # Details present
+                    content = [self.compile_node(nodes[1].children[1])]
+                else:
+                    content = None
+                pattern = pytree.NodePattern(type, content)
+        elif node.value == "(":
+            pattern = self.compile_node(nodes[1])
+        elif node.value == "[":
+            assert repeat is None
+            subpattern = self.compile_node(nodes[1])
+            pattern = pytree.WildcardPattern([[subpattern]], min=0, max=1)
+        if repeat is not None:
+            assert repeat.type == self.syms.Repeater
+            children = repeat.children
+            child = children[0]
+            if child.type == token.STAR:
+                min = 0
+                max = sys.maxint
+            elif child.type == token.PLUS:
+                min = 1
+                max = sys.maxint
+            else:
+                assert len(children) == 5
+                assert child.type == token.LBRACE
+                min = self.get_int(children[1])
+                max = self.get_int(children[3])
+            pattern = pytree.WildcardPattern([[pattern]], min=min, max=max)
+        if name is not None:
+            pattern.name = name
+        return pattern
+
+    def get_int(self, node):
+        assert node.type == token.NUMBER
+        return int(node.value)
+
+
+# Map named tokens to the type value for a LeafPattern
+TOKEN_MAP = {"NAME": token.NAME,
+             "STRING": token.STRING,
+             "NUMBER": token.NUMBER,
+             "TOKEN": None}
+
+
+def pattern_convert(grammar, raw_node_info):
+    """Converts raw node information to a Node or Leaf instance."""
+    type, value, context, children = raw_node_info
+    if children or type in grammar.number2symbol:
+        return pytree.Node(type, children, context=context)
+    else:
+        return pytree.Leaf(type, value, context=context)
+
+
+def test():
+    pc = PatternCompiler()
+    pat = pc.compile_pattern("a=power< 'apply' trailer<'(' b=any ')'> >")
+    print pat
+
+
+if __name__ == "__main__":
+    test()


More information about the Python-checkins mailing list