Author: george.boutsioukis
Date: Tue Aug 31 15:38:53 2010
New Revision: 84375
Log:
Idiomatic code changes & stylistic issues fixed in the BottomMatcher module. Thanks to Benjamin Peterson for taking the time to review the code.
Modified:
sandbox/trunk/2to3/lib2to3/btm_matcher.py
sandbox/trunk/2to3/lib2to3/btm_utils.py
sandbox/trunk/2to3/lib2to3/fixer_base.py
sandbox/trunk/2to3/lib2to3/pytree.py
sandbox/trunk/2to3/lib2to3/refactor.py
sandbox/trunk/2to3/lib2to3/tests/test_pytree.py
Modified: sandbox/trunk/2to3/lib2to3/btm_matcher.py
==============================================================================
--- sandbox/trunk/2to3/lib2to3/btm_matcher.py (original)
+++ sandbox/trunk/2to3/lib2to3/btm_matcher.py Tue Aug 31 15:38:53 2010
@@ -8,23 +8,21 @@
__author__ = "George Boutsioukis <gboutsioukis(a)gmail.com>"
import logging
-from .btm_utils import *
+import itertools
+import pytree
+from collections import defaultdict
+
+from .btm_utils import reduce_tree
class BMNode(object):
"""Class for a node of the Aho-Corasick automaton used in matching"""
- last_id = 0
+ count = itertools.count()
def __init__(self):
self.transition_table = {}
self.fixers = []
- self.id = BMNode.new_id()
+ self.id = next(BMNode.count)
self.content = ''
- @classmethod
- def new_id(cls):
- new_id = cls.last_id
- cls.last_id += 1
- return new_id
-
class BottomMatcher(object):
"""The main matcher class. After instantiating the patterns should
be added using the add_fixer method"""
@@ -54,7 +52,7 @@
if not pattern:
#print("empty pattern")
return [start]
- if type(pattern[0]) is tuple:
+ if isinstance(pattern[0], tuple):
#alternatives
#print("alternatives")
match_nodes = []
@@ -67,20 +65,20 @@
return match_nodes
else:
#single token
- #not last
- if pattern[0] not in start.transition_table.keys():
- #transition did not exist, create new
- next_node = BMNode()
- start.transition_table[pattern[0]] = next_node
- else:
- #transition exists already, follow
- next_node = start.transition_table[pattern[0]]
-
- if pattern[1:]:
- end_nodes = self.add(pattern[1:], start=next_node)
- else:
- end_nodes = [next_node]
- return end_nodes
+ #not last
+ if pattern[0] not in start.transition_table:
+ #transition did not exist, create new
+ next_node = BMNode()
+ start.transition_table[pattern[0]] = next_node
+ else:
+ #transition exists already, follow
+ next_node = start.transition_table[pattern[0]]
+
+ if pattern[1:]:
+ end_nodes = self.add(pattern[1:], start=next_node)
+ else:
+ end_nodes = [next_node]
+ return end_nodes
def run(self, leaves):
"""The main interface with the bottom matcher. The tree is
@@ -99,14 +97,14 @@
A dictionary of node matches with fixers as the keys
"""
current_ac_node = self.root
- results = {}
+ results = defaultdict(list)
for leaf in leaves:
current_ast_node = leaf
- while(current_ast_node):
+ while current_ast_node:
current_ast_node.was_checked = True
for child in current_ast_node.children:
# multiple statements, recheck
- if hasattr(child, "value") and child.value==';':
+ if isinstance(child, pytree.Leaf) and child.value == u";":
current_ast_node.was_checked = False
break
if current_ast_node.type == 1:
@@ -115,24 +113,24 @@
else:
node_token = current_ast_node.type
- if node_token in current_ac_node.transition_table.keys():
+ if node_token in current_ac_node.transition_table:
#token matches
current_ac_node = current_ac_node.transition_table[node_token]
for fixer in current_ac_node.fixers:
- if not fixer in results.keys():
+ if not fixer in results:
results[fixer] = []
results[fixer].append(current_ast_node)
else:
#matching failed, reset automaton
current_ac_node = self.root
- if current_ast_node.parent is not None \
- and current_ast_node.parent.was_checked:
+ if (current_ast_node.parent is not None
+ and current_ast_node.parent.was_checked):
#the rest of the tree upwards has been checked, next leaf
break
#recheck the rejected node once from the root
- if node_token in current_ac_node.transition_table.keys():
+ if node_token in current_ac_node.transition_table:
#token matches
current_ac_node = current_ac_node.transition_table[node_token]
for fixer in current_ac_node.fixers:
Modified: sandbox/trunk/2to3/lib2to3/btm_utils.py
==============================================================================
--- sandbox/trunk/2to3/lib2to3/btm_utils.py (original)
+++ sandbox/trunk/2to3/lib2to3/btm_utils.py Tue Aug 31 15:38:53 2010
@@ -4,10 +4,10 @@
from .pgen2 import grammar, token
from .pygram import pattern_symbols, python_symbols
-syms = pattern_symbols.__dict__
-pysyms = python_symbols.__dict__
+syms = pattern_symbols
+pysyms = python_symbols
tokens = grammar.opmap
-token_labels = token.__dict__
+token_labels = token
TYPE_ANY = -1
TYPE_ALTERNATIVES = -2
@@ -18,19 +18,17 @@
pattern tree during the conversion to sets of leaf-to-root
subpatterns"""
- def __init__(self, type=None, name=None, times=1):
+ def __init__(self, type=None, name=None):
self.type = type
self.name = name
- self.times = times
self.children = []
self.leaf = False
self.parent = None
self.alternatives = []
- self.alternatives_number = None
self.group = []
def __repr__(self):
- return str(self.type) + ' ' + str(self.name) + ' ' + str(self.times)
+ return str(self.type) + ' ' + str(self.name)
def leaf_to_root(self):
"""Internal method. Returns a characteristic path of the
@@ -65,7 +63,7 @@
subp = None
break
- if node.type == token_labels['NAME'] and node.name:
+ if node.type == token_labels.NAME and node.name:
#in case of type=name, use the name instead
subp.append(node.name)
else:
@@ -113,17 +111,16 @@
new_node = None
#switch on the node type
- if node.type == syms['Matcher']:
-
+ if node.type == syms.Matcher:
#skip
- new_node = reduce_tree(node.children[0])
+ node = node.children[0]
- elif node.type == syms['Alternatives']:
+ if node.type == syms.Alternatives :
#2 cases
- if len(node.children)<=2:
+ if len(node.children) <= 2:
#just a single 'Alternative', skip this node
new_node = reduce_tree(node.children[0], parent)
- elif len(node.children)>2:
+ else:
#real alternatives
new_node = MinNode(type=TYPE_ALTERNATIVES)
#skip odd children('|' tokens)
@@ -133,10 +130,8 @@
reduced = reduce_tree(child, new_node)
if reduced is not None:
new_node.children.append(reduced)
- else:
- raise Exception
- elif node.type == syms['Alternative']:
- if len(node.children)>1:
+ elif node.type == syms.Alternative:
+ if len(node.children) > 1:
new_node = MinNode(type=TYPE_GROUP)
for child in node.children:
@@ -150,17 +145,17 @@
else:
new_node = reduce_tree(node.children[0], parent)
- elif node.type == syms['Unit']:
- if hasattr(node.children[0], "value") and \
- node.children[0].value == '(':
+ elif node.type == syms.Unit:
+ if (isinstance(node.children[0], pytree.Leaf) and
+ node.children[0].value == '('):
#skip parentheses
return reduce_tree(node.children[1], parent)
- if (hasattr(node.children[0], "value") and \
- node.children[0].value == '[') \
- or \
- (len(node.children)>1 and \
- hasattr(node.children[1], "value") and \
- node.children[1].value == '['):
+ if ((isinstance(node.children[0], pytree.Leaf) and
+ node.children[0].value == '[')
+ or
+ (len(node.children)>1 and
+ hasattr(node.children[1], "value") and
+ node.children[1].value == '[')):
#skip whole unit if its optional
return None
@@ -172,13 +167,13 @@
has_variable_name = False
for child in node.children:
- if child.type == syms['Details']:
+ if child.type == syms.Details:
leaf = False
details_node = child
- elif child.type == syms['Repeater']:
+ elif child.type == syms.Repeater:
has_repeater = True
repeater_node = child
- elif child.type == syms['Alternatives']:
+ elif child.type == syms.Alternatives:
alternatives_node = child
if hasattr(child, 'value') and child.value == '=': # variable name
has_variable_name = True
@@ -194,25 +189,25 @@
name_leaf = node.children[0]
#set node type
- if name_leaf.type == token_labels['NAME']:
+ if name_leaf.type == token_labels.NAME:
#(python) non-name or wildcard
if name_leaf.value == 'any':
new_node = MinNode(type=TYPE_ANY)
else:
- if name_leaf.value in token_labels:
- new_node = MinNode(type=token_labels[name_leaf.value])
+ if hasattr(token_labels, name_leaf.value):
+ new_node = MinNode(type=getattr(token_labels, name_leaf.value))
else:
- new_node = MinNode(type=pysyms[name_leaf.value])
+ new_node = MinNode(type=getattr(pysyms, name_leaf.value))
- elif name_leaf.type == token_labels['STRING']:
+ elif name_leaf.type == token_labels.STRING:
#(python) name or character; remove the apostrophes from
#the string value
- name = name_leaf.value[1:][:-1]
+ name = name_leaf.value.strip("'")
if name in tokens:
new_node = MinNode(type=tokens[name])
else:
- new_node = MinNode(type=token_labels['NAME'], name=name)
- elif name_leaf.type == syms['Alternatives']:
+ new_node = MinNode(type=token_labels.NAME, name=name)
+ elif name_leaf.type == syms.Alternatives:
new_node = reduce_tree(alternatives_node, parent)
#handle repeaters
@@ -225,11 +220,12 @@
pass
else:
#TODO: handle {min, max} repeaters
+ raise NotImplementedError
pass
#add children
if details_node and new_node is not None:
- for child in details_node.children[1:][:-1]:
+ for child in details_node.children[1:-1]:
#skip '<', '>' markers
reduced = reduce_tree(child, new_node)
if reduced is not None:
@@ -244,9 +240,9 @@
Current order used is:
names > common_names > common_chars
"""
- if type(subpatterns) is not list:
+ if not isinstance(subpatterns, list):
return subpatterns
- if type(subpatterns) is list and len(subpatterns)==1:
+ if len(subpatterns)==1:
return subpatterns[0]
# first pick out the ones containing variable names
@@ -258,10 +254,10 @@
for subpattern in subpatterns:
if any(rec_test(subpattern, lambda x: type(x) is str)):
if any(rec_test(subpattern,
- lambda x: type(x) is str and x in common_chars)):
+ lambda x: isinstance(x, str) and x in common_chars)):
subpatterns_with_common_chars.append(subpattern)
elif any(rec_test(subpattern,
- lambda x: type(x) is str and x in common_names)):
+ lambda x: isinstance(x, str) and x in common_names)):
subpatterns_with_common_names.append(subpattern)
else:
@@ -274,13 +270,13 @@
elif subpatterns_with_common_chars:
subpatterns = subpatterns_with_common_chars
# of the remaining subpatterns pick out the longest one
- return sorted(subpatterns, key=len, reverse=True)[0]
+ return max(subpatterns, key=len)
def rec_test(sequence, test_func):
"""Tests test_func on all items of sequence and items of included
sub-iterables"""
for x in sequence:
- if type(x) is list or type(x) is tuple:
+ if isinstance(x, (list, tuple)):
for y in rec_test(x, test_func):
yield y
else:
Modified: sandbox/trunk/2to3/lib2to3/fixer_base.py
==============================================================================
--- sandbox/trunk/2to3/lib2to3/fixer_base.py (original)
+++ sandbox/trunk/2to3/lib2to3/fixer_base.py Tue Aug 31 15:38:53 2010
@@ -65,8 +65,9 @@
self.{pattern,PATTERN} in .match().
"""
if self.PATTERN is not None:
- self.pattern, self.pattern_tree = \
- PatternCompiler().compile_pattern(self.PATTERN, with_tree=True)
+ PC = PatternCompiler()
+ self.pattern, self.pattern_tree = PC.compile_pattern(self.PATTERN,
+ with_tree=True)
def set_filename(self, filename):
"""Set the filename, and a logger derived from it.
Modified: sandbox/trunk/2to3/lib2to3/pytree.py
==============================================================================
--- sandbox/trunk/2to3/lib2to3/pytree.py (original)
+++ sandbox/trunk/2to3/lib2to3/pytree.py Tue Aug 31 15:38:53 2010
@@ -216,21 +216,12 @@
for child in self.children:
for x in child.leaves():
yield x
- if type(self) is Leaf:
- yield self
-
- def to_root(self):
- yield self
- if self.parent:
- for p in self.parent.to_root():
- yield p
def depth(self):
if self.parent is None:
return 0
return 1 + self.parent.depth()
-
def get_suffix(self):
"""
Return the string immediately following the invocant node. This is
@@ -252,7 +243,7 @@
def __init__(self,type, children,
context=None,
prefix=None,
- fixers_applied=[]):
+ fixers_applied=None):
"""
Initializer.
@@ -269,7 +260,10 @@
ch.parent = self
if prefix is not None:
self.prefix = prefix
- self.fixers_applied = fixers_applied[:]
+ if fixers_applied:
+ self.fixers_applied = fixers_applied[:]
+ else:
+ self.fixers_applied = None
def __repr__(self):
"""Return a canonical string representation."""
@@ -308,7 +302,7 @@
"""Return a pre-order iterator for the tree."""
yield self
for child in self.children:
- for node in child.post_order():
+ for node in child.pre_order():
yield node
def _prefix_getter(self):
@@ -409,6 +403,9 @@
(self.prefix, (self.lineno, self.column)),
fixers_applied=self.fixers_applied)
+ def leaves(self):
+ yield self
+
def post_order(self):
"""Return a post-order iterator for the tree."""
yield self
Modified: sandbox/trunk/2to3/lib2to3/refactor.py
==============================================================================
--- sandbox/trunk/2to3/lib2to3/refactor.py (original)
+++ sandbox/trunk/2to3/lib2to3/refactor.py Tue Aug 31 15:38:53 2010
@@ -422,9 +422,9 @@
# obtain a set of candidate nodes
match_set = self.BM.run(tree.leaves())
- while any(list(match_set.values())):
+ while any(match_set.values()):
for fixer in self.BM.fixers:
- if fixer in match_set.keys() and match_set[fixer]:
+ if fixer in match_set and match_set[fixer]:
#sort by depth; apply fixers from bottom(of the AST) to top
match_set[fixer].sort(key=pytree.Base.depth, reverse=True)
@@ -444,7 +444,7 @@
# previous transformation ; skip
continue
- if fixer in node.fixers_applied:
+ if node.fixers_applied and fixer in node.fixers_applied:
# do not apply the same fixer again
continue
@@ -458,14 +458,17 @@
for node in new.post_order():
# do not apply the fixer again to
# this or any subnode
+ if not node.fixers_applied:
+ node.fixers_applied = []
node.fixers_applied.append(fixer)
# update the original match set for
# the added code
new_matches = self.BM.run(new.leaves())
- for fxr in new_matches.keys():
- if not fxr in list(match_set.keys()):
+ for fxr in new_matches:
+ if not fxr in match_set:
match_set[fxr]=[]
+
match_set[fxr].extend(new_matches[fxr])
for fixer in chain(self.pre_order, self.post_order):
Modified: sandbox/trunk/2to3/lib2to3/tests/test_pytree.py
==============================================================================
--- sandbox/trunk/2to3/lib2to3/tests/test_pytree.py (original)
+++ sandbox/trunk/2to3/lib2to3/tests/test_pytree.py Tue Aug 31 15:38:53 2010
@@ -178,6 +178,27 @@
self.assertEqual(str(n1), "foo**bar")
self.assertTrue(isinstance(n1.children, list))
+ def test_leaves(self):
+ l1 = pytree.Leaf(100, "foo")
+ l2 = pytree.Leaf(100, "bar")
+ l3 = pytree.Leaf(100, "fooey")
+ n2 = pytree.Node(1000, [l1, l2])
+ n3 = pytree.Node(1000, [l3])
+ n1 = pytree.Node(1000, [n2, n3])
+
+ self.assertEqual(list(n1.leaves()), [l1, l2, l3])
+
+ def test_depth(self):
+ l1 = pytree.Leaf(100, "foo")
+ l2 = pytree.Leaf(100, "bar")
+ n2 = pytree.Node(1000, [l1, l2])
+ n3 = pytree.Node(1000, [])
+ n1 = pytree.Node(1000, [n2, n3])
+
+ self.assertEqual(l1.depth(), 2)
+ self.assertEqual(n3.depth(), 1)
+ self.assertEqual(n1.depth(), 0)
+
def test_post_order(self):
l1 = pytree.Leaf(100, "foo")
l2 = pytree.Leaf(100, "bar")