[Python-Dev] Python syntax checker ?
Jeremy Hylton
jeremy@beopen.com
Mon, 25 Sep 2000 10:50:30 -0400 (EDT)
>>>>> "JCA" == James C Ahlstrom <jim@interet.com> writes:
JCA> The following is based on trying (a great learning experience)
JCA> to write a better Python lint.
JCA> There are IMHO two problems with the current Python grammar
JCA> file. It is not possible to express operator precedence, so
JCA> deliberate shift/reduce conflicts are used instead. That makes
JCA> the parse tree complicated and non intuitive. And there is no
JCA> provision for error productions. YACC has both of these as
JCA> built-in features.
JCA> I also found speed problems with tokenize.py. AFAIK, it only
JCA> exists because tokenizer.c does not provide comments as tokens,
JCA> but eats them instead. We could modify tokenizer.c, then make
JCA> tokenize.py be the interface to the fast C tokenizer. This
JCA> eliminates the problem of updating both too.
JCA> So how about re-writing the Python grammar in YACC in order to
JCA> use its more advanced features?? The simple YACC grammar I
JCA> wrote for 1.5.2 plus an altered tokenizer.c parsed the whole
JCA> Lib/*.py in a couple seconds vs. 30 seconds for the first file
JCA> using Aaron Watters' Python lint grammar written in Python.
Why not use the actual Python parser instead of tokenize.py? I assume
it is also faster than Aaron's Python lint grammer written in Python.
The compiler in Tools/compiler uses the parser module internally and
produces an AST that is straightforward to use. (The parse tree
produced by the parser module is fairly low-level.)
There was a thread (on the compiler-sig, I believe) where Moshe and I
noodled with a simple lint-like warnings framework based on the
compiler package. I don't have the code we ended up with, but I found
an example checker in the mail archives and have included it below.
It checks for NameErrors.
I believe one useful change that Moshe and I arrived at was to avoid
the explicit stack that the code uses (via enterNamespace and
exitNamespace) and instead pass the namespace as an optional extra
argument to the visitXXX methods.
Jeremy
"""Check for NameErrors"""
from compiler import parseFile, walk
from compiler.misc import Stack, Set
import __builtin__
from UserDict import UserDict
class Warning:
def __init__(self, filename, funcname, lineno):
self.filename = filename
self.funcname = funcname
self.lineno = lineno
def __str__(self):
return self._template % self.__dict__
class UndefinedLocal(Warning):
super_init = Warning.__init__
def __init__(self, filename, funcname, lineno, name):
self.super_init(filename, funcname, lineno)
self.name = name
_template = "%(filename)s:%(lineno)s " \
"%(funcname)s undefined local %(name)s"
class NameError(UndefinedLocal):
_template = "%(filename)s:%(lineno)s " \
"%(funcname)s undefined name %(name)s"
class NameSet(UserDict):
"""Track names and the line numbers where they are referenced"""
def __init__(self):
self.data = self.names = {}
def add(self, name, lineno):
l = self.names.get(name, [])
l.append(lineno)
self.names[name] = l
class CheckNames:
def __init__(self, filename):
self.filename = filename
self.warnings = []
self.scope = Stack()
self.gUse = NameSet()
self.gDef = NameSet()
# _locals is the stack of local namespaces
# locals is the top of the stack
self._locals = Stack()
self.lUse = None
self.lDef = None
self.lGlobals = None # var declared global
# holds scope,def,use,global triples for later analysis
self.todo = []
def enterNamespace(self, node):
self.scope.push(node)
self.lUse = use = NameSet()
self.lDef = _def = NameSet()
self.lGlobals = gbl = NameSet()
self._locals.push((use, _def, gbl))
def exitNamespace(self):
self.todo.append((self.scope.top(), self.lDef, self.lUse,
self.lGlobals))
self.scope.pop()
self._locals.pop()
if self._locals:
self.lUse, self.lDef, self.lGlobals = self._locals.top()
else:
self.lUse = self.lDef = self.lGlobals = None
def warn(self, warning, funcname, lineno, *args):
args = (self.filename, funcname, lineno) + args
self.warnings.append(apply(warning, args))
def defName(self, name, lineno, local=1):
if self.lUse is None:
self.gDef.add(name, lineno)
elif local == 0:
self.gDef.add(name, lineno)
self.lGlobals.add(name, lineno)
else:
self.lDef.add(name, lineno)
def useName(self, name, lineno, local=1):
if self.lUse is None:
self.gUse.add(name, lineno)
elif local == 0:
self.gUse.add(name, lineno)
self.lUse.add(name, lineno)
else:
self.lUse.add(name, lineno)
def check(self):
for s, d, u, g in self.todo:
self._check(s, d, u, g, self.gDef)
# XXX then check the globals
def _check(self, scope, _def, use, gbl, globals):
# check for NameError
# a name is defined iff it is in def.keys()
# a name is global iff it is in gdefs.keys()
gdefs = UserDict()
gdefs.update(globals)
gdefs.update(__builtin__.__dict__)
defs = UserDict()
defs.update(gdefs)
defs.update(_def)
errors = Set()
for name in use.keys():
if not defs.has_key(name):
firstuse = use[name][0]
self.warn(NameError, scope.name, firstuse, name)
errors.add(name)
# check for UndefinedLocalNameError
# order == use & def sorted by lineno
# elements are lineno, flag, name
# flag = 0 if use, flag = 1 if def
order = []
for name, lines in use.items():
if gdefs.has_key(name) and not _def.has_key(name):
# this is a global ref, we can skip it
continue
for lineno in lines:
order.append((lineno, 0, name))
for name, lines in _def.items():
for lineno in lines:
order.append((lineno, 1, name))
order.sort()
# ready contains names that have been defined or warned about
ready = Set()
for lineno, flag, name in order:
if flag == 0: # use
if not ready.has_elt(name) and not errors.has_elt(name):
self.warn(UndefinedLocal, scope.name, lineno, name)
ready.add(name) # don't warn again
else:
ready.add(name)
# below are visitor methods
def visitFunction(self, node, noname=0):
for expr in node.defaults:
self.visit(expr)
if not noname:
self.defName(node.name, node.lineno)
self.enterNamespace(node)
for name in node.argnames:
self.defName(name, node.lineno)
self.visit(node.code)
self.exitNamespace()
return 1
def visitLambda(self, node):
return self.visitFunction(node, noname=1)
def visitClass(self, node):
for expr in node.bases:
self.visit(expr)
self.defName(node.name, node.lineno)
self.enterNamespace(node)
self.visit(node.code)
self.exitNamespace()
return 1
def visitName(self, node):
self.useName(node.name, node.lineno)
def visitGlobal(self, node):
for name in node.names:
self.defName(name, node.lineno, local=0)
def visitImport(self, node):
for name, alias in node.names:
self.defName(alias or name, node.lineno)
visitFrom = visitImport
def visitAssName(self, node):
self.defName(node.name, node.lineno)
def check(filename):
global p, checker
p = parseFile(filename)
checker = CheckNames(filename)
walk(p, checker)
checker.check()
for w in checker.warnings:
print w
if __name__ == "__main__":
import sys
# XXX need to do real arg processing
check(sys.argv[1])