Why don't people like lisp?
Andrew Dalke
adalke at mindspring.com
Wed Oct 22 00:34:12 EDT 2003
Me:
> Python has the ability to do exactly
> what you're saying (domain language -> AST -> Python code or AST ->
> compiler). It's rarely needed (I've used it twice now in my six years
> or so of Python), so why should a language cater to make that
> easy at the expense of making frequent things harder?
As an example, here's a quick hack of a way to parse a simple
stack-based language and make a native Python function out
of it. I am the first to admit that it's ugly code, but it does work.
I am curious to see the equivalent code in Lisp. Here's the spec:
All tokens are separated by whitespace (space, tab, newline, and
include vertical tab and form feed if you want).
Numbers are the standard IEEE floats (excepting NaN, Inf, etc)
and represented "as usual".
The operators are addition ("+"), subtraction ("-"), multiplication
("*"), division ("/"), and exponentiation ("**"). These are binary
operators only, so to get -b use 0 b - .
The variable names are of the form [A-Za-z_][A-Za-z0-9_]*
and must be passed to the function when making the function call.
The order of names is the same as the order the names appear
in the RPN expression, so in "b a +" the function created takes
"b" as the first argument and "a" as the second.
Here are some examples of using the Python version:
>>> import rpn
>>> f = rpn.compile("a 2 b * +")
>>> f(3, 4)
11.0
>>> f(a=4, b=3)
10.0
>>> f(b=4, a=3)
11.0
>>> g = rpn.compile("1 2 3 4 5 + + + + a **")
>>> g(2)
225.0
>>> h = rpn.compile("""
... 0 b -
... b 2 **
... 4 a c * *
... -
... 0.5 **
... +
... 2 a *
... /
... """)
>>> h(a=1, b=6, c=3)
-0.55051025721682212
>>> (-6+(6*6-4*1*3)**0.5)/(2*1)
-0.55051025721682212
>>>
Note that the code also handles errors reasonably well.
Eg, the following examples show that it correctly reports
the line numbers as given in the original string
>>> h(a="q", b=6, c=3)
Traceback (most recent call last):
File "<interactive input>", line 1, in ?
File "<RPN string RPN2>", line 3, in RPN2
TypeError: can't multiply sequence to non-int
>>> h(a=1, b="", c=3)
Traceback (most recent call last):
File "<interactive input>", line 1, in ?
File "<RPN string RPN2>", line 1, in RPN2
TypeError: unsupported operand type(s) for -: 'float' and 'str'
>>> h(a=0, b=6, c=4)
Traceback (most recent call last):
File "<interactive input>", line 1, in ?
File "<RPN string RPN2>", line 7, in RPN2
ZeroDivisionError: float division
>>>
Here's an error detected by the translator
>>> f = rpn.compile("a + 2 * b")
Traceback (most recent call last):
File "<interactive input>", line 1, in ?
File "E:\dalke\rpn.py", line 122, in compile
return translate(parse(tokenize(s)))
File "E:\dalke\rpn.py", line 98, in translate
stack.add_oper(_oper_table[token.val], token)
File "E:\dalke\rpn.py", line 72, in add_oper
raise RPNError(
RPNError: Binary operator at line 0 char 3 missing terms
And here's a problem detected during tokenization
>>> rpn.compile("5t")
Traceback (most recent call last):
File "<interactive input>", line 1, in ?
File "E:\dalke\rpn.py", line 122, in compile
return translate(parse(tokenize(s)))
File "E:\dalke\rpn.py", line 56, in parse
tokens = list(tokens) # this tree is very flat :)
File "E:\dalke\rpn.py", line 46, in tokenize
raise RPNError(
RPNError: Unknown token '5t' at line 0, character 1
>>>
I expect any similar Lisp implementation to track the
line numbers for error reporting.
Andrew
dalke at dalkescientific.com
import re
import sys
from compiler import ast, misc, pycodegen
class RPNError(Exception):
    """Raised when an RPN expression cannot be tokenized or translated."""
class Token:
    """One lexical element of an RPN expression.

    Instances simply store the four constructor arguments:

    type    -- one of the class constants VAR, FLOAT or OPER
    val     -- the token's value
    lineno  -- line position of the token in the source string
    charpos -- character position of the token within its line
    """

    # Token categories.
    VAR, FLOAT, OPER = 1, 2, 3

    def __init__(self, type, val, lineno, charpos):
        self.type, self.val = type, val
        self.lineno, self.charpos = lineno, charpos
# Any maximal run of non-whitespace characters is a candidate token.
_symbol_re = re.compile(r"\S+")

# The binary operators of the RPN language.
_operators = "+ - * / **".split()

# Variable names: a letter or underscore, then letters, digits, underscores.
_variable_re = re.compile(r"[A-Za-z_][A-Za-z0-9_]*$")


def tokenize(s):
    """Generate a Token for every whitespace-separated word in *s*.

    Each word is classified as an operator (Token.OPER, value is the
    operator text), a variable name (Token.VAR, value is the name) or a
    finite floating point literal (Token.FLOAT, value is the float).

    Token line numbers count from 0 and character positions from 1.

    Raises RPNError for a word that fits none of those categories.
    Because this is a generator, the error surfaces only when the
    offending word is reached during iteration.
    """
    for lineno, line in enumerate(s.split("\n")):
        for match in _symbol_re.finditer(line):
            word = match.group(0)
            charpos = match.start(0) + 1

            if word in _operators:
                yield Token(Token.OPER, word, lineno, charpos)
                continue

            # Identifiers are recognised *before* trying float(): where
            # float() accepts spellings such as "inf", "nan" or
            # "infinity", trying it first would turn those perfectly
            # legal variable names into constants.
            if _variable_re.match(word):
                yield Token(Token.VAR, word, lineno, charpos)
                continue

            try:
                value = float(word)
            except ValueError:
                # okay, it isn't a float
                value = None

            # The language only has ordinary floats (no NaN, no Inf).
            # "value - value" is zero for every finite float and NaN for
            # the non-finite ones, so this also filters out things like
            # "-inf", "+nan" and literals that overflow to infinity.
            if value is not None and value - value == 0:
                yield Token(Token.FLOAT, value, lineno, charpos)
                continue

            # Hmm, wonder what it is.
            raise RPNError(
                "Unknown token %r at line %d, character %d" %
                (word, lineno, charpos))
class ParseTree:
    """Holds a token sequence together with the parameter names it uses."""

    def __init__(self, param_names, tokens):
        self.tokens = tokens
        self.param_names = param_names
def parse(tokens):
    """Collect *tokens* into a ParseTree.

    The iterable is fully consumed into a list.  The tree's parameter
    names are the values of the VAR tokens, each listed once, in order
    of first appearance.
    """
    token_list = list(tokens)  # this tree is very flat :)
    ordered_names = []
    for tok in token_list:
        if tok.type != Token.VAR:
            continue
        name = tok.val
        if name in ordered_names:
            continue
        ordered_names.append(name)
    return ParseTree(ordered_names, token_list)
class Stack:
    """Operand stack used while folding RPN tokens into one expression."""

    def __init__(self):
        self.stack = []

    def add_term(self, term, token):
        """Push *term* after stamping it with the line number of *token*."""
        term.lineno = token.lineno
        self.stack.append(term)

    def add_oper(self, klass, token):
        """Combine the two topmost entries into a single node.

        *klass* is called with a two-element list holding the top two
        entries (deeper one first); the result gets the line number of
        *token* and replaces both entries on the stack.

        Raises RPNError when fewer than two entries are available.
        """
        if len(self.stack) >= 2:
            operands = self.stack[-2:]
            node = klass(operands)
            node.lineno = token.lineno
            # Only touch the stack once the node has been built.
            self.stack[-2:] = [node]
            return
        message = "Binary operator at line %d char %d missing terms" % (
            token.lineno, token.charpos)
        raise RPNError(message)
_id_gen = iter(xrange(sys.maxint))
_oper_table = {
"+": ast.Add,
"-": ast.Sub,
"*": ast.Mul,
"/": ast.Div,
"**": ast.Power,
}
def translate(parse_tree):
stack = Stack()
for token in parse_tree.tokens:
if token.type == Token.FLOAT:
stack.add_term(ast.Const(token.val), token)
elif token.type == Token.VAR:
stack.add_term(ast.Name(token.val),
token)
elif token.type == Token.OPER:
stack.add_oper(_oper_table[token.val], token)
else:
raise AssertionError(repr(token.type))
stack = stack.stack
if len(stack) != 1:
raise RPNError("evaluation ends with stack size %d" %
len(stack))
# go through an ugly bit of shenanigans
# (I don't like the compiler API ):
fctn_name = 'RPN' + str(_id_gen.next())
fctn = ast.Function(fctn_name,
parse_tree.param_names, [],
0, None,
ast.Stmt([ast.Return(stack[0])]))
mod = ast.Module(None, ast.Stmt([fctn]))
misc.set_filename("<RPN string " + fctn_name + ">", mod)
code = pycodegen.ModuleCodeGenerator(mod).getCode()
d = {"__builtins__": {}}
exec code in d, d
return d[fctn_name]
def compile(s):
    """Compile the RPN expression string *s* into a Python function.

    Runs the three stages in order: tokenize, parse, translate.
    """
    token_stream = tokenize(s)
    tree = parse(token_stream)
    return translate(tree)
def main():
    """Run a handful of self-checks and report success."""
    # (RPN source, positional args, keyword args, expected result)
    checks = [
        ("a 2 3 + -", (), {"a": 6}, 1),
        ("a 2 3 + -", (7,), {}, 2),
        ("a b *", (2, 3), {}, 6),
        ("a b -", (), {"b": 2, "a": 3}, 1),
        ("1 2 3 4 + + +", (), {}, 10),
    ]
    for source, args, kwargs, expected in checks:
        assert compile(source)(*args, **kwargs) == expected
    print("All tests passed")


if __name__ == "__main__":
    main()
More information about the Python-list
mailing list