Why don't people like lisp?

Wed Oct 22 00:34:12 EDT 2003

Me:
> Python has the ability to do exactly
> what you're saying (domain language -> AST -> Python code or AST ->
> compiler).  It's rarely needed (I've used it twice now in my six years
> or so of Python), so why should a language cater to make that
> easy at the expense of making frequent things harder?

As an example, here's a quick hack of a way to parse a simple
stack-based language and make a native Python function out
of it.  I am the first to admit that it's ugly code, but it does work.

I am curious to see the equivalent code in Lisp.  Here's the spec:

All tokens are separated by whitespace (space, tab, or newline; you may
also treat vertical tab and form feed as whitespace if you want).

Numbers are the standard IEEE floats (excepting NaN, Inf, etc)
and represented "as usual".

The operators are addition ("+"), subtraction ("-"), multiplication
("*"), division ("/"), and exponentiation ("**").  These are binary
operators only, so to get -b use 0 b - .

The variable names are of the form [A-Za-z_][A-Za-z0-9_]*
and must be passed to the function when making the function call.
The order of names is the same as the order the names appear
in the RPN expression, so in "b a +" the function created takes
"b" as the first argument and "a" as the second.

Here are some examples of using the Python version:

>>> import rpn
>>> f = rpn.compile("a 2 b * +")
>>> f(3, 4)
11.0
>>> f(a=4, b=3)
10.0
>>> f(b=4, a=3)
11.0
>>> g = rpn.compile("1 2 3 4 5 + + + + a **")
>>> g(2)
225.0
>>> h = rpn.compile("""
...   0 b -
...   b 2 **
...   4 a c * *
...      -
...   0.5 **
...   +
...   2 a *
...     /
...     """)
>>> h(a=1, b=6, c=3)
-0.55051025721682212
>>> (-6+(6*6-4*1*3)**0.5)/(2*1)
-0.55051025721682212
>>>

Note that the code also handles errors reasonably well.
E.g., the following examples show that it correctly reports
the line numbers (counted from zero) as given in the original string:

>>> h(a="q", b=6, c=3)
Traceback (most recent call last):
  File "<interactive input>", line 1, in ?
  File "<RPN string RPN2>", line 3, in RPN2
TypeError: can't multiply sequence to non-int
>>> h(a=1, b="", c=3)
Traceback (most recent call last):
  File "<interactive input>", line 1, in ?
  File "<RPN string RPN2>", line 1, in RPN2
TypeError: unsupported operand type(s) for -: 'float' and 'str'
>>> h(a=0, b=6, c=4)
Traceback (most recent call last):
  File "<interactive input>", line 1, in ?
  File "<RPN string RPN2>", line 7, in RPN2
ZeroDivisionError: float division
>>>

Here's an error detected by the translator

>>> f = rpn.compile("a + 2 * b")
Traceback (most recent call last):
  File "<interactive input>", line 1, in ?
  File "E:\dalke\rpn.py", line 122, in compile
    return translate(parse(tokenize(s)))
  File "E:\dalke\rpn.py", line 98, in translate
    stack.add_oper(_oper_table[token.val], token)
  File "E:\dalke\rpn.py", line 72, in add_oper
    raise RPNError(
RPNError: Binary operator at line 0 char 3 missing terms

And here's a problem detected during tokenization

>>> rpn.compile("5t")
Traceback (most recent call last):
  File "<interactive input>", line 1, in ?
  File "E:\dalke\rpn.py", line 122, in compile
    return translate(parse(tokenize(s)))
  File "E:\dalke\rpn.py", line 56, in parse
    tokens = list(tokens)  # this tree is very flat :)
  File "E:\dalke\rpn.py", line 46, in tokenize
    raise RPNError(
RPNError: Unknown token '5t' at line 0, character 1
>>>

I expect any similar Lisp implementation to track the
line numbers for error reporting.

                    Andrew
                    dalke at dalkescientific.com

import re
import sys
from compiler import ast, misc, pycodegen

class RPNError(Exception):
    """Raised when an RPN expression cannot be tokenized or translated."""

class Token:
    """One lexical item of an RPN expression.

    The class attributes VAR, FLOAT and OPER are the possible values
    of the ``type`` attribute.
    """
    VAR = 1
    FLOAT = 2
    OPER = 3

    def __init__(self, type, val, lineno, charpos):
        # ``type`` shadows the builtin, but the parameter name is kept so
        # that existing keyword callers keep working.
        self.type = type        # one of Token.VAR / Token.FLOAT / Token.OPER
        self.val = val          # variable name, float value or operator text
        self.lineno = lineno    # line of the token in the source string
        self.charpos = charpos  # position of the token within its line

    def __repr__(self):
        """Return a readable description of the token for debugging."""
        return ("Token(type=%r, val=%r, lineno=%r, charpos=%r)" %
                (self.type, self.val, self.lineno, self.charpos))

_symbol_re = re.compile(r"\S+")
_operators = "+ - * / **".split()
_variable_re = re.compile(r"[A-Za-z_][A-Za-z0-9_]*$")
# Every finite float literal contains at least one digit, while the
# special spellings float() may accept ("nan", "inf", "infinity",
# optionally signed) contain none.
_digit_re = re.compile(r"[0-9]")

def _classify(word, lineno, charpos):
    """Return the Token for one whitespace-delimited word.

    Raises RPNError when the word is not an operator, a variable name
    or a finite float literal.
    """
    if word in _operators:
        return Token(Token.OPER, word, lineno, charpos)

    # Names are checked before numbers so that identifiers such as
    # "nan" or "inf", which float() may also accept, stay variables.
    if _variable_re.match(word):
        return Token(Token.VAR, word, lineno, charpos)

    # Words without a digit (e.g. "-inf") are never offered to float(),
    # so non-finite constants cannot be spelled in an RPN string.
    if _digit_re.search(word):
        try:
            value = float(word)
        except ValueError:
            # not a number either; reported below
            pass
        else:
            return Token(Token.FLOAT, value, lineno, charpos)

    # Hmm, wonder what it is.
    raise RPNError(
        "Unknown token %r at line %d, character %d" %
        (word, lineno, charpos))

def tokenize(s):
    """Yield a Token for every whitespace-separated word of the string s.

    Line numbers count from 0 and character positions from 1, both
    relative to s split on newline characters.  This is a generator, so
    the RPNError for an unrecognized word is raised only when iteration
    reaches that word.
    """
    for lineno, line in enumerate(s.split("\n")):
        for match in _symbol_re.finditer(line):
            yield _classify(match.group(0), lineno, match.start(0) + 1)

class ParseTree:
    """Container pairing the function's parameter names with its tokens."""
    def __init__(self, param_names, tokens):
        # Both values are stored exactly as given (no copies are made).
        self.param_names, self.tokens = param_names, tokens

def parse(tokens):
    """Collect the tokens and the distinct variable names they mention.

    tokens may be any iterable of Token objects; it is consumed in full
    here, so an error raised by a lazy tokenizer surfaces from this call.
    Returns a ParseTree whose param_names lists each variable once, in
    order of first appearance.
    """
    token_list = list(tokens)  # this tree is very flat :)
    ordered_names = []
    for tok in token_list:
        if tok.type != Token.VAR:
            continue
        name = tok.val
        if name in ordered_names:
            # already registered as a parameter
            continue
        ordered_names.append(name)
    return ParseTree(ordered_names, token_list)

class Stack:
    """Operand stack used while folding RPN tokens into expression nodes."""
    def __init__(self):
        self.stack = []

    def add_term(self, term, token):
        """Stamp term with the token's line number and push it."""
        term.lineno = token.lineno
        self.stack.append(term)

    def add_oper(self, klass, token):
        """Replace the top two entries with klass applied to them.

        klass is called with a list holding the two topmost entries
        (deeper one first); the result is stamped with the token's line
        number.  Raises RPNError when fewer than two entries exist.
        """
        operands = self.stack[-2:]
        if len(operands) != 2:
            message = ("Binary operator at line %d char %d missing terms" %
                       (token.lineno, token.charpos))
            raise RPNError(message)
        node = klass(operands)
        node.lineno = token.lineno
        # Swap the two consumed operands for the combined node.
        del self.stack[-2:]
        self.stack.append(node)

# Maps each RPN operator token to the compiler.ast node class that
# translate() instantiates for it.
_oper_table = {
    "+": ast.Add,
    "-": ast.Sub,
    "*": ast.Mul,
    "/": ast.Div,
    "**": ast.Power,
}
# Source of the serial numbers that give every generated function its
# own "RPN<n>" name.
_id_gen = iter(xrange(sys.maxint))

def translate(parse_tree):
    stack = Stack()

    for token in parse_tree.tokens:
        if token.type == Token.FLOAT:
            stack.add_term(ast.Const(token.val), token)
        elif token.type == Token.VAR:
            stack.add_term(ast.Name(token.val),
                     token)
        elif token.type == Token.OPER:
            stack.add_oper(_oper_table[token.val], token)
        else:
            raise AssertionError(repr(token.type))

    stack = stack.stack
    if len(stack) != 1:
        raise RPNError("evaluation ends with stack size %d" %
                       len(stack))

    # go through an ugly bit of shenanigans
    # (I don't like the compiler API ):
    fctn_name = 'RPN' + str(_id_gen.next())
    fctn = ast.Function(fctn_name,
                        parse_tree.param_names, [],
                        0, None,
                        ast.Stmt([ast.Return(stack[0])]))
    mod = ast.Module(None, ast.Stmt([fctn]))
    misc.set_filename("<RPN string " + fctn_name + ">", mod)
    code = pycodegen.ModuleCodeGenerator(mod).getCode()
    d = {"__builtins__": {}}
    exec code in d, d
    return d[fctn_name]

def compile(s):
    """Compile the RPN expression string s into a Python function.

    Runs the three stages in order: tokenize, parse, translate.  Note
    that inside this module the name shadows the builtin compile().
    """
    token_stream = tokenize(s)
    tree = parse(token_stream)
    return translate(tree)

def main():
    assert compile("a 2 3 + -")(a=6) == 1
    assert compile("a 2 3 + -")(7) == 2
    assert compile("a b *")(2, 3) == 6
    assert compile("a b -")(b=2, a=3) == 1
    assert compile("1 2 3 4 + + +")() == 10
    print "All tests passed"

if __name__ == "__main__":
    main()