[pypy-svn] r14116 - in pypy/dist/pypy/module/recparser: . data test

adim at codespeak.net
Sun Jul 3 11:38:46 CEST 2005


Author: adim
Date: Sun Jul  3 11:38:44 2005
New Revision: 14116

Removed:
   pypy/dist/pypy/module/recparser/automata.py
   pypy/dist/pypy/module/recparser/data/
   pypy/dist/pypy/module/recparser/ebnflexer.py
   pypy/dist/pypy/module/recparser/ebnfparse.py
   pypy/dist/pypy/module/recparser/grammar.py
   pypy/dist/pypy/module/recparser/pythonlexer.py
   pypy/dist/pypy/module/recparser/pythonparse.py
   pypy/dist/pypy/module/recparser/pythonutil.py
   pypy/dist/pypy/module/recparser/pytokenize.py
   pypy/dist/pypy/module/recparser/syntaxtree.py
   pypy/dist/pypy/module/recparser/test/
   pypy/dist/pypy/module/recparser/test_lookahead.py
   pypy/dist/pypy/module/recparser/tuplebuilder.py
Modified:
   pypy/dist/pypy/module/recparser/compat.py
Log:
these files have been moved to interpreter/pyparser


Deleted: /pypy/dist/pypy/module/recparser/automata.py
==============================================================================
--- /pypy/dist/pypy/module/recparser/automata.py	Sun Jul  3 11:38:44 2005
+++ (empty file)
@@ -1,89 +0,0 @@
-#! /usr/bin/env python
-# ______________________________________________________________________
-"""Module automata
-
-THIS FILE WAS COPIED FROM pypy/module/parser/pytokenize.py AND ADAPTED
-TO BE ANNOTATABLE (mainly by making the DFA's __init__ accept two lists
-instead of a single nested one)
-
-$Id: automata.py,v 1.2 2003/10/02 17:37:17 jriehl Exp $
-"""
-# ______________________________________________________________________
-# Module level definitions
-
-# PYPY Modification: removed the EMPTY class as it's not needed here
-
-
-# PYPY Modification: we don't need a particular DEFAULT class here
-#                    a simple None works fine.
-#                    (Having a DefaultClass inheriting from str makes
-#                     the annotator crash)
-DEFAULT = None
-# PYPY Modification: removed all automata functions (any, maybe,
-#                     newArcPair, etc.)
-
-class DFA:
-    # ____________________________________________________________
-    def __init__(self, states, accepts, start = 0):
-        self.states = states
-        self.accepts = accepts
-        self.start = start
-
-    # ____________________________________________________________
-    def recognize (self, inVec, pos = 0, greedy = True):
-        crntState = self.start
-        i = pos
-        lastAccept = False
-        for item in inVec[pos:]:
-            # arcMap, accept = self.states[crntState]
-            arcMap = self.states[crntState]
-            accept = self.accepts[crntState]
-            if item in arcMap:
-                crntState = arcMap[item]
-            elif DEFAULT in arcMap:
-                crntState = arcMap[DEFAULT]
-            elif accept:
-                return i
-            elif lastAccept:
-                # This is now needed b/c of exception cases where there are
-                # transitions to dead states
-                return i - 1
-            else:
-                return -1
-            lastAccept = accept
-            i += 1
-        # if self.states[crntState][1]:
-        if self.accepts[crntState]:
-            return i
-        elif lastAccept:
-            return i - 1
-        else:
-            return -1
-
-# ______________________________________________________________________
-
-class NonGreedyDFA (DFA):
-    def recognize (self, inVec, pos = 0):
-        crntState = self.start
-        i = pos
-        for item in inVec[pos:]:
-            # arcMap, accept = self.states[crntState]
-            arcMap = self.states[crntState]
-            accept = self.accepts[crntState]
-            if accept:
-                return i
-            elif item in arcMap:
-                crntState = arcMap[item]
-            elif DEFAULT in arcMap:
-                crntState = arcMap[DEFAULT]
-            else:
-                return -1
-            i += 1
-        # if self.states[crntState][1]:
-        if self.accepts[crntState]:
-            return i
-        else:
-            return -1
-
-# ______________________________________________________________________
-# End of automata.py

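For reference, a minimal sketch of how these DFA tables are driven (not part
of the commit; the import path below assumes the class keeps the same
DFA(states, accepts) interface at its new home under pypy/interpreter/pyparser):

    from pypy.interpreter.pyparser.automata import DFA  # assumed new location

    # state 0 expects a first digit, state 1 accepts and loops on more digits
    digits = {}
    for c in "0123456789":
        digits[c] = 1
    states = [digits, digits.copy()]
    accepts = [False, True]
    number = DFA(states, accepts)

    print number.recognize("123abc")   # -> 3, offset just past the match
    print number.recognize("abc")      # -> -1, no match
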
Modified: pypy/dist/pypy/module/recparser/compat.py
==============================================================================
--- pypy/dist/pypy/module/recparser/compat.py	(original)
+++ pypy/dist/pypy/module/recparser/compat.py	Sun Jul  3 11:38:44 2005
@@ -3,14 +3,26 @@
 from pythonparse import parse_python_source
 from pythonutil import PYTHON_PARSER
 from compiler import transformer, compile as pycompile
- 
+
 def suite( source ):
-    builder = parse_python_source( source, PYTHON_PARSER, "file_input" )
-    return builder.stack[-1]
+    strings = [line+'\n' for line in source.split('\n')]
+    builder = parse_python_source( strings, PYTHON_PARSER, "file_input" )
+    nested_tuples = builder.stack[-1].as_tuple()
+    if builder.source_encoding is not None:
+        return (symbol.encoding_decl, nested_tuples, builder.source_encoding)
+    else:
+        return (None, nested_tuples, None)
+    return nested_tuples
 
 def expr( source ):
-    builder = parse_python_source( source, PYTHON_PARSER, "eval_input" )
-    return builder.stack[-1]
+    strings = [line+'\n' for line in source.split('\n')]
+    builder = parse_python_source( strings, PYTHON_PARSER, "eval_input" )
+    nested_tuples = builder.stack[-1].as_tuple()
+    if builder.source_encoding is not None:
+        return (symbol.encoding_decl, nested_tuples, builder.source_encoding)
+    else:
+        return (None, nested_tuples, None)
+    return nested_tuples
 
 def ast2tuple(node, line_info=False):
     """Quick dummy implementation of parser.ast2tuple(tree) function"""

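A rough sketch of how the new return value of suite() unpacks (the import
path and the sample source are assumptions, not part of this diff):

    from pypy.module.recparser import compat  # assumed import path

    enc_decl, nested_tuples, encoding = compat.suite("a = 1\n")
    if enc_decl is None:
        print "no encoding declaration"
    else:
        print "declared encoding:", encoding
    print nested_tuples    # parser-style nested tuples for file_input
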
Deleted: /pypy/dist/pypy/module/recparser/ebnflexer.py
==============================================================================
--- /pypy/dist/pypy/module/recparser/ebnflexer.py	Sun Jul  3 11:38:44 2005
+++ (empty file)
@@ -1,83 +0,0 @@
-"""This is a lexer for a Python recursive descent parser
-it obeys the TokenSource interface defined for the grammar
-analyser in grammar.py
-"""
-
-import re
-from grammar import TokenSource, Token
-
-DEBUG = False
-
-## Lexer for Python's grammar ########################################
-g_symdef = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*:",re.M)
-g_symbol = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*",re.M)
-g_string = re.compile(r"'[^']+'",re.M)
-g_tok = re.compile(r"\[|\]|\(|\)|\*|\+|\|",re.M)
-g_skip = re.compile(r"\s*(#.*$)?",re.M)
-
-class GrammarSource(TokenSource):
-    """The grammar tokenizer"""
-    def __init__(self, inpstring ):
-        TokenSource.__init__(self)
-        self.input = inpstring
-        self.pos = 0
-        self._peeked = None
-
-    def context(self):
-        return self.pos, self._peeked
-
-    def offset(self, ctx=None):
-        if ctx is None:
-            return self.pos
-        else:
-            assert type(ctx)==int
-            return ctx
-
-    def restore(self, ctx):
-        self.pos, self._peeked = ctx
-
-    def next(self):
-        if self._peeked is not None:
-            peeked = self._peeked
-            self._peeked = None
-            return peeked
-        
-        pos = self.pos
-        inp = self.input
-        m = g_skip.match(inp, pos)
-        while m and pos!=m.end():
-            pos = m.end()
-            if pos==len(inp):
-                self.pos = pos
-                return Token("EOF", None)
-            m = g_skip.match(inp, pos)
-        m = g_symdef.match(inp,pos)
-        if m:
-            tk = m.group(0)
-            self.pos = m.end()
-            return Token('SYMDEF',tk[:-1])
-        m = g_tok.match(inp,pos)
-        if m:
-            tk = m.group(0)
-            self.pos = m.end()
-            return Token(tk,tk)
-        m = g_string.match(inp,pos)
-        if m:
-            tk = m.group(0)
-            self.pos = m.end()
-            return Token('STRING',tk[1:-1])
-        m = g_symbol.match(inp,pos)
-        if m:
-            tk = m.group(0)
-            self.pos = m.end()
-            return Token('SYMBOL',tk)
-        raise ValueError("Unknown token at pos=%d context='%s'" % (pos,inp[pos:pos+20]) )
-
-    def peek(self):
-        if self._peeked is not None:
-            return self._peeked
-        self._peeked = self.next()
-        return self._peeked
-
-    def debug(self):
-        return self.input[self.pos:self.pos+20]

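As an illustration, the EBNF tokenizer can be exercised on a one-rule grammar
fragment like this (a sketch only; the import path assumes the module keeps
its name under pypy/interpreter/pyparser):

    from pypy.interpreter.pyparser.ebnflexer import GrammarSource  # assumed path

    src = GrammarSource("small_stmt: expr_stmt | pass_stmt\n")
    tok = src.next()
    while tok.name != "EOF":
        print tok.name, tok.value   # SYMDEF, SYMBOL and '|' tokens in order
        tok = src.next()
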
Deleted: /pypy/dist/pypy/module/recparser/ebnfparse.py
==============================================================================
--- /pypy/dist/pypy/module/recparser/ebnfparse.py	Sun Jul  3 11:38:44 2005
+++ (empty file)
@@ -1,277 +0,0 @@
-#!/usr/bin/env python
-from grammar import BaseGrammarBuilder, Alternative, Sequence, Token, \
-     KleenStar, GrammarElement, build_first_sets, EmptyToken
-from ebnflexer import GrammarSource
-
-import re
-py_name = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*", re.M)
-
-punct=['>=', '<>', '!=', '<', '>', '<=', '==', '\\*=',
-       '//=', '%=', '^=', '<<=', '\\*\\*=', '\\', '=',
-       '\\+=', '>>=', '=', '&=', '/=', '-=', '\n,', '^', '>>', '&', '\\+', '\\*', '-', '/', '\\.', '\\*\\*', '%', '<<', '//', '\\', '', '\n\\)', '\\(', ';', ':', '@', '\\[', '\\]', '`', '\\{', '\\}']
-
-py_punct = re.compile(r"""
->=|<>|!=|<|>|<=|==|~|
-\*=|//=|%=|\^=|<<=|\*\*=|\|=|\+=|>>=|=|&=|/=|-=|
-,|\^|>>|&|\+|\*|-|/|\.|\*\*|%|<<|//|\||
-\)|\(|;|:|@|\[|\]|`|\{|\}
-""", re.M | re.X)
-
-
-TERMINALS = [
-    'NAME', 'NUMBER', 'STRING', 'NEWLINE', 'ENDMARKER',
-    'INDENT', 'DEDENT' ]
-
-
-## Grammar Visitors ##################################################
-# FIXME: parsertools.py ? parser/__init__.py ?
-
-class NameToken(Token):
-    """A token that is not a keyword"""
-    def __init__(self, keywords=None ):
-        Token.__init__(self, "NAME")
-        self.keywords = keywords
-
-    def match(self, source, builder, level=0):
-        """Matches a token.
-        the default implementation is to match any token whose type
-        corresponds to the object's name. You can extend Token
-        to match anything returned from the lexer. For example:
-        type, value = source.next()
-        if type=="integer" and int(value)>=0:
-            # found
-        else:
-            # error unknown or negative integer
-        """
-        ctx = source.context()
-        tk = source.next()
-        if tk.name==self.name:
-            if tk.value not in self.keywords:
-                ret = builder.token( tk.name, tk.value, source )
-                return self.debug_return( ret, tk.name, tk.value )
-        source.restore( ctx )
-        return 0
-        
-    def match_token(self, other):
-        """convenience '==' implementation, this is *not* a *real* equality test
-        a Token instance can be compared to:
-         - another Token instance in which case all fields (name and value)
-           must be equal
-         - a tuple, such as those yielded by the Python lexer, in which case
-           the comparison algorithm is similar to the one in match()
-           XXX:
-             1/ refactor match and __eq__ ?
-             2/ make source.next and source.peek return a Token() instance
-        """
-        if not isinstance(other, Token):
-            raise RuntimeError("Unexpected token type %r" % other)
-        if other is EmptyToken:
-            return False
-        if other.name != self.name:
-            return False
-        if other.value in self.keywords:
-            return False
-        return True
-
-
-class EBNFVisitor(object):
-    
-    def __init__(self):
-        self.rules = {}
-        self.terminals = {}
-        self.current_rule = None
-        self.current_subrule = 0
-        self.tokens = {}
-        self.items = []
-        self.terminals['NAME'] = NameToken()
-
-    def new_name( self ):
-        rule_name = ":%s_%s" % (self.current_rule, self.current_subrule)
-        self.current_subrule += 1
-        return rule_name
-
-    def new_item( self, itm ):
-        self.items.append( itm )
-        return itm
-    
-    def visit_grammar( self, node ):
-        # print "Grammar:"
-        for rule in node.nodes:
-            rule.visit(self)
-        # the rules are registered already
-        # we do a pass through the variables to detect
-        # terminal symbols from non terminals
-        for r in self.items:
-            for i,a in enumerate(r.args):
-                if a.name in self.rules:
-                    assert isinstance(a,Token)
-                    r.args[i] = self.rules[a.name]
-                    if a.name in self.terminals:
-                        del self.terminals[a.name]
-        # XXX .keywords also contains punctuations
-        self.terminals['NAME'].keywords = self.tokens.keys()
-
-    def visit_rule( self, node ):
-        symdef = node.nodes[0].value
-        self.current_rule = symdef
-        self.current_subrule = 0
-        alt = node.nodes[1]
-        rule = alt.visit(self)
-        if not isinstance( rule, Token ):
-            rule.name = symdef
-        self.rules[symdef] = rule
-        
-    def visit_alternative( self, node ):
-        items = [ node.nodes[0].visit(self) ]
-        items += node.nodes[1].visit(self)        
-        if len(items) == 1 and items[0].name.startswith(':'):
-            return items[0]
-        alt = Alternative( self.new_name(), items )
-        return self.new_item( alt )
-
-    def visit_sequence( self, node ):
-        """ """
-        items = []
-        for n in node.nodes:
-            items.append( n.visit(self) )
-        if len(items)==1:
-            return items[0]
-        elif len(items)>1:
-            return self.new_item( Sequence( self.new_name(), items) )
-        raise SyntaxError("Found empty sequence")
-
-    def visit_sequence_cont( self, node ):
-        """Returns a list of sequences (possibly empty)"""
-        return [n.visit(self) for n in node.nodes]
-
-    def visit_seq_cont_list(self, node):
-        return node.nodes[1].visit(self)
-    
-
-    def visit_symbol(self, node):
-        star_opt = node.nodes[1]
-        sym = node.nodes[0].value
-        terminal = self.terminals.get( sym )
-        if not terminal:
-            terminal = Token( sym )
-            self.terminals[sym] = terminal
-
-        return self.repeat( star_opt, terminal )
-
-    def visit_option( self, node ):
-        rule = node.nodes[1].visit(self)
-        return self.new_item( KleenStar( self.new_name(), 0, 1, rule ) )
-
-    def visit_group( self, node ):
-        rule = node.nodes[1].visit(self)
-        return self.repeat( node.nodes[3], rule )
-
-    def visit_STRING( self, node ):
-        value = node.value
-        tok = self.tokens.get(value)
-        if not tok:
-            if py_punct.match( value ):
-                tok = Token( value )
-            elif py_name.match( value ):
-                tok = Token('NAME', value)
-            else:
-                raise SyntaxError("Unknown STRING value ('%s')" % value )
-            self.tokens[value] = tok
-        return tok
-
-    def visit_sequence_alt( self, node ):
-        res = node.nodes[0].visit(self)
-        assert isinstance( res, GrammarElement )
-        return res
-
-    def repeat( self, star_opt, myrule ):
-        if star_opt.nodes:
-            rule_name = self.new_name()
-            tok = star_opt.nodes[0].nodes[0]
-            if tok.value == '+':
-                return self.new_item( KleenStar( rule_name, _min=1, rule = myrule ) )
-            elif tok.value == '*':
-                return self.new_item( KleenStar( rule_name, _min=0, rule = myrule ) )
-            else:
-                raise SyntaxError("Got symbol star_opt with value='%s'" % tok.value )
-        return myrule
-
-rules = None
-
-def grammar_grammar():
-    """Builds the grammar for the grammar file
-
-    Here's the description of the grammar's grammar ::
-
-      grammar: rule+
-      rule: SYMDEF alternative
-      
-      alternative: sequence ( '|' sequence )+
-      star: '*' | '+'
-      sequence: (SYMBOL star? | STRING | option | group star? )+
-      option: '[' alternative ']'
-      group: '(' alternative ')' star?    
-    """
-    global rules
-    # star: '*' | '+'
-    star          = Alternative( "star", [Token('*'), Token('+')] )
-    star_opt      = KleenStar  ( "star_opt", 0, 1, rule=star )
-
-    # rule: SYMBOL ':' alternative
-    symbol        = Sequence(    "symbol", [Token('SYMBOL'), star_opt] )
-    symboldef     = Token(       "SYMDEF" )
-    alternative   = Sequence(    "alternative", [])
-    rule          = Sequence(    "rule", [symboldef, alternative] )
-
-    # grammar: rule+
-    grammar       = KleenStar(   "grammar", _min=1, rule=rule )
-
-    # alternative: sequence ( '|' sequence )*
-    sequence      = KleenStar(   "sequence", 1 )
-    seq_cont_list = Sequence(    "seq_cont_list", [Token('|'), sequence] )
-    sequence_cont = KleenStar(   "sequence_cont",0, rule=seq_cont_list )
-    
-    alternative.args = [ sequence, sequence_cont ]
-
-    # option: '[' alternative ']'
-    option        = Sequence(    "option", [Token('['), alternative, Token(']')] )
-
-    # group: '(' alternative ')'
-    group         = Sequence(    "group",  [Token('('), alternative, Token(')'), star_opt] )
-
-    # sequence: (SYMBOL | STRING | option | group )+
-    string = Token('STRING')
-    alt           = Alternative( "sequence_alt", [symbol, string, option, group] ) 
-    sequence.args = [ alt ]
-
-
-    rules = [ star, star_opt, symbol, alternative, rule, grammar, sequence,
-              seq_cont_list, sequence_cont, option, group, alt ]
-    build_first_sets( rules )
-    return grammar
-
-
-def parse_grammar(stream):
-    """parses the grammar file
-
-    stream : file-like object representing the grammar to parse
-    """
-    source = GrammarSource(stream.read())
-    rule = grammar_grammar()
-    builder = BaseGrammarBuilder()
-    result = rule.match(source, builder)
-    node = builder.stack[-1]
-    vis = EBNFVisitor()
-    node.visit(vis)
-    return vis
-
-
-from pprint import pprint
-if __name__ == "__main__":
-    grambuild = parse_grammar(file('data/Grammar2.3'))
-    for i,r in enumerate(grambuild.items):
-        print "%  3d : %s" % (i, r)
-    pprint(grambuild.terminals.keys())
-    pprint(grambuild.tokens)
-    print "|".join(grambuild.tokens.keys() )
-

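A quick sketch of parse_grammar() on a toy grammar fed through a file-like
object (assuming the same API at the new location; the two toy rules are made
up for the example):

    from StringIO import StringIO
    from pypy.interpreter.pyparser.ebnfparse import parse_grammar  # assumed path

    toy = StringIO("""
    file_input: statement* 'ENDMARKER'
    statement: 'NAME' '=' 'NUMBER' 'NEWLINE'
    """)
    visitor = parse_grammar(toy)
    for name, rule in visitor.rules.items():
        print name, "->", rule
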
Deleted: /pypy/dist/pypy/module/recparser/grammar.py
==============================================================================
--- /pypy/dist/pypy/module/recparser/grammar.py	Sun Jul  3 11:38:44 2005
+++ (empty file)
@@ -1,559 +0,0 @@
-"""
-a generic recursive descent parser
-the grammar is defined as a composition of objects
-the objects of the grammar are :
-Alternative : as in S -> A | B | C
-Sequence    : as in S -> A B C
-KleenStar   : as in S -> A* or S -> A+
-Token       : a lexer token
-"""
-
-DEBUG = 0
-USE_LOOKAHEAD = True
-
-#### Abstract interface for a lexer/tokenizer
-class TokenSource(object):
-    """Abstract base class for a source tokenizer"""
-    def context(self):
-        """Returns a context to restore the state of the object later"""
-
-    def restore(self, ctx):
-        """Restore the context"""
-
-    def next(self):
-        """Returns the next token from the source
-        a token is a tuple : (type,value) or (None,None) if the end of the
-        source has been found
-        """
-
-    def offset(self, ctx=None):
-        """Returns the position we're at so far in the source
-        optionnally provide a context and you'll get the offset
-        of the context"""
-        return -1
-
-    def current_line(self):
-        """Returns the current line number"""
-        return 0
-
-    def get_pos(self):
-        """Returns the current source position of the scanner"""
-        return 0
-
-    def get_source_text(self, pos1, pos2 ):
-        """Returns the source text between two scanner positions"""
-        return ""
-
-
-######################################################################
-
-
-def build_first_sets(rules):
-    """builds the real first tokens set for each rule in <rules>
-
-    Because a rule can be recursive (directly or indirectly), the
-    *simplest* algorithm to build each first set is to recompute them
-    until Computation(N) = Computation(N-1), N being the number of rounds.
-    As an example, on Python2.3's grammar, we need 19 cycles to compute
-    full first sets.
-    """
-    changed = True
-    while changed:
-        # loop while one first set is changed
-        changed = False
-        for rule in rules:
-            # For each rule, recompute first set
-            size = len(rule.first_set)
-            rule.calc_first_set()
-            new_size = len(rule.first_set)
-            if new_size != size:
-                changed = True
-    for r in rules:
-        assert len(r.first_set) > 0, "Error: empty first set for %s" % r
-        r.reorder_rule()
-
-
-from syntaxtree import SyntaxNode, TempSyntaxNode, TokenNode
-
-class BaseGrammarBuilder(object):
-    """Base/default class for a builder"""
-    def __init__(self, rules=None, debug=0):
-        self.rules = rules or {} # a dictionary of grammar rules for debug/reference
-        # XXX This attribute is here for convenience
-        self.source_encoding = None
-        self.debug = debug
-        self.stack = []
-
-    def context(self):
-        """Returns the state of the builder to be restored later"""
-        #print "Save Stack:", self.stack
-        return len(self.stack)
-
-    def restore(self, ctx):
-        del self.stack[ctx:]
-        #print "Restore Stack:", self.stack
-        
-    def alternative(self, rule, source):
-        # Do nothing, keep rule on top of the stack
-        if rule.is_root():
-            elems = self.stack[-1].expand()
-            self.stack[-1] = SyntaxNode(rule.name, source, elems)
-            if self.debug:
-                self.stack[-1].dumpstr()
-        return True
-
-    def sequence(self, rule, source, elts_number):
-        """ """
-        items = []
-        for node in self.stack[-elts_number:]:
-            items += node.expand()
-        if rule.is_root():
-            node_type = SyntaxNode
-        else:
-            node_type = TempSyntaxNode
-        # replace N elements with 1 element regrouping them
-        if elts_number >= 1:
-            elem = node_type(rule.name, source, items)
-            del self.stack[-elts_number:]
-            self.stack.append(elem)
-        elif elts_number == 0:
-            self.stack.append(node_type(rule.name, source, []))
-        if self.debug:
-            self.stack[-1].dumpstr()
-        return True
-
-    def token(self, name, value, source):
-        self.stack.append(TokenNode(name, source, value))
-        if self.debug:
-            self.stack[-1].dumpstr()
-        return True
-
-
-######################################################################
-# Grammar Elements Classes (Alternative, Sequence, KleenStar, Token) #
-######################################################################
-class GrammarElement(object):
-    """Base parser class"""
-    def __init__(self, name):
-        # the rule name
-        self.name = name
-        self.args = []
-        self._is_root = False
-        self.first_set = []
-        self.first_set_complete = False
-        # self._processing = False
-        self._trace = False
-
-    def is_root(self):
-        """This is a root node of the grammar, that is one that will
-        be included in the syntax tree"""
-        if self.name != ":" and self.name.startswith(":"):
-            return False
-        return True
-    
-
-    def match(self, source, builder, level=0):
-        """Try to match a grammar rule
-
-        If the next set of tokens matches this grammar element, use <builder>
-        to build an appropriate object; otherwise return 0.
-
-        /!\ If the tokens do not match the current grammar element,
-        then <source> is restored to the state it was in before the
-        call to the match() method.
-
-        Returns 0 if there is no match, or an object built by the builder.
-        """
-        if not USE_LOOKAHEAD:
-            return self._match(source, builder, level)
-        pos1 = -1 # XXX make the annotator happy
-        pos2 = -1 # XXX make the annotator happy
-        token = source.peek()
-        if self._trace:
-            pos1 = source.get_pos()
-        in_first_set = self.match_first_set(token)
-        if not in_first_set: # and not EmptyToken in self.first_set:
-            if EmptyToken in self.first_set:
-                ret = builder.sequence(self, source, 0 )
-                if self._trace:
-                    self._debug_display(token, level, 'eee')
-                return self.debug_return( ret, 0 )
-            if self._trace:
-                self._debug_display(token, level, 'rrr')
-            return 0
-        elif self._trace:
-            self._debug_display(token, level, '>>>')
-        
-        res = self._match(source, builder, level)
-        if self._trace:
-            pos2 = source.get_pos()
-            if res:
-                prefix = '+++'
-            else:
-                prefix = '---'
-            self._debug_display(token, level, prefix)
-            print ' '*level, prefix, " TEXT ='%s'" % (
-                source.get_source_text(pos1,pos2))
-            if res:
-                print "*" * 50
-        return res
-
-    def _debug_display(self, token, level, prefix):
-        """prints context debug informations"""
-        prefix = '%s%s' % (' ' * level, prefix)
-        print prefix, " RULE =", self
-        print prefix, " TOKEN =", token
-        print prefix, " FIRST SET =", self.first_set
-        
-        
-    def _match(self, source, builder, level=0):
-        """Try to match a grammar rule
-
-        If the next set of tokens matches this grammar element, use <builder>
-        to build an appropriate object; otherwise return 0.
-
-        /!\ If the tokens do not match the current grammar element,
-        then <source> is restored to the state it was in before the
-        call to the match() method.
-
-        Returns 0 if there is no match, or an object built by the builder.
-        """
-        return 0
-    
-    def parse(self, source):
-        """Returns a simplified grammar if the rule matched at the source
-        current context or None"""
-        # **NOT USED** **NOT IMPLEMENTED**
-        # To consider if we need to improve speed in parsing
-        pass
-
-    def __str__(self):
-        return self.display(0)
-
-    def __repr__(self):
-        return self.display(0)
-
-    def display(self, level):
-        """Helper function used to represent the grammar.
-        mostly used for debugging the grammar itself"""
-        return "GrammarElement"
-
-
-    def debug_return(self, ret, *args ):
-        # FIXME: use a wrapper of match() methods instead of debug_return()
-        #        to prevent additional indirection
-        if ret and DEBUG>0:
-            sargs = ",".join( [ str(i) for i in args ] )
-            print "matched %s (%s): %s" % (self.__class__.__name__, sargs, self.display() )
-        return ret
-
-    
-    def calc_first_set(self):
-        """returns the list of possible next tokens
-        *must* be implemented in subclasses
-        """
-        # XXX: first_set could probably be implemented with sets
-        return []
-
-    def match_first_set(self, other):
-        """matching is not equality:
-        token('NAME','x') matches token('NAME',None)
-        """
-        for tk in self.first_set:
-            if tk.match_token( other ):
-                return True
-        return False
-
-    def in_first_set(self, other):
-        return other in self.first_set
-
-    def reorder_rule(self):
-        """Called after the computation of first set to allow rules to be reordered
-        to avoid ambiguities"""
-        pass
-
-class Alternative(GrammarElement):
-    """Represents an alternative in a grammar rule (as in S -> A | B | C)"""
-    def __init__(self, name, args):
-        GrammarElement.__init__(self, name )
-        self.args = args
-        self._reordered = False
-        for i in self.args:
-            assert isinstance( i, GrammarElement )
-
-    def _match(self, source, builder, level=0):
-        """If any of the rules in self.args matches
-        returns the object built from the first rules that matches
-        """
-        if DEBUG>1:
-            print "try alt:", self.display()
-        tok = source.peek()
-        # Here we stop at the first match we should
-        # try instead to get the longest alternative
-        # to see if this solve our problems with infinite recursion
-        for rule in self.args:
-            if USE_LOOKAHEAD:
-                if not rule.match_first_set(tok) and EmptyToken not in rule.first_set:
-                    if self._trace:
-                        print "Skipping impossible rule: %s" % (rule,)
-                    continue
-            m = rule.match(source, builder, level+1)
-            if m:
-                ret = builder.alternative( self, source )
-                return self.debug_return( ret )
-        return 0
-
-    def display(self, level=0):
-        if level==0:
-            name =  self.name + " -> "
-        elif not self.name.startswith(":"):
-            return self.name
-        else:
-            name = ""
-        items = [ a.display(1) for a in self.args ]
-        return name+"(" + "|".join( items ) + ")"
-
-    def calc_first_set(self):
-        """returns the list of possible next tokens
-        if S -> (A | B | C):
-            LAH(S) = Union( LAH(A), LAH(B), LAH(C) )
-        """
-        # do this to avoid problems on indirect recursive rules
-        for rule in self.args:
-            for t in rule.first_set:
-                if t not in self.first_set:
-                    self.first_set.append(t)
-                # self.first_set[t] = 1
-
-    def reorder_rule(self):
-        # take the opportunity to reorder rules in alternatives
-        # so that rules with Empty in their first set come last
-        # warn if two rules have empty in their first set
-        empty_set = []
-        not_empty_set = []
-        # <tokens> is only needed for warning / debugging purposes
-        tokens_set = []
-        for rule in self.args:
-            if EmptyToken in rule.first_set:
-                empty_set.append(rule)
-            else:
-                not_empty_set.append(rule)
-            if DEBUG:
-                # This loop is only needed for warning / debugging purposes
-                # It will check if a token is part of several first sets of
-                # a same alternative
-                for token in rule.first_set:
-                    if token is not EmptyToken and token in tokens_set:
-                        print "Warning, token %s in\n\t%s's first set is part " \
-                              "of a previous rule's first set in alternative\n\t" \
-                              "%s" % (token, rule, self)
-                    tokens_set.append(token)
-        if len(empty_set) > 1 and not self._reordered:
-            print "Warning: alternative %s has more than one rule matching Empty" % self
-            self._reordered = True
-        self.args[:] = not_empty_set
-        self.args.extend( empty_set )
-
-    
-class Sequence(GrammarElement):
-    """Reprensents a Sequence in a grammar rule (as in S -> A B C)"""
-    def __init__(self, name, args):
-        GrammarElement.__init__(self, name )
-        self.args = args
-        for i in self.args:
-            assert isinstance( i, GrammarElement )
-
-    def _match(self, source, builder, level=0):
-        """matches all of the symbols in order"""
-        if DEBUG>1:
-            print "try seq:", self.display()
-        ctx = source.context()
-        bctx = builder.context()
-        for rule in self.args:
-            m = rule.match(source, builder, level+1)
-            if not m:
-                # Restore needed because some rules may have been matched
-                # before the one that failed
-                source.restore(ctx)
-                builder.restore(bctx)
-                return 0
-        ret = builder.sequence(self, source, len(self.args))
-        return self.debug_return( ret )
-
-    def display(self, level=0):
-        if level == 0:
-            name = self.name + " -> "
-        elif not self.name.startswith(":"):
-            return self.name
-        else:
-            name = ""
-        items = [a.display(1) for a in self.args]
-        return name + "(" + " ".join( items ) + ")"
-
-    def calc_first_set(self):
-        """returns the list of possible next tokens
-        if S -> A* B C:
-            LAH(S) = Union( LAH(A), LAH(B) )
-        if S -> A+ B C:
-            LAH(S) = LAH(A)
-        if S -> A B C:
-            LAH(S) = LAH(A)
-        """
-        for rule in self.args:
-            if not rule.first_set:
-                break
-            if EmptyToken in self.first_set:
-                self.first_set.remove( EmptyToken )
-
-                # del self.first_set[EmptyToken]
-            # while we're in this loop, keep aggregating possible tokens
-            for t in rule.first_set:
-                if t not in self.first_set:
-                    self.first_set.append(t)
-                # self.first_set[t] = 1
-            if EmptyToken not in rule.first_set:
-                break
-                
-
-
-class KleenStar(GrammarElement):
-    """Represents a KleenStar in a grammar rule as in (S -> A+) or (S -> A*)"""
-    def __init__(self, name, _min = 0, _max = -1, rule=None):
-        GrammarElement.__init__( self, name )
-        self.args = [rule]
-        self.min = _min
-        if _max == 0:
-            raise ValueError("KleenStar needs max==-1 or max>1")
-        self.max = _max
-        self.star = "x"
-        if self.min == 0:
-            self.first_set.append( EmptyToken )
-            # self.first_set[EmptyToken] = 1
-
-    def _match(self, source, builder, level=0):
-        """matches a number of times self.args[0]. the number must be comprised
-        between self._min and self._max inclusive. -1 is used to represent infinity"""
-        if DEBUG>1:
-            print "try kle:", self.display()
-        ctx = source.context()
-        bctx = builder.context()
-        rules = 0
-        rule = self.args[0]
-        while True:
-            m = rule.match(source, builder, level+1)
-            if not m:
-                # Rule should be matched at least 'min' times
-                if rules<self.min:
-                    source.restore(ctx)
-                    builder.restore(bctx)
-                    return 0
-                ret = builder.sequence(self, source, rules)
-                return self.debug_return( ret, rules )
-            rules += 1
-            if self.max>0 and rules == self.max:
-                ret = builder.sequence(self, source, rules)
-                return self.debug_return( ret, rules )
-
-    def display(self, level=0):
-        if level==0:
-            name =  self.name + " -> "
-        elif not self.name.startswith(":"):
-            return self.name
-        else:
-            name = ""
-        star = "{%d,%d}" % (self.min,self.max)
-        if self.min==0 and self.max==1:
-            star = "?"
-        elif self.min==0 and self.max==-1:
-            star = "*"
-        elif self.min==1 and self.max==-1:
-            star = "+"
-        s = self.args[0].display(1)
-        return name + "%s%s" % (s, star)
-
-
-    def calc_first_set(self):
-        """returns the list of possible next tokens
-        if S -> A*:
-            LAH(S) = Union( LAH(A), EmptyToken )
-        if S -> A+:
-            LAH(S) = LAH(A)
-        """
-        rule = self.args[0]
-        self.first_set = rule.first_set[:]
-        # self.first_set = dict(rule.first_set)
-        if self.min == 0 and EmptyToken not in self.first_set:
-            self.first_set.append(EmptyToken)
-            # self.first_set[EmptyToken] = 1
-
-class Token(GrammarElement):
-    """Represents a Token in a grammar rule (a lexer token)"""
-    def __init__( self, name, value = None):
-        GrammarElement.__init__( self, name )
-        self.value = value
-        self.first_set = [self]
-        # self.first_set = {self: 1}
-
-    def match(self, source, builder, level=0):
-        """Matches a token.
-        the default implementation is to match any token whose type
-        corresponds to the object's name. You can extend Token
-        to match anything returned from the lexer. For example:
-        type, value = source.next()
-        if type=="integer" and int(value)>=0:
-            # found
-        else:
-            # error unknown or negative integer
-        """
-        ctx = source.context()
-        tk = source.next()
-        # XXX: match_token
-        if tk.name == self.name:
-            if self.value is None:
-                ret = builder.token( tk.name, tk.value, source )
-                return self.debug_return( ret, tk.name )
-            elif self.value == tk.value:
-                ret = builder.token( tk.name, tk.value, source )
-                return self.debug_return( ret, tk.name, tk.value )
-        if DEBUG>1:
-            print "tried tok:", self.display()
-        source.restore( ctx )
-        return 0
-
-    def display(self, level=0):
-        if self.value is None:
-            return "<%s>" % self.name
-        else:
-            return "<%s>=='%s'" % (self.name, self.value)
-    
-
-    def match_token(self, other):
-        """convenience '==' implementation, this is *not* a *real* equality test
-        a Token instance can be compared to:
-         - another Token instance in which case all fields (name and value)
-           must be equal
-         - a tuple, such as those yielded by the Python lexer, in which case
-           the comparison algorithm is similar to the one in match()
-           XXX:
-             1/ refactor match and __eq__ ?
-             2/ make source.next and source.peek return a Token() instance
-        """
-        if not isinstance(other, Token):
-            raise RuntimeError("Unexpected token type %r" % other)
-        if other is EmptyToken:
-            return False
-        res = other.name == self.name and self.value in (None, other.value)
-        #print "matching", self, other, res
-        return res
-    
-    def __eq__(self, other):
-        return self.name == other.name and self.value == other.value
-        
-
-    
-    def calc_first_set(self):
-        """computes the list of possible next tokens
-        """
-        pass
-
-EmptyToken = Token(None)

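To give an idea of how these classes compose, here is a small hand-built rule
together with its computed first set (a sketch, not from the commit; the
import path assumes the module keeps its names under pypy/interpreter/pyparser):

    from pypy.interpreter.pyparser.grammar import Alternative, Sequence, \
         KleenStar, Token, build_first_sets   # assumed new location

    # term: NUMBER | NAME
    term = Alternative("term", [Token('NUMBER'), Token('NAME')])
    # expr: term ('+' term)*
    plus_term = Sequence(":expr_1", [Token('+'), term])
    tail = KleenStar(":expr_2", _min=0, rule=plus_term)
    expr = Sequence("expr", [term, tail])

    build_first_sets([term, plus_term, tail, expr])
    print expr             # expr -> (term (<+> term)*)
    print expr.first_set   # [<NUMBER>, <NAME>]
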
Deleted: /pypy/dist/pypy/module/recparser/pythonlexer.py
==============================================================================
--- /pypy/dist/pypy/module/recparser/pythonlexer.py	Sun Jul  3 11:38:44 2005
+++ (empty file)
@@ -1,378 +0,0 @@
-"""This is a lexer for a Python recursive descent parser
-it obeys the TokenSource interface defined for the grammar
-analyser in grammar.py
-"""
-
-from grammar import TokenSource, Token
-# Don't import string for that ...
-NAMECHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
-NUMCHARS = '0123456789'
-ALNUMCHARS = NAMECHARS + NUMCHARS
-EXTENDED_ALNUMCHARS = ALNUMCHARS + '-.'
-WHITESPACES = ' \t\n\r\v\f'
-
-def match_encoding_declaration(comment):
-    """returns the declared encoding or None
-
-    This function is a replacement for :
-    >>> py_encoding = re.compile(r"coding[:=]\s*([-\w.]+)")
-    >>> py_encoding.search(comment)
-    """
-    index = comment.find('coding')
-    if index == -1:
-        return None
-    next_char = comment[index + 6]
-    if next_char not in ':=':
-        return None
-    end_of_decl = comment[index + 7:]
-    index = 0
-    for char in end_of_decl:
-        if char not in WHITESPACES:
-            break
-        index += 1
-    else:
-        return None
-    encoding = ''
-    for char in end_of_decl[index:]:
-        if char in EXTENDED_ALNUMCHARS:
-            encoding += char
-        else:
-            break
-    if encoding != '':
-        return encoding
-    return None
-    
-def _normalize_encoding(encoding):
-    """returns normalized name for <encoding>
-
-    see dist/src/Parser/tokenizer.c 'get_normal_name()'
-    for implementation details / reference
-
-    NOTE: for now, parser.suite() raises a MemoryError when
-          a bad encoding is used. (SF bug #979739)
-    """
-    # lower() + '_' / '-' conversion
-    encoding = encoding.replace('_', '-').lower()
-    if encoding.startswith('utf-8'):
-        return 'utf-8'
-    for variant in ('latin-1', 'iso-latin-1', 'iso-8859-1'):
-        if encoding.startswith(variant):
-            return 'iso-8859-1'
-    return encoding
-
-################################################################################
-import token as tokenmod
-from pytokenize import tabsize, whiteSpaceDFA, triple_quoted, endDFAs, \
-     single_quoted, pseudoDFA 
-import automata
-
-# adopt pytokenize notations / values
-tokenmod.COMMENT = tokenmod.N_TOKENS 
-tokenmod.NL = tokenmod.N_TOKENS + 1
-
-class TokenError(Exception):
-    """Raised when EOF is found prematuerly"""
-    def __init__(self, msg, strstart, token_stack):
-        # Exception.__init__(self, msg)
-        self.strstart = strstart
-        self.token_stack = token_stack
-    
-
-def generate_tokens(lines):
-    """
-    This is a rewrite of pypy.module.parser.pytokenize.generate_tokens, since
-    the original function is not RPython (it uses yield).
-    It was also slightly modified to generate Token instances instead
-    of the original 5-tuples.
-
-    Original docstring ::
-    
-        The generate_tokens() generator requires one argument, readline, which
-        must be a callable object which provides the same interface as the
-        readline() method of built-in file objects. Each call to the function
-        should return one line of input as a string.
-
-        The generator produces 5-tuples with these members: the token type; the
-        token string; a 2-tuple (srow, scol) of ints specifying the row and
-        column where the token begins in the source; a 2-tuple (erow, ecol) of
-        ints specifying the row and column where the token ends in the source;
-        and the line on which the token was found. The line passed is the
-        logical line; continuation lines are included.
-    """
-    token_list = []
-    lnum = parenlev = continued = 0
-    namechars = NAMECHARS
-    numchars = NUMCHARS
-    contstr, needcont = '', 0
-    contline = None
-    indents = [0]
-    last_comment = ''
-    encoding = None
-    strstart = (0, 0)
-
-    lines.append('') # XXX HACK probably not needed
-    endDFA = automata.DFA([], []) # XXX Make the translator happy
-    line = ''                 # XXX Make the translator happy
-    for line in lines:
-        lnum = lnum + 1
-        pos, max = 0, len(line)
-
-        if contstr:                            # continued string
-            if not line:
-                raise TokenError("EOF in multi-line string", strstart, token_list)
-            endmatch = endDFA.recognize(line)
-            if -1 != endmatch:
-                pos = end = endmatch
-                tok = token_from_values(tokenmod.STRING, contstr + line[:end])
-                token_list.append((tok, line))
-                last_comment = ''
-                # token_list.append((STRING, contstr + line[:end],
-                #                    strstart, (lnum, end), contline + line))
-                contstr, needcont = '', 0
-                contline = None
-            elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
-                tok = token_from_values(tokenmod.ERRORTOKEN, contstr + line)
-                token_list.append((tok, line))
-                last_comment = ''
-                # token_list.append((ERRORTOKEN, contstr + line,
-                #                    strstart, (lnum, len(line)), contline))
-                contstr = ''
-                contline = None
-                continue
-            else:
-                contstr = contstr + line
-                contline = contline + line
-                continue
-
-        elif parenlev == 0 and not continued:  # new statement
-            if not line: break
-            column = 0
-            while pos < max:                   # measure leading whitespace
-                if line[pos] == ' ': column = column + 1
-                elif line[pos] == '\t': column = (column/tabsize + 1)*tabsize
-                elif line[pos] == '\f': column = 0
-                else: break
-                pos = pos + 1
-            if pos == max: break
-
-            if line[pos] in '#\r\n':           # skip comments or blank lines
-                if line[pos] == '#':
-                    tok = token_from_values(tokenmod.COMMENT, line[pos:])
-                    last_comment = line[pos:]
-                    if lnum <= 2 and encoding is None:
-                        encoding = match_encoding_declaration(last_comment)
-                        if encoding is not None:
-                            encoding = _normalize_encoding(encoding)
-                else:
-                    tok = token_from_values(tokenmod.NL, line[pos:])
-                    last_comment = ''
-                # XXX Skip NL and COMMENT Tokens   # token_list.append((tok, line))
-                # token_list.append(((NL, COMMENT)[line[pos] == '#'], line[pos:],
-                #                    (lnum, pos), (lnum, len(line)), line))
-                continue
-
-            if column > indents[-1]:           # count indents or dedents
-                indents.append(column)
-                tok = token_from_values(tokenmod.INDENT, line[:pos])
-                token_list.append((tok, line))
-                last_comment = ''
-                # token_list.append((INDENT, line[:pos], (lnum, 0), (lnum, pos), line))
-            while column < indents[-1]:
-                indents = indents[:-1]
-                tok = token_from_values(tokenmod.DEDENT, '')
-                token_list.append((tok, line))
-                last_comment = ''
-                # token_list.append((DEDENT, '', (lnum, pos), (lnum, pos), line))
-
-        else:                                  # continued statement
-            if not line:
-                raise TokenError("EOF in multi-line statement", (lnum, 0), token_list)
-            continued = 0
-
-        while pos < max:
-            pseudomatch = pseudoDFA.recognize(line, pos)
-            if -1 != pseudomatch:                            # scan for tokens
-                # JDR: Modified
-                start = whiteSpaceDFA.recognize(line, pos)
-                if -1 == start:
-                    start = pos
-                end = pseudomatch
-
-                spos, epos, pos = (lnum, start), (lnum, end), end
-                token, initial = line[start:end], line[start]
-
-                if initial in numchars or \
-                   (initial == '.' and token != '.'):      # ordinary number
-                    tok = token_from_values(tokenmod.NUMBER, token)
-                    token_list.append((tok, line))
-                    last_comment = ''
-                    # token_list.append((NUMBER, token, spos, epos, line))
-                elif initial in '\r\n':
-                    if parenlev > 0:
-                        tok = token_from_values(tokenmod.NL, token)
-                        last_comment = ''
-                        # XXX Skip NL
-                    else:
-                        tok = token_from_values(tokenmod.NEWLINE, token)
-                        # XXX YUCK !
-                        tok.value = last_comment
-                        token_list.append((tok, line))
-                        last_comment = ''
-                    # token_list.append((parenlev > 0 and NL or NEWLINE, token, spos, epos, line))
-                elif initial == '#':
-                    tok = token_from_values(tokenmod.COMMENT, token)
-                    last_comment = token
-                    if lnum <= 2 and encoding is None:
-                        encoding = match_encoding_declaration(last_comment)
-                        if encoding is not None:
-                            encoding = _normalize_encoding(encoding)
-                    # XXX Skip # token_list.append((tok, line))
-                    # token_list.append((COMMENT, token, spos, epos, line))
-                elif token in triple_quoted:
-                    endDFA = endDFAs[token]
-                    endmatch = endDFA.recognize(line, pos)
-                    if -1 != endmatch:                     # all on one line
-                        pos = endmatch
-                        token = line[start:pos]
-                        tok = token_from_values(tokenmod.STRING, token)
-                        token_list.append((tok, line))
-                        last_comment = ''
-                        # token_list.append((STRING, token, spos, (lnum, pos), line))
-                    else:
-                        strstart = (lnum, start)           # multiple lines
-                        contstr = line[start:]
-                        contline = line
-                        break
-                elif initial in single_quoted or \
-                    token[:2] in single_quoted or \
-                    token[:3] in single_quoted:
-                    if token[-1] == '\n':                  # continued string
-                        strstart = (lnum, start)
-                        endDFA = (endDFAs[initial] or endDFAs[token[1]] or
-                                   endDFAs[token[2]])
-                        contstr, needcont = line[start:], 1
-                        contline = line
-                        break
-                    else:                                  # ordinary string
-                        tok = token_from_values(tokenmod.STRING, token)
-                        token_list.append((tok, line))
-                        last_comment = ''
-                        # token_list.append((STRING, token, spos, epos, line))
-                elif initial in namechars:                 # ordinary name
-                    tok = token_from_values(tokenmod.NAME, token)
-                    token_list.append((tok, line))
-                    last_comment = ''
-                    # token_list.append((NAME, token, spos, epos, line))
-                elif initial == '\\':                      # continued stmt
-                    continued = 1
-                else:
-                    if initial in '([{': parenlev = parenlev + 1
-                    elif initial in ')]}': parenlev = parenlev - 1
-                    tok = token_from_values(tokenmod.OP, token)
-                    token_list.append((tok, line)) 
-                    last_comment = ''
-                    # token_list.append((OP, token, spos, epos, line))
-            else:
-                tok = token_from_values(tokenmod.ERRORTOKEN, line[pos])
-                token_list.append((tok, line))
-                last_comment = ''
-                # token_list.append((ERRORTOKEN, line[pos],
-                #                    (lnum, pos), (lnum, pos+1), line))
-                pos = pos + 1
-
-    last_comment = ''
-    for indent in indents[1:]:                 # pop remaining indent levels
-        tok = token_from_values(tokenmod.DEDENT, '')
-        token_list.append((tok, line))
-        # token_list.append((DEDENT, '', (lnum, 0), (lnum, 0), ''))
-
-    ## <XXX> adim
-    token_list.append((Token('NEWLINE', ''), line))
-    ## </XXX>
-    tok = token_from_values(tokenmod.ENDMARKER, '',)
-    token_list.append((tok, line))
-    # token_list.append((ENDMARKER, '', (lnum, 0), (lnum, 0), ''))
-    return token_list, encoding
-
-class PythonSource(TokenSource):
-    """This source uses Jonathan's tokenizer"""
-    def __init__(self, strings):
-        # TokenSource.__init__(self)
-        tokens, encoding = generate_tokens(strings)
-        self.token_stack = tokens
-        self.encoding = encoding
-        self._current_line = '' # the current line (as a string)
-        self.stack_pos = 0
-
-    def next(self):
-        if self.stack_pos >= len(self.token_stack):
-            raise StopIteration("Remove me")
-        tok, line = self.token_stack[self.stack_pos]
-        self.stack_pos += 1
-        self._current_line = line
-        return tok
-
-    def current_line(self):
-        return self._current_line
-
-    def context(self):
-        return self.stack_pos
-
-    def restore(self, ctx):
-        self.stack_pos = ctx
-
-    def peek(self):
-        """returns next token without consuming it"""
-        ctx = self.context()
-        token = self.next()
-        self.restore(ctx)
-        return token
-
-    #### methods below have to be translated 
-    def offset(self, ctx=None):
-        if ctx is None:
-            return self.stack_pos
-        else:
-            assert type(ctx)==int
-            return ctx
-
-    def get_pos(self):
-        if self.stack_pos >= len(self.stack):
-            return self.pos
-        else:
-            token, line, pos = self.stack[self.stack_pos]
-            return pos
-
-    def get_source_text(self, pos0, pos1 ):
-        return self.input[pos0:pos1]
-        
-    def debug(self):
-        """return context for debug information"""
-        return 'line %s : %s' % ('XXX', self._current_line)
-
-NONE_LIST = [tokenmod.ENDMARKER, tokenmod.INDENT, tokenmod.DEDENT,]
-NAMED_LIST = [tokenmod.OP, ]
-
-def token_from_values(tok_type, tok_string):
-    """XXX Compatibility layer between both parsers"""
-    if tok_type in NONE_LIST:
-        return Token(tokenmod.tok_name[tok_type], None)
-    if tok_type in NAMED_LIST:
-        return Token(tok_string, None)
-    if tok_type == tokenmod.NEWLINE:
-        return Token('NEWLINE', '') # XXX pending comment ?
-    return Token(tokenmod.tok_name[tok_type], tok_string)
-
-Source = PythonSource
-
-def tokenize_file(filename):
-    f = file(filename).read()
-    src = Source(f)
-    token = src.next()
-    while token != ("ENDMARKER", None) and token != (None, None):
-        print token
-        token = src.next()
-
-if __name__ == '__main__':
-    import sys
-    tokenize_file(sys.argv[1])

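The encoding-declaration helper is easy to exercise on its own; a small sketch
(the import path is an assumption, expected outputs shown as comments):

    from pypy.interpreter.pyparser.pythonlexer import match_encoding_declaration

    print match_encoding_declaration("# -*- coding: latin-1 -*-")  # latin-1
    print match_encoding_declaration("# vim:fileencoding=utf-8")   # utf-8
    print match_encoding_declaration("# just a comment")           # None
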
Deleted: /pypy/dist/pypy/module/recparser/pythonparse.py
==============================================================================
--- /pypy/dist/pypy/module/recparser/pythonparse.py	Sun Jul  3 11:38:44 2005
+++ (empty file)
@@ -1,51 +0,0 @@
-#!/usr/bin/env python
-from pythonlexer import Source
-from ebnfparse import parse_grammar
-import sys
-import os
-import symbol
-import grammar
-
-# parse the python grammar corresponding to our CPython version
-_ver = ".".join([str(i) for i in sys.version_info[:2]])
-PYTHON_GRAMMAR = os.path.join( os.path.dirname(__file__), "data", "Grammar" + _ver )
-
-def python_grammar():
-    """returns a """
-    level = grammar.DEBUG
-    grammar.DEBUG = 0
-    gram = parse_grammar( file(PYTHON_GRAMMAR) )
-    grammar.DEBUG = level
-    # Build first sets for each rule (including anonymous ones)
-    grammar.build_first_sets(gram.items)
-    return gram
-
-PYTHON_PARSER = python_grammar()
-
-
-def parse_python_source( textsrc, gram, goal, builder=None ):
-    """Parse a python source according to goal"""
-    target = gram.rules[goal]
-    src = Source(textsrc)
-    if builder is None:
-        builder = grammar.BaseGrammarBuilder(debug=False, rules=gram.rules)
-    result = target.match(src, builder)
-    # <HACK> XXX find a clean way to process encoding declarations
-    builder.source_encoding = src.encoding
-    # </HACK>
-    if not result:
-        return None
-    # raise SyntaxError("at %s" % src.debug() )
-    return builder
-
-def parse_file_input(pyf, gram, builder=None):
-    """Parse a python file"""
-    return parse_python_source( pyf.read(), gram, "file_input", builder )
-    
-def parse_single_input(textsrc, gram, builder=None):
-    """Parse a python single statement"""
-    return parse_python_source( textsrc, gram, "single_input", builder )
-
-def parse_eval_input(textsrc, gram, builder=None):
-    """Parse a python expression"""
-    return parse_python_source( textsrc, gram, "eval_input", builder )

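A rough usage sketch for the moved entry point; the import path is an
assumption, and the list-of-lines calling convention follows the compat.py
change above:

    from pypy.interpreter.pyparser.pythonparse import parse_python_source, \
         PYTHON_PARSER   # assumed new location

    source = "print 6 * 7\n"
    lines = [line + '\n' for line in source.split('\n')]
    builder = parse_python_source(lines, PYTHON_PARSER, "file_input")
    if builder is None:
        print "syntax error"
    else:
        print builder.source_encoding   # None unless a coding: comment was seen
        print builder.stack[-1]         # root node left by the builder
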
Deleted: /pypy/dist/pypy/module/recparser/pythonutil.py
==============================================================================
--- /pypy/dist/pypy/module/recparser/pythonutil.py	Sun Jul  3 11:38:44 2005
+++ (empty file)
@@ -1,98 +0,0 @@
-__all__ = ["python_parse", "pypy_parse","ast_single_input", "ast_file_input",
-           "ast_eval_input" ]
-
-import grammar
-import pythonparse
-from compiler.transformer import Transformer
-from tuplebuilder import TupleBuilder
-
-PYTHON_PARSER = pythonparse.PYTHON_PARSER
-
-def python_parse(filename):
-    """parse <filename> using CPython's parser module and return nested tuples
-    """
-    pyf = file(filename)
-    import parser
-    tp2 = parser.suite(pyf.read())
-    return tp2.totuple()
-
-import symbol
-def pypy_parse(filename):
-    """parse <filename> using PyPy's parser module and return
-    a tuple of three elements :
-     - The encoding declaration symbol or None if there were no encoding
-       statement
-     - The TupleBuilder's stack top element (instance of
-       tuplebuilder.StackElement which is a wrapper of some nested tuples
-       like those returned by the CPython's parser)
-     - The encoding string or None if there were no encoding statement
-    nested tuples
-    """
-    pyf = file(filename)
-    text = pyf.read()
-    pyf.close()
-    builder = TupleBuilder(PYTHON_PARSER.rules, lineno=False)
-    # make the annotator's life easier
-    strings = [line+'\n' for line in text.split('\n')]
-    pythonparse.parse_python_source(strings, PYTHON_PARSER, 'file_input', builder)
-    nested_tuples = builder.stack[-1]
-    if builder.source_encoding is not None:
-        return (symbol.encoding_decl, nested_tuples, builder.source_encoding)
-    else:
-        return (None, nested_tuples, None)
-
-def annotateme(strings):
-    builder = TupleBuilder(PYTHON_PARSER.rules, lineno=False)
-    pythonparse.parse_python_source(strings, PYTHON_PARSER, 'file_input', builder)
-    nested_tuples = builder.stack[-1]
-    if builder.source_encoding is not None:
-        return (symbol.encoding_decl, nested_tuples, builder.source_encoding)
-    else:
-        return (None, nested_tuples, None)
-
-def ast_single_input( text ):
-    builder = TupleBuilder( PYTHON_PARSER.rules )
-    pythonparse.parse_python_source( text, PYTHON_PARSER, "single_input", builder )
-    tree = builder.stack[-1]
-    trans = Transformer()
-    ast = trans.transform( tree )
-    return ast
-
-def ast_file_input( filename ):
-    pyf = file(filename,"r")
-    text = pyf.read()
-    return ast_srcfile_input( text, filename )
-
-def ast_srcfile_input( srctext, filename ):
-    # TODO do something with the filename
-    builder = TupleBuilder( PYTHON_PARSER.rules )
-    pythonparse.parse_python_source( srctext, PYTHON_PARSER, "file_input", builder )
-    tree = builder.stack[-1]
-    trans = Transformer()
-    ast = trans.transform( tree )
-    return ast
-
-def ast_eval_input( textsrc ):
-    builder = TupleBuilder( PYTHON_PARSER.rules )
-    pythonparse.parse_python_source( textsrc, PYTHON_PARSER, "eval_input", builder )
-    tree = builder.stack[-1]
-    trans = Transformer()
-    ast = trans.transform( tree )
-    return ast
-
-
-
-if __name__ == "__main__":
-    import sys
-    if len(sys.argv) < 2:
-        print "python parse.py [-d N] test_file.py"
-        sys.exit(1)
-    if sys.argv[1] == "-d":
-        debug_level = int(sys.argv[2])
-        test_file = sys.argv[3]
-    else:
-        test_file = sys.argv[1]
-    print "-"*20
-    print
-    print "pyparse \n", pypy_parse(test_file)
-    print "parser  \n", python_parse(test_file)

Deleted: /pypy/dist/pypy/module/recparser/pytokenize.py
==============================================================================
--- /pypy/dist/pypy/module/recparser/pytokenize.py	Sun Jul  3 11:38:44 2005
+++ (empty file)
@@ -1,334 +0,0 @@
-#! /usr/bin/env python
-# ______________________________________________________________________
-"""Module pytokenize
-
-THIS FILE WAS COPIED FROM pypy/module/parser/pytokenize.py AND ADAPTED
-TO BE ANNOTABLE (Mainly made lists homogeneous)
-
-This is a modified version of Ka-Ping Yee's tokenize module found in the
-Python standard library.
-
-The primary modification is the removal of the tokenizer's dependence on the
-standard Python regular expression module, which is written in C.  The regular
-expressions have been replaced with hand built DFA's using the
-basil.util.automata module.
-
-XXX This now assumes that the automata module is in the Python path.
-
-$Id: pytokenize.py,v 1.3 2003/10/03 16:31:53 jriehl Exp $
-"""
-# ______________________________________________________________________
-
-from __future__ import generators
-import string
-import automata
-
-# ______________________________________________________________________
-# COPIED:
-from token import *
-
-import token
-__all__ = [x for x in dir(token) if x[0] != '_'] + ["COMMENT", "tokenize",
-           "generate_tokens", "NL"]
-del x
-del token
-
-COMMENT = N_TOKENS
-tok_name[COMMENT] = 'COMMENT'
-NL = N_TOKENS + 1
-tok_name[NL] = 'NL'
-N_TOKENS += 2
-
-# ______________________________________________________________________
-# Automatically generated DFAs (with one or two hand tweaks):
-pseudoStatesAccepts = [True, True, True, True, True, True, True, True,
-                       True, True, False, True, True, True, False, False,
-                       False, False, True, False, False, True, True, False,
-                       True, False, True, False, True, False, True, False,
-                       False, False, True, False, False, False, True]
-
-pseudoStates = [
-    {'\t': 0, '\n': 13, '\x0c': 0, '\r': 14, ' ': 0, '!': 10,
-     '"': 16, '#': 18, '%': 12, '&': 12,
-     "'": 15, '(': 13, ')': 13, '*': 7,
-     '+': 12, ',': 13, '-': 12, '.': 6,
-     '/': 11, '0': 4, '1': 5, '2': 5,
-     '3': 5, '4': 5, '5': 5, '6': 5,
-     '7': 5, '8': 5, '9': 5, ':': 13,
-     ';': 13, '<': 9, '=': 12, '>': 8, 'A': 1,
-     'B': 1, 'C': 1, 'D': 1, 'E': 1,
-     'F': 1, 'G': 1, 'H': 1, 'I': 1,
-     'J': 1, 'K': 1, 'L': 1, 'M': 1,
-     'N': 1, 'O': 1, 'P': 1, 'Q': 1,
-     'R': 2, 'S': 1, 'T': 1, 'U': 3,
-     'V': 1, 'W': 1, 'X': 1, 'Y': 1,
-     'Z': 1, '[': 13, '\\': 17, ']': 13,
-     '^': 12, '_': 1, '`': 13, 'a': 1,
-     'b': 1, 'c': 1, 'd': 1, 'e': 1,
-     'f': 1, 'g': 1, 'h': 1, 'i': 1,
-     'j': 1, 'k': 1, 'l': 1, 'm': 1,
-     'n': 1, 'o': 1, 'p': 1, 'q': 1,
-     'r': 2, 's': 1, 't': 1, 'u': 3,
-     'v': 1, 'w': 1, 'x': 1, 'y': 1,
-     'z': 1, '{': 13, '|': 12, '}': 13,
-     '~': 13},
-
-    {'0': 1, '1': 1, '2': 1, '3': 1,
-     '4': 1, '5': 1, '6': 1, '7': 1,
-     '8': 1, '9': 1, 'A': 1, 'B': 1,
-     'C': 1, 'D': 1, 'E': 1, 'F': 1,
-     'G': 1, 'H': 1, 'I': 1, 'J': 1,
-     'K': 1, 'L': 1, 'M': 1, 'N': 1,
-     'O': 1, 'P': 1, 'Q': 1, 'R': 1,
-     'S': 1, 'T': 1, 'U': 1, 'V': 1,
-     'W': 1, 'X': 1, 'Y': 1, 'Z': 1,
-     '_': 1, 'a': 1, 'b': 1, 'c': 1,
-     'd': 1, 'e': 1, 'f': 1, 'g': 1,
-     'h': 1, 'i': 1, 'j': 1, 'k': 1,
-     'l': 1, 'm': 1, 'n': 1, 'o': 1,
-     'p': 1, 'q': 1, 'r': 1, 's': 1,
-     't': 1, 'u': 1, 'v': 1, 'w': 1,
-     'x': 1, 'y': 1, 'z': 1},
-
-    {'"': 20, "'": 19, '0': 1, '1': 1,
-     '2': 1, '3': 1, '4': 1, '5': 1,
-     '6': 1, '7': 1, '8': 1, '9': 1,
-     'A': 1, 'B': 1, 'C': 1, 'D': 1,
-     'E': 1, 'F': 1, 'G': 1, 'H': 1,
-     'I': 1, 'J': 1, 'K': 1, 'L': 1,
-     'M': 1, 'N': 1, 'O': 1, 'P': 1,
-     'Q': 1, 'R': 1, 'S': 1, 'T': 1,
-     'U': 1, 'V': 1, 'W': 1, 'X': 1,
-     'Y': 1, 'Z': 1, '_': 1, 'a': 1,
-     'b': 1, 'c': 1, 'd': 1, 'e': 1,
-     'f': 1, 'g': 1, 'h': 1, 'i': 1,
-     'j': 1, 'k': 1, 'l': 1, 'm': 1,
-     'n': 1, 'o': 1, 'p': 1, 'q': 1,
-     'r': 1, 's': 1, 't': 1, 'u': 1,
-     'v': 1, 'w': 1, 'x': 1, 'y': 1,
-     'z': 1},
-
-    {'"': 20, "'": 19, '0': 1, '1': 1,
-     '2': 1, '3': 1, '4': 1, '5': 1,
-     '6': 1, '7': 1, '8': 1, '9': 1,
-     'A': 1, 'B': 1, 'C': 1, 'D': 1,
-     'E': 1, 'F': 1, 'G': 1, 'H': 1,
-     'I': 1, 'J': 1, 'K': 1, 'L': 1,
-     'M': 1, 'N': 1, 'O': 1, 'P': 1,
-     'Q': 1, 'R': 2, 'S': 1, 'T': 1,
-     'U': 1, 'V': 1, 'W': 1, 'X': 1,
-     'Y': 1, 'Z': 1, '_': 1, 'a': 1,
-     'b': 1, 'c': 1, 'd': 1, 'e': 1,
-     'f': 1, 'g': 1, 'h': 1, 'i': 1,
-     'j': 1, 'k': 1, 'l': 1, 'm': 1,
-     'n': 1, 'o': 1, 'p': 1, 'q': 1,
-     'r': 2, 's': 1, 't': 1, 'u': 1,
-     'v': 1, 'w': 1, 'x': 1, 'y': 1,
-     'z': 1},
-
-    {'.': 24, '0': 22, '1': 22, '2': 22,
-     '3': 22, '4': 22, '5': 22, '6': 22,
-     '7': 22, '8': 23, '9': 23, 'E': 25,
-     'J': 13, 'L': 13, 'X': 21, 'e': 25,
-     'j': 13, 'l': 13, 'x': 21},
-
-    {'.': 24, '0': 5, '1': 5, '2': 5,
-     '3': 5, '4': 5, '5': 5, '6': 5,
-     '7': 5, '8': 5, '9': 5, 'E': 25,
-     'J': 13, 'L': 13, 'e': 25, 'j': 13,
-     'l': 13},
-
-    {'0': 26, '1': 26, '2': 26, '3': 26,
-     '4': 26, '5': 26, '6': 26, '7': 26,
-     '8': 26, '9': 26},
-
-    {'*': 12, '=': 13},
-
-    {'=': 13, '>': 12},
-
-    {'=': 13, '<': 12, '>': 13},
-
-    {'=': 13},
-
-    {'=': 13, '/': 12},
-
-    {'=': 13},
-
-    {},
-
-    {'\n': 13},
-
-    {automata.DEFAULT: 19, '\n': 27, '\\': 29, "'": 28},
-
-    {automata.DEFAULT: 20, '"': 30, '\n': 27, '\\': 31},
-
-    {'\n': 13, '\r': 14},
-
-    {automata.DEFAULT: 18, '\n': 27, '\r': 27},
-
-    {automata.DEFAULT: 19, '\n': 27, '\\': 29, "'": 13},
-
-    {automata.DEFAULT: 20, '"': 13, '\n': 27, '\\': 31},
-                
-    {'0': 21, '1': 21, '2': 21, '3': 21,
-     '4': 21, '5': 21, '6': 21, '7': 21,
-     '8': 21, '9': 21, 'A': 21, 'B': 21,
-     'C': 21, 'D': 21, 'E': 21, 'F': 21,
-     'L': 13, 'a': 21, 'b': 21, 'c': 21,
-     'd': 21, 'e': 21, 'f': 21, 'l': 13},
-    
-    {'.': 24, '0': 22, '1': 22, '2': 22,
-     '3': 22, '4': 22, '5': 22, '6': 22,
-     '7': 22, '8': 23, '9': 23, 'E': 25,
-     'J': 13, 'L': 13, 'e': 25, 'j': 13,
-     'l': 13},
-    
-    {'.': 24, '0': 23, '1': 23, '2': 23,
-     '3': 23, '4': 23, '5': 23, '6': 23,
-     '7': 23, '8': 23, '9': 23, 'E': 25,
-     'J': 13, 'e': 25, 'j': 13},
-    
-    {'0': 24, '1': 24, '2': 24, '3': 24,
-     '4': 24, '5': 24, '6': 24, '7': 24,
-     '8': 24, '9': 24, 'E': 32, 'J': 13,
-     'e': 32, 'j': 13},
-    
-    {'+': 33, '-': 33, '0': 34, '1': 34,
-     '2': 34, '3': 34, '4': 34, '5': 34,
-     '6': 34, '7': 34, '8': 34, '9': 34},
-    
-    {'0': 26, '1': 26, '2': 26, '3': 26,
-     '4': 26, '5': 26, '6': 26, '7': 26,
-     '8': 26, '9': 26, 'E': 32, 'J': 13,
-     'e': 32, 'j': 13},
-    
-    {},
-
-    {"'": 13},
-
-    {automata.DEFAULT: 35, '\n': 13, '\r': 14},
-
-    {'"': 13},
-
-    {automata.DEFAULT: 36, '\n': 13, '\r': 14},
-
-    {'+': 37, '-': 37, '0': 38, '1': 38,
-     '2': 38, '3': 38, '4': 38, '5': 38,
-     '6': 38, '7': 38, '8': 38, '9': 38},
-    
-    
-    {'0': 34, '1': 34, '2': 34, '3': 34,
-     '4': 34, '5': 34, '6': 34, '7': 34,
-     '8': 34, '9': 34},
-    
-    {'0': 34, '1': 34, '2': 34, '3': 34,
-     '4': 34, '5': 34, '6': 34, '7': 34,
-     '8': 34, '9': 34, 'J': 13, 'j': 13},
-    
-    {automata.DEFAULT: 35, '\n': 27, '\\': 29, "'": 13},
-    
-    {automata.DEFAULT: 36, '"': 13, '\n': 27, '\\': 31},
-    
-    {'0': 38, '1': 38, '2': 38, '3': 38,
-     '4': 38, '5': 38, '6': 38, '7': 38,
-     '8': 38, '9': 38},
-
-    {'0': 38, '1': 38, '2': 38, '3': 38,
-     '4': 38, '5': 38, '6': 38, '7': 38,
-     '8': 38, '9': 38, 'J': 13, 'j': 13},
-    ]
-
-pseudoDFA = automata.DFA(pseudoStates, pseudoStatesAccepts)
-
-double3StatesAccepts = [False, False, False, False, False, True]
-double3States = [
-    {automata.DEFAULT: 0, '"': 1, '\\': 2},
-    {automata.DEFAULT: 4, '"': 3, '\\': 2},
-    {automata.DEFAULT: 4},
-    {automata.DEFAULT: 4, '"': 5, '\\': 2},
-    {automata.DEFAULT: 4, '"': 1, '\\': 2},
-    {automata.DEFAULT: 4, '"': 5, '\\': 2},
-    ]
-double3DFA = automata.NonGreedyDFA(double3States, double3StatesAccepts)
-
-single3StatesAccepts = [False, False, False, False, False, True]
-single3States = [
-    {automata.DEFAULT: 0, '\\': 2, "'": 1},
-    {automata.DEFAULT: 4, '\\': 2, "'": 3},
-    {automata.DEFAULT: 4},
-    {automata.DEFAULT: 4, '\\': 2, "'": 5},
-    {automata.DEFAULT: 4, '\\': 2, "'": 1},
-    {automata.DEFAULT: 4, '\\': 2, "'": 5},
-    ]
-single3DFA = automata.NonGreedyDFA(single3States, single3StatesAccepts)
-
-singleStatesAccepts = [False, True, False]
-singleStates = [
-    {automata.DEFAULT: 0, '\\': 2, "'": 1},
-    {},
-    {automata.DEFAULT: 0},
-    ]
-singleDFA = automata.DFA(singleStates, singleStatesAccepts)
-
-doubleStatesAccepts = [False, True, False]
-doubleStates = [
-    {automata.DEFAULT: 0, '"': 1, '\\': 2},
-    {},
-    {automata.DEFAULT: 0},
-    ]
-doubleDFA = automata.DFA(doubleStates, doubleStatesAccepts)
-
-endDFAs = {"'" : singleDFA,
-           '"' : doubleDFA,
-           "r" : None,
-           "R" : None,
-           "u" : None,
-           "U" : None}
-
-for uniPrefix in ("", "u", "U"):
-    for rawPrefix in ("", "r", "R"):
-        prefix = uniPrefix + rawPrefix
-        endDFAs[prefix + "'''"] = single3DFA
-        endDFAs[prefix + '"""'] = double3DFA
-
-whiteSpaceStatesAccepts = [True]
-whiteSpaceStates = [{'\t': 0, ' ': 0, '\x0c': 0}]
-whiteSpaceDFA = automata.DFA(whiteSpaceStates, whiteSpaceStatesAccepts)
-
-# ______________________________________________________________________
-# COPIED:
-
-triple_quoted = {}
-for t in ("'''", '"""',
-          "r'''", 'r"""', "R'''", 'R"""',
-          "u'''", 'u"""', "U'''", 'U"""',
-          "ur'''", 'ur"""', "Ur'''", 'Ur"""',
-          "uR'''", 'uR"""', "UR'''", 'UR"""'):
-    triple_quoted[t] = t
-single_quoted = {}
-for t in ("'", '"',
-          "r'", 'r"', "R'", 'R"',
-          "u'", 'u"', "U'", 'U"',
-          "ur'", 'ur"', "Ur'", 'Ur"',
-          "uR'", 'uR"', "UR'", 'UR"' ):
-    single_quoted[t] = t
-
-tabsize = 8
-
-# PYPY MODIFICATION: removed TokenError class as it's not needed here
-
-# PYPY MODIFICATION: removed StopTokenizing class as it's not needed here
-
-# PYPY MODIFICATION: removed printtoken() as it's not needed here
-
-# PYPY MODIFICATION: removed tokenize() as it's not needed here
-
-# PYPY MODIFICATION: removed tokenize_loop() as it's not needed here
-
-# PYPY MODIFICATION: removed generate_tokens() as it was copied / modified
-#                    in pythonlexer.py
-
-# PYPY MODIFICATION: removed main() as it's not needed here
-
-# ______________________________________________________________________
-# End of pytokenize.py
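
(The tables above are just one transition dict per state plus a parallel list
of accepting flags. The self-contained toy below, which is not the automata
module, shows how such a table is driven; it recognizes the longest run of
decimal digits at a given position.)

    DEFAULT = None

    # state 0: start; state 1: at least one digit seen (accepting)
    digits = dict((str(d), 1) for d in range(10))
    states = [dict(digits), dict(digits)]
    accepts = [False, True]

    def recognize(text, pos=0):
        state = 0
        i = pos
        for ch in text[pos:]:
            arcs = states[state]
            if ch in arcs:
                state = arcs[ch]
            elif DEFAULT in arcs:
                state = arcs[DEFAULT]
            elif accepts[state]:
                return i              # longest accepted prefix ends here
            else:
                return -1
            i += 1
        return i if accepts[state] else -1

    print(recognize("123+4"))         # -> 3, i.e. the literal "123"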

Deleted: /pypy/dist/pypy/module/recparser/syntaxtree.py
==============================================================================
--- /pypy/dist/pypy/module/recparser/syntaxtree.py	Sun Jul  3 11:38:44 2005
+++ (empty file)
@@ -1,162 +0,0 @@
-import symbol
-import token
-
-TOKEN_MAP = {
-    "STRING" : token.STRING,
-    "NUMBER" : token.NUMBER,
-    "NAME" : token.NAME,
-    "NEWLINE" : token.NEWLINE,
-    "DEDENT" : token.DEDENT,
-    "ENDMARKER" : token.ENDMARKER,
-    "INDENT" : token.INDENT,
-    "NEWLINE" : token.NEWLINE,
-    "NT_OFFSET" : token.NT_OFFSET,
-    "N_TOKENS" : token.N_TOKENS,
-    "OP" : token.OP,
-    "?ERRORTOKEN" : token.ERRORTOKEN,
-    "&" : token.AMPER,
-    "&=" : token.AMPEREQUAL,
-    "`" : token.BACKQUOTE,
-    "^" : token.CIRCUMFLEX,
-    "^=" : token.CIRCUMFLEXEQUAL,
-    ":" : token.COLON,
-    "," : token.COMMA,
-    "." : token.DOT,
-    "//" : token.DOUBLESLASH,
-    "//=" : token.DOUBLESLASHEQUAL,
-    "**" : token.DOUBLESTAR,
-    "**=" : token.DOUBLESTAREQUAL,
-    "==" : token.EQEQUAL,
-    "=" : token.EQUAL,
-    ">" : token.GREATER,
-    ">=" : token.GREATEREQUAL,
-    "{" : token.LBRACE,
-    "}" : token.RBRACE,
-    "<<" : token.LEFTSHIFT,
-    "<<=" : token.LEFTSHIFTEQUAL,
-    "<" : token.LESS,
-    "<=" : token.LESSEQUAL,
-    "(" : token.LPAR,
-    "[" : token.LSQB,
-    "-=" : token.MINEQUAL,
-    "-" : token.MINUS,
-    "!=" : token.NOTEQUAL,
-    "<>" : token.NOTEQUAL,
-    "%" : token.PERCENT,
-    "%=" : token.PERCENTEQUAL,
-    "+" : token.PLUS,
-    "+=" : token.PLUSEQUAL,
-    ")" : token.RBRACE,
-    ">>" : token.RIGHTSHIFT,
-    ">>=" : token.RIGHTSHIFTEQUAL,
-    ")" : token.RPAR,
-    "]" : token.RSQB,
-    ";" : token.SEMI,
-    "/" : token.SLASH,
-    "/=" : token.SLASHEQUAL,
-    "*" : token.STAR,
-    "*=" : token.STAREQUAL,
-    "~" : token.TILDE,
-    "|" : token.VBAR,
-    "|=" : token.VBAREQUAL,
-    }
-NT_OFFSET = token.NT_OFFSET    
-
-SYMBOLS = {}
-# build the inverse of symbol.sym_name: map each symbol name
-# to its numeric value and store it in SYMBOLS
-for k,v in symbol.sym_name.items():
-    SYMBOLS[v] = k
-SYMBOLS['UNKNOWN'] = -1
-
-
-class SyntaxNode(object):
-    """A syntax node"""
-    def __init__(self, name, source, args):
-        self.name = name
-        self.nodes = args
-        self.lineno = source.current_line()
-        
-    def dumptree(self, treenodes, indent):
-        treenodes.append(self.name)
-        if len(self.nodes) > 1:
-            treenodes.append(" -> (\n")
-            treenodes.append(indent+" ")
-            for node in self.nodes:
-                node.dumptree(treenodes, indent+" ")
-            treenodes.append(")\n")
-            treenodes.append(indent)
-        elif len(self.nodes) == 1:
-            treenodes.append(" ->\n")
-            treenodes.append(indent+" ")
-            self.nodes[0].dumptree(treenodes, indent+" ")
-
-    def dumpstr(self):
-        treenodes = []
-        self.dumptree(treenodes, "")
-        return "".join(treenodes)
-
-    def __repr__(self):
-        return "<node [%s] at 0x%x>" % (self.name, id(self))
-
-    def __str__(self):
-        return "(%s)"  % self.name
-
-    def visit(self, visitor):
-        """NOT RPYTHON, used only at bootstrap time anyway"""
-        visit_meth = getattr(visitor, "visit_%s" % self.name, None)
-        if visit_meth:
-            return visit_meth(self)
-        # helper function for nodes that have only one subnode:
-        if len(self.nodes) == 1:
-            return self.nodes[0].visit(visitor)
-        raise RuntimeError("Unknonw Visitor for %r" % self.name)
-
-    def expand(self):
-        return [ self ]
-
-    def totuple(self, lineno=False ):
-        symvalue = SYMBOLS.get( self.name, (0, self.name) )
-        l = [ symvalue ]
-        l += [node.totuple(lineno) for node in self.nodes]
-        return tuple(l)
-    
-
-class TempSyntaxNode(SyntaxNode):
-    """A temporary syntax node to represent intermediate rules"""
-    def expand(self):
-        return self.nodes
-
-class TokenNode(SyntaxNode):
-    """A token node"""
-    def __init__(self, name, source, value):
-        SyntaxNode.__init__(self, name, source, [])
-        self.value = value
-
-    def dumptree(self, treenodes, indent):
-        if self.value:
-            treenodes.append("%s='%s' (%d) " % (self.name, self.value, self.lineno))
-        else:
-            treenodes.append("'%s' (%d) " % (self.name, self.lineno))
-
-    def __repr__(self):
-        if self.value is not None:
-            return "<%s=%s>" % ( self.name, repr(self.value))
-        else:
-            return "<%s!>" % (self.name,)
-
-    def totuple(self, lineno=False):
-        num = TOKEN_MAP.get(self.name, -1)
-        if num == -1:
-            print "Unknown", self.name, self.value
-        if self.value is not None:
-            val = self.value
-        else:
-            if self.name not in ("NEWLINE", "INDENT", "DEDENT", "ENDMARKER"):
-                val = self.name
-            else:
-                val = self.value or ''
-        if lineno:
-            return (num, val, self.lineno)
-        else:
-            return (num, val)
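
(A quick look at the tuple format TokenNode.totuple() produces, using a
made-up source stub just to satisfy the constructor; the import path assumes
syntaxtree keeps its name under the new interpreter/pyparser package.)

    import token
    from pypy.interpreter.pyparser.syntaxtree import TokenNode

    class FakeSource:
        def current_line(self):
            return 7

    node = TokenNode('NAME', FakeSource(), 'x')
    print(node.totuple())             # (token.NAME, 'x')
    print(node.totuple(lineno=True))  # (token.NAME, 'x', 7)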

Deleted: /pypy/dist/pypy/module/recparser/test_lookahead.py
==============================================================================
--- /pypy/dist/pypy/module/recparser/test_lookahead.py	Sun Jul  3 11:38:44 2005
+++ (empty file)
@@ -1,84 +0,0 @@
-from pypy.module.recparser.grammar import Alternative, Sequence, KleenStar, \
-     Token, EmptyToken, build_first_sets
-
-class TestLookAheadBasics:
-
-    def setup_method(self, method):
-        self.tok1 = Token('t1', 'foo')
-        self.tok2 = Token('t2', 'bar')
-        self.tok3 = Token('t3', 'foobar')
-        self.tokens = [self.tok1, self.tok2, self.tok3]
-        build_first_sets(self.tokens)        
-
-    def test_basic_token(self):
-        assert self.tok1.first_set == [self.tok1]
-
-
-    def test_basic_alternative(self):
-        alt = Alternative('alt', self.tokens)
-        build_first_sets([alt])
-        assert alt.first_set == self.tokens
-
-
-    def test_basic_sequence(self):
-        seq = Sequence('seq', self.tokens)
-        build_first_sets([seq])
-        assert seq.first_set == [self.tokens[0]]
-
-    def test_basic_kleenstar(self):
-        tok1, tok2, tok3 = self.tokens
-        kstar = KleenStar('kstar', 1, 3, tok1)
-        build_first_sets([kstar])
-        assert kstar.first_set == [tok1]
-        kstar = KleenStar('kstar', 0, 3, tok1)
-        build_first_sets([kstar])
-        assert kstar.first_set == [tok1, EmptyToken]
-
-
-    def test_maybe_empty_sequence(self):
-        """S -> tok1{0,2} tok2{0,2}
-         ==> S.first_set = [tok1, tok2, EmptyToken]
-        """
-        tok1, tok2, tok3 = self.tokens
-        k1 = KleenStar('k1', 0, 2, tok1)
-        k2 = KleenStar('k1', 0, 2, tok2)
-        seq = Sequence('seq', [k1, k2])
-        build_first_sets([k1, k2, seq])
-        assert seq.first_set == [tok1, tok2, EmptyToken]
-
-
-    def test_not_empty_sequence(self):
-        """S -> tok1{0,2} tok2{1,2}
-         ==> S.first_set = [tok1, tok2]
-        """
-        tok1, tok2, tok3 = self.tokens
-        k1 = KleenStar('k1', 0, 2, tok1)
-        k2 = KleenStar('k1', 1, 2, tok2)
-        seq = Sequence('seq', [k1, k2])
-        build_first_sets([k1, k2, seq])
-        assert seq.first_set == [tok1, tok2]
-
-def test_token_comparison():
-    assert Token('t1', 'foo') == Token('t1', 'foo')
-    assert Token('t1', 'foo') != Token('t2', 'foo')
-    assert Token('t2', 'foo') != Token('t1', None)
-
-
-class TestLookAhead:
-
-     def setup_method(self, method):
-         self.LOW = Token('LOW', 'low')
-         self.CAP = Token('CAP' ,'cap')
-         self.A = Alternative('A', [])
-         k1 = KleenStar('k1', 0, rule=self.LOW)
-         k2 = KleenStar('k2', 0, rule=self.CAP)
-         self.B = Sequence('B', [k1, self.A])
-         self.C = Sequence('C', [k2, self.A])
-         self.A.args = [self.B, self.C]
-         build_first_sets([self.A, self.B, self.C, self.LOW, self.CAP, k1, k2])
-         
-     def test_S_first_set(self):
-         for s in  [Token('LOW', 'low'), EmptyToken, Token('CAP', 'cap')]:
-             assert s in self.A.first_set
-             assert s in self.B.first_set
-             assert s in self.C.first_set
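
(The property test_maybe_empty_sequence checks can be restated without the
grammar classes at all; the toy helper below, using its own dict
representation and an '<empty>' marker, computes FIRST of a sequence the same
way: union each part's FIRST set and stop at the first part that cannot match
nothing.)

    EMPTY = '<empty>'

    def first_of_sequence(parts, first):
        result = []
        for part in parts:
            for f in first[part]:
                if f != EMPTY and f not in result:
                    result.append(f)
            if EMPTY not in first[part]:
                return result         # this part always consumes a token
        result.append(EMPTY)          # every part may match nothing
        return result

    first = {'k1': ['t1', EMPTY],     # tok1{0,2}
             'k2': ['t2', EMPTY]}     # tok2{0,2}
    print(first_of_sequence(['k1', 'k2'], first))   # ['t1', 't2', '<empty>']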

Deleted: /pypy/dist/pypy/module/recparser/tuplebuilder.py
==============================================================================
--- /pypy/dist/pypy/module/recparser/tuplebuilder.py	Sun Jul  3 11:38:44 2005
+++ (empty file)
@@ -1,118 +0,0 @@
-
-from grammar import BaseGrammarBuilder
-from syntaxtree import TOKEN_MAP, SYMBOLS # , NT_OFFSET
-
-## 
-## def _expand_nodes(nodes):
-##     expanded = []
-##     for n in nodes:
-##         if n[0] == -2:
-##             # expanded.extend(expand_nodes(n[1:]))
-##             expanded.extend(n[1:])
-##         else:
-##             expanded.append(n)
-##     return tuple(expanded)
-## 
-## def expand_nodes(nodes):
-##     r = _expand_nodes(nodes)
-##     for n in nodes:
-##         assert type(n[0]) == int
-##     return r
-## 
-
-class StackElement:
-    """wraps TupleBuilder's tuples"""
-
-class Terminal(StackElement):
-    def __init__(self, num, value, lineno=-1):
-        self.nodes = [(num, value, lineno)]
-        self.num = num
-
-    def as_tuple(self, lineno=None):
-        if lineno is not None:
-            return self.nodes[0]
-        else:
-            return self.nodes[0][:-1]
-
-class NonTerminal(StackElement):
-    def __init__(self, num, nodes, rulename=None):
-        """rulename should always be None with regular Python grammar"""
-        self.nodes = nodes
-        self.num = num
-
-    def as_tuple(self, lineno=None):
-        l = [self.num] + [node.as_tuple(lineno) for node in self.nodes]
-        return tuple(l)
-    
-        
-def expand_nodes(stack_elements):
-    """generate a nested tuples from a list of stack elements"""
-    expanded = []
-    for element in stack_elements:
-        if isinstance(element, NonTerminal) and element.num == -2:
-            expanded.extend(element.nodes)
-        else:
-            expanded.append(element)
-    return expanded
-
-class TupleBuilder(BaseGrammarBuilder):
-    """A builder that directly produce the AST"""
-
-    def __init__(self, rules=None, debug=0, lineno=True):
-        BaseGrammarBuilder.__init__(self, rules, debug)
-        # This attribute is here for convenience
-        self.source_encoding = None
-        self.lineno = lineno
-        self._unknown = -10
-
-    def _add_rule(self, rulename):
-        SYMBOLS[rulename] = self._unknown
-        self._unknown -= 1
-            
-    def alternative(self, rule, source):
-        # Do nothing, keep rule on top of the stack
-        if rule.is_root():
-            nodes = expand_nodes( [self.stack[-1]] )
-            if rule.name in SYMBOLS:
-                self.stack[-1] = NonTerminal(SYMBOLS[rule.name], nodes)
-            else:
-                # Using regular CPython's Grammar should not lead here
-                # XXX find how self._unknown is meant to be used
-                self.stack[-1] = NonTerminal(self._unknown, nodes, rule.name)
-                self._add_rule(rule.name)
-        return True
-            
-    def sequence(self, rule, source, elts_number):
-        """ """
-        if rule.is_root():
-            if rule.name in SYMBOLS:
-                num = SYMBOLS[rule.name]
-                node = [num]
-            else:
-                num = self._unknown
-                node = [num]
-                self._add_rule(rule.name)
-        else:
-            num = -2
-            node = [num]
-        if elts_number > 0:
-            sequence_elements = self.stack[-elts_number:]
-            nodes = expand_nodes( sequence_elements )
-            self.stack[-elts_number:] = [NonTerminal(num, nodes)]
-        else:
-            self.stack.append( NonTerminal(num, []) )
-        return True
-
-    def token(self, name, value, source):
-        num = TOKEN_MAP.get(name, -1)
-        lineno = source.current_line()
-        if value is None:
-            if name not in ("NEWLINE", "INDENT", "DEDENT", "ENDMARKER"):
-                value = name
-            else:
-                value = ''
-        if self.lineno:
-            self.stack.append( Terminal(num, value, lineno) )
-        else:
-            self.stack.append( Terminal(num, value, -1) )
-        return True
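
(A small usage sketch for Terminal/NonTerminal.as_tuple(); the tree is a
hand-built fragment rather than a real parser result, and the import path
assumes tuplebuilder keeps its name under the new interpreter/pyparser
package.)

    import symbol
    import token
    from pypy.interpreter.pyparser.tuplebuilder import Terminal, NonTerminal

    leaves = [Terminal(token.NAME, 'a', 1),
              Terminal(token.EQUAL, '=', 1),
              Terminal(token.NUMBER, '1', 1)]
    stmt = NonTerminal(symbol.expr_stmt, leaves)
    print(stmt.as_tuple())            # (expr_stmt, (NAME, 'a'), (EQUAL, '='), (NUMBER, '1'))
    print(stmt.as_tuple(lineno=1))    # same, but each leaf keeps its line number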


