[pypy-svn] r14116 - in pypy/dist/pypy/module/recparser: . data test
adim at codespeak.net
Sun Jul 3 11:38:46 CEST 2005
Author: adim
Date: Sun Jul 3 11:38:44 2005
New Revision: 14116
Removed:
pypy/dist/pypy/module/recparser/automata.py
pypy/dist/pypy/module/recparser/data/
pypy/dist/pypy/module/recparser/ebnflexer.py
pypy/dist/pypy/module/recparser/ebnfparse.py
pypy/dist/pypy/module/recparser/grammar.py
pypy/dist/pypy/module/recparser/pythonlexer.py
pypy/dist/pypy/module/recparser/pythonparse.py
pypy/dist/pypy/module/recparser/pythonutil.py
pypy/dist/pypy/module/recparser/pytokenize.py
pypy/dist/pypy/module/recparser/syntaxtree.py
pypy/dist/pypy/module/recparser/test/
pypy/dist/pypy/module/recparser/test_lookahead.py
pypy/dist/pypy/module/recparser/tuplebuilder.py
Modified:
pypy/dist/pypy/module/recparser/compat.py
Log:
these files have been moved to interpreter/pyparser
Deleted: /pypy/dist/pypy/module/recparser/automata.py
==============================================================================
--- /pypy/dist/pypy/module/recparser/automata.py Sun Jul 3 11:38:44 2005
+++ (empty file)
@@ -1,89 +0,0 @@
-#! /usr/bin/env python
-# ______________________________________________________________________
-"""Module automata
-
-THIS FILE WAS COPIED FROM pypy/module/parser/pytokenize.py AND ADAPTED
-TO BE ANNOTATABLE (mainly by making the DFA's __init__ accept two flat
-lists instead of a single nested one)
-
-$Id: automata.py,v 1.2 2003/10/02 17:37:17 jriehl Exp $
-"""
-# ______________________________________________________________________
-# Module level definitions
-
-# PYPY Modification: removed the EMPTY class as it's not needed here
-
-
-# PYPY Modification: we don't need a particular DEFAULT class here
-# a simple None works fine.
-# (Having a DefaultClass inheriting from str makes
-# the annotator crash)
-DEFAULT = None
-# PYPY Modification: removed all automata functions (any, maybe,
-# newArcPair, etc.)
-
-class DFA:
- # ____________________________________________________________
- def __init__(self, states, accepts, start = 0):
- self.states = states
- self.accepts = accepts
- self.start = start
-
- # ____________________________________________________________
- def recognize (self, inVec, pos = 0, greedy = True):
- crntState = self.start
- i = pos
- lastAccept = False
- for item in inVec[pos:]:
- # arcMap, accept = self.states[crntState]
- arcMap = self.states[crntState]
- accept = self.accepts[crntState]
- if item in arcMap:
- crntState = arcMap[item]
- elif DEFAULT in arcMap:
- crntState = arcMap[DEFAULT]
- elif accept:
- return i
- elif lastAccept:
- # This is now needed b/c of exception cases where there are
- # transitions to dead states
- return i - 1
- else:
- return -1
- lastAccept = accept
- i += 1
- # if self.states[crntState][1]:
- if self.accepts[crntState]:
- return i
- elif lastAccept:
- return i - 1
- else:
- return -1
-
-# ______________________________________________________________________
-
-class NonGreedyDFA (DFA):
- def recognize (self, inVec, pos = 0):
- crntState = self.start
- i = pos
- for item in inVec[pos:]:
- # arcMap, accept = self.states[crntState]
- arcMap = self.states[crntState]
- accept = self.accepts[crntState]
- if accept:
- return i
- elif item in arcMap:
- crntState = arcMap[item]
- elif DEFAULT in arcMap:
- crntState = arcMap[DEFAULT]
- else:
- return -1
- i += 1
- # if self.states[crntState][1]:
- if self.accepts[crntState]:
- return i
- else:
- return -1
-
-# ______________________________________________________________________
-# End of automata.py
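
For reference, both classes above are driven through recognize(), which
returns the offset just past the match, or -1.  A minimal sketch with a
hypothetical three-state machine (one or more 'a', then an optional 'b')::

    from automata import DFA, NonGreedyDFA

    states  = [{'a': 1},           # state 0: need at least one 'a'
               {'a': 1, 'b': 2},   # state 1: more 'a's, or a final 'b'
               {}]                 # state 2: nothing may follow the 'b'
    accepts = [False, True, True]

    dfa = DFA(states, accepts)
    print dfa.recognize("aab")    # -> 3  (greedy: longest match)
    print dfa.recognize("aac")    # -> 2  (stops before the 'c')
    print dfa.recognize("xyz")    # -> -1 (no match)
    print NonGreedyDFA(states, accepts).recognize("aab")  # -> 1 (first accept)
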
Modified: pypy/dist/pypy/module/recparser/compat.py
==============================================================================
--- pypy/dist/pypy/module/recparser/compat.py (original)
+++ pypy/dist/pypy/module/recparser/compat.py Sun Jul 3 11:38:44 2005
@@ -3,14 +3,26 @@
from pythonparse import parse_python_source
from pythonutil import PYTHON_PARSER
from compiler import transformer, compile as pycompile
-
+import symbol   # for symbol.encoding_decl used below
def suite( source ):
- builder = parse_python_source( source, PYTHON_PARSER, "file_input" )
- return builder.stack[-1]
+ strings = [line+'\n' for line in source.split('\n')]
+ builder = parse_python_source( strings, PYTHON_PARSER, "file_input" )
+ nested_tuples = builder.stack[-1].as_tuple()
+ if builder.source_encoding is not None:
+ return (symbol.encoding_decl, nested_tuples, builder.source_encoding)
+ else:
+ return (None, nested_tuples, None)
def expr( source ):
- builder = parse_python_source( source, PYTHON_PARSER, "eval_input" )
- return builder.stack[-1]
+ strings = [line+'\n' for line in source.split('\n')]
+ builder = parse_python_source( strings, PYTHON_PARSER, "eval_input" )
+ nested_tuples = builder.stack[-1].as_tuple()
+ if builder.source_encoding is not None:
+ return (symbol.encoding_decl, nested_tuples, builder.source_encoding)
+ else:
+ return (None, nested_tuples, None)
def ast2tuple(node, line_info=False):
"""Quick dummy implementation of parser.ast2tuple(tree) function"""
Deleted: /pypy/dist/pypy/module/recparser/ebnflexer.py
==============================================================================
--- /pypy/dist/pypy/module/recparser/ebnflexer.py Sun Jul 3 11:38:44 2005
+++ (empty file)
@@ -1,83 +0,0 @@
-"""This is a lexer for a Python recursive descent parser
-it obeys the TokenSource interface defined for the grammar
-analyser in grammar.py
-"""
-
-import re
-from grammar import TokenSource, Token
-
-DEBUG = False
-
-## Lexer for Python's grammar ########################################
-g_symdef = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*:",re.M)
-g_symbol = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*",re.M)
-g_string = re.compile(r"'[^']+'",re.M)
-g_tok = re.compile(r"\[|\]|\(|\)|\*|\+|\|",re.M)
-g_skip = re.compile(r"\s*(#.*$)?",re.M)
-
-class GrammarSource(TokenSource):
- """The grammar tokenizer"""
- def __init__(self, inpstring ):
- TokenSource.__init__(self)
- self.input = inpstring
- self.pos = 0
- self._peeked = None
-
- def context(self):
- return self.pos, self._peeked
-
- def offset(self, ctx=None):
- if ctx is None:
- return self.pos
- else:
- assert type(ctx)==int
- return ctx
-
- def restore(self, ctx):
- self.pos, self._peeked = ctx
-
- def next(self):
- if self._peeked is not None:
- peeked = self._peeked
- self._peeked = None
- return peeked
-
- pos = self.pos
- inp = self.input
- m = g_skip.match(inp, pos)
- while m and pos!=m.end():
- pos = m.end()
- if pos==len(inp):
- self.pos = pos
- return Token("EOF", None)
- m = g_skip.match(inp, pos)
- m = g_symdef.match(inp,pos)
- if m:
- tk = m.group(0)
- self.pos = m.end()
- return Token('SYMDEF',tk[:-1])
- m = g_tok.match(inp,pos)
- if m:
- tk = m.group(0)
- self.pos = m.end()
- return Token(tk,tk)
- m = g_string.match(inp,pos)
- if m:
- tk = m.group(0)
- self.pos = m.end()
- return Token('STRING',tk[1:-1])
- m = g_symbol.match(inp,pos)
- if m:
- tk = m.group(0)
- self.pos = m.end()
- return Token('SYMBOL',tk)
- raise ValueError("Unknown token at pos=%d context='%s'" % (pos,inp[pos:pos+20]) )
-
- def peek(self):
- if self._peeked is not None:
- return self._peeked
- self._peeked = self.next()
- return self._peeked
-
- def debug(self):
- return self.input[self.pos:self.pos+20]
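
A minimal driving loop for the tokenizer above, fed a hypothetical
one-rule grammar::

    from ebnflexer import GrammarSource

    src = GrammarSource("rule: SYMBOL '+'\n")
    tok = src.next()
    while tok.name != "EOF":
        print tok.name, tok.value   # SYMDEF rule / SYMBOL SYMBOL / STRING +
        tok = src.next()
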
Deleted: /pypy/dist/pypy/module/recparser/ebnfparse.py
==============================================================================
--- /pypy/dist/pypy/module/recparser/ebnfparse.py Sun Jul 3 11:38:44 2005
+++ (empty file)
@@ -1,277 +0,0 @@
-#!/usr/bin/env python
-from grammar import BaseGrammarBuilder, Alternative, Sequence, Token, \
- KleenStar, GrammarElement, build_first_sets, EmptyToken
-from ebnflexer import GrammarSource
-
-import re
-py_name = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*", re.M)
-
-punct=['>=', '<>', '!=', '<', '>', '<=', '==', '\\*=',
- '//=', '%=', '^=', '<<=', '\\*\\*=', '\\', '=',
- '\\+=', '>>=', '=', '&=', '/=', '-=', '\n,', '^', '>>', '&', '\\+', '\\*', '-', '/', '\\.', '\\*\\*', '%', '<<', '//', '\\', '', '\n\\)', '\\(', ';', ':', '@', '\\[', '\\]', '`', '\\{', '\\}']
-
-py_punct = re.compile(r"""
->=|<>|!=|<|>|<=|==|~|
-\*=|//=|%=|\^=|<<=|\*\*=|\|=|\+=|>>=|=|&=|/=|-=|
-,|\^|>>|&|\+|\*|-|/|\.|\*\*|%|<<|//|\||
-\)|\(|;|:|@|\[|\]|`|\{|\}
-""", re.M | re.X)
-
-
-TERMINALS = [
- 'NAME', 'NUMBER', 'STRING', 'NEWLINE', 'ENDMARKER',
- 'INDENT', 'DEDENT' ]
-
-
-## Grammar Visitors ##################################################
-# FIXME: parsertools.py ? parser/__init__.py ?
-
-class NameToken(Token):
- """A token that is not a keyword"""
- def __init__(self, keywords=None ):
- Token.__init__(self, "NAME")
- self.keywords = keywords
-
- def match(self, source, builder, level=0):
- """Matches a token.
- the default implementation is to match any token whose type
- corresponds to the object's name. You can extend Token
-        to match anything returned from the lexer. For example:
- type, value = source.next()
- if type=="integer" and int(value)>=0:
- # found
- else:
- # error unknown or negative integer
- """
- ctx = source.context()
- tk = source.next()
- if tk.name==self.name:
- if tk.value not in self.keywords:
- ret = builder.token( tk.name, tk.value, source )
- return self.debug_return( ret, tk.name, tk.value )
- source.restore( ctx )
- return 0
-
- def match_token(self, other):
-        """convenience '==' implementation; this is *not* a *real* equality test
- a Token instance can be compared to:
- - another Token instance in which case all fields (name and value)
- must be equal
- - a tuple, such as those yielded by the Python lexer, in which case
- the comparison algorithm is similar to the one in match()
- XXX:
- 1/ refactor match and __eq__ ?
- 2/ make source.next and source.peek return a Token() instance
- """
- if not isinstance(other, Token):
- raise RuntimeError("Unexpected token type %r" % other)
- if other is EmptyToken:
- return False
- if other.name != self.name:
- return False
- if other.value in self.keywords:
- return False
- return True
-
-
-class EBNFVisitor(object):
-
- def __init__(self):
- self.rules = {}
- self.terminals = {}
- self.current_rule = None
- self.current_subrule = 0
- self.tokens = {}
- self.items = []
- self.terminals['NAME'] = NameToken()
-
- def new_name( self ):
- rule_name = ":%s_%s" % (self.current_rule, self.current_subrule)
- self.current_subrule += 1
- return rule_name
-
- def new_item( self, itm ):
- self.items.append( itm )
- return itm
-
- def visit_grammar( self, node ):
- # print "Grammar:"
- for rule in node.nodes:
- rule.visit(self)
- # the rules are registered already
- # we do a pass through the variables to detect
- # terminal symbols from non terminals
- for r in self.items:
- for i,a in enumerate(r.args):
- if a.name in self.rules:
- assert isinstance(a,Token)
- r.args[i] = self.rules[a.name]
- if a.name in self.terminals:
- del self.terminals[a.name]
- # XXX .keywords also contains punctuations
- self.terminals['NAME'].keywords = self.tokens.keys()
-
- def visit_rule( self, node ):
- symdef = node.nodes[0].value
- self.current_rule = symdef
- self.current_subrule = 0
- alt = node.nodes[1]
- rule = alt.visit(self)
- if not isinstance( rule, Token ):
- rule.name = symdef
- self.rules[symdef] = rule
-
- def visit_alternative( self, node ):
- items = [ node.nodes[0].visit(self) ]
- items += node.nodes[1].visit(self)
- if len(items) == 1 and items[0].name.startswith(':'):
- return items[0]
- alt = Alternative( self.new_name(), items )
- return self.new_item( alt )
-
- def visit_sequence( self, node ):
-        """Builds a Sequence from the child nodes (a single child is returned as-is)"""
- items = []
- for n in node.nodes:
- items.append( n.visit(self) )
- if len(items)==1:
- return items[0]
- elif len(items)>1:
- return self.new_item( Sequence( self.new_name(), items) )
- raise SyntaxError("Found empty sequence")
-
- def visit_sequence_cont( self, node ):
- """Returns a list of sequences (possibly empty)"""
- return [n.visit(self) for n in node.nodes]
-
- def visit_seq_cont_list(self, node):
- return node.nodes[1].visit(self)
-
-
- def visit_symbol(self, node):
- star_opt = node.nodes[1]
- sym = node.nodes[0].value
- terminal = self.terminals.get( sym )
- if not terminal:
- terminal = Token( sym )
- self.terminals[sym] = terminal
-
- return self.repeat( star_opt, terminal )
-
- def visit_option( self, node ):
- rule = node.nodes[1].visit(self)
- return self.new_item( KleenStar( self.new_name(), 0, 1, rule ) )
-
- def visit_group( self, node ):
- rule = node.nodes[1].visit(self)
- return self.repeat( node.nodes[3], rule )
-
- def visit_STRING( self, node ):
- value = node.value
- tok = self.tokens.get(value)
- if not tok:
- if py_punct.match( value ):
- tok = Token( value )
- elif py_name.match( value ):
- tok = Token('NAME', value)
- else:
- raise SyntaxError("Unknown STRING value ('%s')" % value )
- self.tokens[value] = tok
- return tok
-
- def visit_sequence_alt( self, node ):
- res = node.nodes[0].visit(self)
- assert isinstance( res, GrammarElement )
- return res
-
- def repeat( self, star_opt, myrule ):
- if star_opt.nodes:
- rule_name = self.new_name()
- tok = star_opt.nodes[0].nodes[0]
- if tok.value == '+':
- return self.new_item( KleenStar( rule_name, _min=1, rule = myrule ) )
- elif tok.value == '*':
- return self.new_item( KleenStar( rule_name, _min=0, rule = myrule ) )
- else:
- raise SyntaxError("Got symbol star_opt with value='%s'" % tok.value )
- return myrule
-
-rules = None
-
-def grammar_grammar():
- """Builds the grammar for the grammar file
-
- Here's the description of the grammar's grammar ::
-
- grammar: rule+
- rule: SYMDEF alternative
-
-        alternative: sequence ( '|' sequence )*
- star: '*' | '+'
- sequence: (SYMBOL star? | STRING | option | group star? )+
- option: '[' alternative ']'
- group: '(' alternative ')' star?
- """
- global rules
- # star: '*' | '+'
- star = Alternative( "star", [Token('*'), Token('+')] )
- star_opt = KleenStar ( "star_opt", 0, 1, rule=star )
-
-    # symbol: SYMBOL star? ; rule: SYMDEF alternative
- symbol = Sequence( "symbol", [Token('SYMBOL'), star_opt] )
- symboldef = Token( "SYMDEF" )
- alternative = Sequence( "alternative", [])
- rule = Sequence( "rule", [symboldef, alternative] )
-
- # grammar: rule+
- grammar = KleenStar( "grammar", _min=1, rule=rule )
-
- # alternative: sequence ( '|' sequence )*
- sequence = KleenStar( "sequence", 1 )
- seq_cont_list = Sequence( "seq_cont_list", [Token('|'), sequence] )
- sequence_cont = KleenStar( "sequence_cont",0, rule=seq_cont_list )
-
- alternative.args = [ sequence, sequence_cont ]
-
- # option: '[' alternative ']'
- option = Sequence( "option", [Token('['), alternative, Token(']')] )
-
- # group: '(' alternative ')'
- group = Sequence( "group", [Token('('), alternative, Token(')'), star_opt] )
-
- # sequence: (SYMBOL | STRING | option | group )+
- string = Token('STRING')
- alt = Alternative( "sequence_alt", [symbol, string, option, group] )
- sequence.args = [ alt ]
-
-
- rules = [ star, star_opt, symbol, alternative, rule, grammar, sequence,
- seq_cont_list, sequence_cont, option, group, alt ]
- build_first_sets( rules )
- return grammar
-
-
-def parse_grammar(stream):
- """parses the grammar file
-
- stream : file-like object representing the grammar to parse
- """
- source = GrammarSource(stream.read())
- rule = grammar_grammar()
- builder = BaseGrammarBuilder()
- result = rule.match(source, builder)
- node = builder.stack[-1]
- vis = EBNFVisitor()
- node.visit(vis)
- return vis
-
-
-from pprint import pprint
-if __name__ == "__main__":
- grambuild = parse_grammar(file('data/Grammar2.3'))
- for i,r in enumerate(grambuild.items):
- print "% 3d : %s" % (i, r)
- pprint(grambuild.terminals.keys())
- pprint(grambuild.tokens)
- print "|".join(grambuild.tokens.keys() )
-
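Besides the __main__ block above, parse_grammar() only needs a file-like
object; a sketch with an inline, made-up grammar::

    from StringIO import StringIO
    from ebnfparse import parse_grammar

    vis = parse_grammar(StringIO("expr: NAME ('+' NAME)*\n"))
    print vis.rules.keys()     # ['expr']
    print vis.tokens.keys()    # ['+'], the literal tokens encountered
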
Deleted: /pypy/dist/pypy/module/recparser/grammar.py
==============================================================================
--- /pypy/dist/pypy/module/recparser/grammar.py Sun Jul 3 11:38:44 2005
+++ (empty file)
@@ -1,559 +0,0 @@
-"""
-a generic recursive descent parser
-the grammar is defined as a composition of objects
-the objects of the grammar are:
-Alternative: as in S -> A | B | C
-Sequence: as in S -> A B C
-KleenStar: as in S -> A* or S -> A+
-Token: a lexer token
-"""
-
-DEBUG = 0
-USE_LOOKAHEAD = True
-
-#### Abstract interface for a lexer/tokenizer
-class TokenSource(object):
- """Abstract base class for a source tokenizer"""
- def context(self):
- """Returns a context to restore the state of the object later"""
-
- def restore(self, ctx):
- """Restore the context"""
-
- def next(self):
-        """Returns the next token from the source.
-        A token is a tuple (type, value), or (None, None) if the end of the
- source has been found
- """
-
- def offset(self, ctx=None):
- """Returns the position we're at so far in the source
-        optionally provide a context and you'll get the offset
- of the context"""
- return -1
-
- def current_line(self):
- """Returns the current line number"""
- return 0
-
- def get_pos(self):
- """Returns the current source position of the scanner"""
- return 0
-
- def get_source_text(self, pos1, pos2 ):
- """Returns the source text between two scanner positions"""
- return ""
-
-
-######################################################################
-
-
-def build_first_sets(rules):
- """builds the real first tokens set for each rule in <rules>
-
- Because a rule can be recursive (directly or indirectly), the
- *simplest* algorithm to build each first set is to recompute them
- until Computation(N) = Computation(N-1), N being the number of rounds.
- As an example, on Python2.3's grammar, we need 19 cycles to compute
- full first sets.
- """
- changed = True
- while changed:
- # loop while one first set is changed
- changed = False
- for rule in rules:
- # For each rule, recompute first set
- size = len(rule.first_set)
- rule.calc_first_set()
- new_size = len(rule.first_set)
- if new_size != size:
- changed = True
- for r in rules:
-        assert len(r.first_set) > 0, "Error: empty first set for %s" % r
- r.reorder_rule()
-
-
-from syntaxtree import SyntaxNode, TempSyntaxNode, TokenNode
-
-class BaseGrammarBuilder(object):
- """Base/default class for a builder"""
- def __init__(self, rules=None, debug=0):
- self.rules = rules or {} # a dictionary of grammar rules for debug/reference
- # XXX This attribute is here for convenience
- self.source_encoding = None
- self.debug = debug
- self.stack = []
-
- def context(self):
- """Returns the state of the builder to be restored later"""
- #print "Save Stack:", self.stack
- return len(self.stack)
-
- def restore(self, ctx):
- del self.stack[ctx:]
- #print "Restore Stack:", self.stack
-
- def alternative(self, rule, source):
- # Do nothing, keep rule on top of the stack
- if rule.is_root():
- elems = self.stack[-1].expand()
- self.stack[-1] = SyntaxNode(rule.name, source, elems)
- if self.debug:
- self.stack[-1].dumpstr()
- return True
-
- def sequence(self, rule, source, elts_number):
-        """Collapses the top <elts_number> stack nodes into one node"""
- items = []
- for node in self.stack[-elts_number:]:
- items += node.expand()
- if rule.is_root():
- node_type = SyntaxNode
- else:
- node_type = TempSyntaxNode
- # replace N elements with 1 element regrouping them
- if elts_number >= 1:
- elem = node_type(rule.name, source, items)
- del self.stack[-elts_number:]
- self.stack.append(elem)
- elif elts_number == 0:
- self.stack.append(node_type(rule.name, source, []))
- if self.debug:
- self.stack[-1].dumpstr()
- return True
-
- def token(self, name, value, source):
- self.stack.append(TokenNode(name, source, value))
- if self.debug:
- self.stack[-1].dumpstr()
- return True
-
-
-######################################################################
-# Grammar Elements Classes (Alternative, Sequence, KleenStar, Token) #
-######################################################################
-class GrammarElement(object):
- """Base parser class"""
- def __init__(self, name):
- # the rule name
- self.name = name
- self.args = []
- self._is_root = False
- self.first_set = []
- self.first_set_complete = False
- # self._processing = False
- self._trace = False
-
- def is_root(self):
- """This is a root node of the grammar, that is one that will
- be included in the syntax tree"""
- if self.name != ":" and self.name.startswith(":"):
- return False
- return True
-
-
- def match(self, source, builder, level=0):
- """Try to match a grammar rule
-
-        If the next set of tokens matches this grammar element, use <builder>
-        to build an appropriate object, otherwise return 0.
-
-        /!\ If the tokens didn't match the current grammar
-        element, then the <source> is restored as it was before the
-        call to the match() method
-
-        returns 0 if there is no match, or an object built by the builder
- """
- if not USE_LOOKAHEAD:
- return self._match(source, builder, level)
- pos1 = -1 # XXX make the annotator happy
- pos2 = -1 # XXX make the annotator happy
- token = source.peek()
- if self._trace:
- pos1 = source.get_pos()
- in_first_set = self.match_first_set(token)
- if not in_first_set: # and not EmptyToken in self.first_set:
- if EmptyToken in self.first_set:
- ret = builder.sequence(self, source, 0 )
- if self._trace:
- self._debug_display(token, level, 'eee')
- return self.debug_return( ret, 0 )
- if self._trace:
- self._debug_display(token, level, 'rrr')
- return 0
- elif self._trace:
- self._debug_display(token, level, '>>>')
-
- res = self._match(source, builder, level)
- if self._trace:
- pos2 = source.get_pos()
- if res:
- prefix = '+++'
- else:
- prefix = '---'
- self._debug_display(token, level, prefix)
- print ' '*level, prefix, " TEXT ='%s'" % (
- source.get_source_text(pos1,pos2))
- if res:
- print "*" * 50
- return res
-
- def _debug_display(self, token, level, prefix):
-        """prints context debug information"""
- prefix = '%s%s' % (' ' * level, prefix)
- print prefix, " RULE =", self
- print prefix, " TOKEN =", token
- print prefix, " FIRST SET =", self.first_set
-
-
- def _match(self, source, builder, level=0):
- """Try to match a grammar rule
-
-        If the next set of tokens matches this grammar element, use <builder>
-        to build an appropriate object, otherwise return 0.
-
-        /!\ If the tokens didn't match the current grammar
-        element, then the <source> is restored as it was before the
-        call to the match() method
-
-        returns 0 if there is no match, or an object built by the builder
- """
- return 0
-
- def parse(self, source):
- """Returns a simplified grammar if the rule matched at the source
- current context or None"""
- # **NOT USED** **NOT IMPLEMENTED**
- # To consider if we need to improve speed in parsing
- pass
-
- def __str__(self):
- return self.display(0)
-
- def __repr__(self):
- return self.display(0)
-
- def display(self, level):
- """Helper function used to represent the grammar.
-        Mostly used for debugging the grammar itself"""
- return "GrammarElement"
-
-
- def debug_return(self, ret, *args ):
- # FIXME: use a wrapper of match() methods instead of debug_return()
- # to prevent additional indirection
- if ret and DEBUG>0:
- sargs = ",".join( [ str(i) for i in args ] )
- print "matched %s (%s): %s" % (self.__class__.__name__, sargs, self.display() )
- return ret
-
-
- def calc_first_set(self):
- """returns the list of possible next tokens
- *must* be implemented in subclasses
- """
- # XXX: first_set could probably be implemented with sets
- return []
-
- def match_first_set(self, other):
- """matching is not equality:
- token('NAME','x') matches token('NAME',None)
- """
- for tk in self.first_set:
- if tk.match_token( other ):
- return True
- return False
-
- def in_first_set(self, other):
- return other in self.first_set
-
- def reorder_rule(self):
- """Called after the computation of first set to allow rules to be reordered
- to avoid ambiguities"""
- pass
-
-class Alternative(GrammarElement):
- """Represents an alternative in a grammar rule (as in S -> A | B | C)"""
- def __init__(self, name, args):
- GrammarElement.__init__(self, name )
- self.args = args
- self._reordered = False
- for i in self.args:
- assert isinstance( i, GrammarElement )
-
- def _match(self, source, builder, level=0):
- """If any of the rules in self.args matches
- returns the object built from the first rules that matches
- """
- if DEBUG>1:
- print "try alt:", self.display()
- tok = source.peek()
-        # Here we stop at the first match; we should instead
-        # try to get the longest alternative,
-        # to see if this solves our problems with infinite recursion
- for rule in self.args:
- if USE_LOOKAHEAD:
- if not rule.match_first_set(tok) and EmptyToken not in rule.first_set:
- if self._trace:
- print "Skipping impossible rule: %s" % (rule,)
- continue
- m = rule.match(source, builder, level+1)
- if m:
- ret = builder.alternative( self, source )
- return self.debug_return( ret )
- return 0
-
- def display(self, level=0):
- if level==0:
- name = self.name + " -> "
- elif not self.name.startswith(":"):
- return self.name
- else:
- name = ""
- items = [ a.display(1) for a in self.args ]
- return name+"(" + "|".join( items ) + ")"
-
- def calc_first_set(self):
- """returns the list of possible next tokens
- if S -> (A | B | C):
- LAH(S) = Union( LAH(A), LAH(B), LAH(C) )
- """
- # do this to avoid problems on indirect recursive rules
- for rule in self.args:
- for t in rule.first_set:
- if t not in self.first_set:
- self.first_set.append(t)
- # self.first_set[t] = 1
-
- def reorder_rule(self):
- # take the opportunity to reorder rules in alternatives
- # so that rules with Empty in their first set come last
- # warn if two rules have empty in their first set
- empty_set = []
- not_empty_set = []
- # <tokens> is only needed for warning / debugging purposes
- tokens_set = []
- for rule in self.args:
- if EmptyToken in rule.first_set:
- empty_set.append(rule)
- else:
- not_empty_set.append(rule)
- if DEBUG:
-            # This loop is only needed for warning / debugging purposes
- # It will check if a token is part of several first sets of
- # a same alternative
- for token in rule.first_set:
- if token is not EmptyToken and token in tokens_set:
- print "Warning, token %s in\n\t%s's first set is part " \
- "of a previous rule's first set in alternative\n\t" \
- "%s" % (token, rule, self)
- tokens_set.append(token)
- if len(empty_set) > 1 and not self._reordered:
- print "Warning: alternative %s has more than one rule matching Empty" % self
- self._reordered = True
- self.args[:] = not_empty_set
- self.args.extend( empty_set )
-
-
-class Sequence(GrammarElement):
-    """Represents a Sequence in a grammar rule (as in S -> A B C)"""
- def __init__(self, name, args):
- GrammarElement.__init__(self, name )
- self.args = args
- for i in self.args:
- assert isinstance( i, GrammarElement )
-
- def _match(self, source, builder, level=0):
- """matches all of the symbols in order"""
- if DEBUG>1:
- print "try seq:", self.display()
- ctx = source.context()
- bctx = builder.context()
- for rule in self.args:
- m = rule.match(source, builder, level+1)
- if not m:
- # Restore needed because some rules may have been matched
- # before the one that failed
- source.restore(ctx)
- builder.restore(bctx)
- return 0
- ret = builder.sequence(self, source, len(self.args))
- return self.debug_return( ret )
-
- def display(self, level=0):
- if level == 0:
- name = self.name + " -> "
- elif not self.name.startswith(":"):
- return self.name
- else:
- name = ""
- items = [a.display(1) for a in self.args]
- return name + "(" + " ".join( items ) + ")"
-
- def calc_first_set(self):
- """returns the list of possible next tokens
- if S -> A* B C:
- LAH(S) = Union( LAH(A), LAH(B) )
- if S -> A+ B C:
- LAH(S) = LAH(A)
- if S -> A B C:
- LAH(S) = LAH(A)
- """
- for rule in self.args:
- if not rule.first_set:
- break
- if EmptyToken in self.first_set:
- self.first_set.remove( EmptyToken )
-
- # del self.first_set[EmptyToken]
-            # while we're in this loop, keep aggregating possible tokens
- for t in rule.first_set:
- if t not in self.first_set:
- self.first_set.append(t)
- # self.first_set[t] = 1
- if EmptyToken not in rule.first_set:
- break
-
-
-
-class KleenStar(GrammarElement):
- """Represents a KleenStar in a grammar rule as in (S -> A+) or (S -> A*)"""
- def __init__(self, name, _min = 0, _max = -1, rule=None):
- GrammarElement.__init__( self, name )
- self.args = [rule]
- self.min = _min
- if _max == 0:
-            raise ValueError("KleenStar needs max==-1 or max>0")
- self.max = _max
- self.star = "x"
- if self.min == 0:
- self.first_set.append( EmptyToken )
- # self.first_set[EmptyToken] = 1
-
- def _match(self, source, builder, level=0):
-        """matches self.args[0] a number of times; that count must lie
-        between self.min and self.max inclusive (-1 represents infinity)"""
- if DEBUG>1:
- print "try kle:", self.display()
- ctx = source.context()
- bctx = builder.context()
- rules = 0
- rule = self.args[0]
- while True:
- m = rule.match(source, builder, level+1)
- if not m:
- # Rule should be matched at least 'min' times
- if rules<self.min:
- source.restore(ctx)
- builder.restore(bctx)
- return 0
- ret = builder.sequence(self, source, rules)
- return self.debug_return( ret, rules )
- rules += 1
- if self.max>0 and rules == self.max:
- ret = builder.sequence(self, source, rules)
- return self.debug_return( ret, rules )
-
- def display(self, level=0):
- if level==0:
- name = self.name + " -> "
- elif not self.name.startswith(":"):
- return self.name
- else:
- name = ""
- star = "{%d,%d}" % (self.min,self.max)
- if self.min==0 and self.max==1:
- star = "?"
- elif self.min==0 and self.max==-1:
- star = "*"
- elif self.min==1 and self.max==-1:
- star = "+"
- s = self.args[0].display(1)
- return name + "%s%s" % (s, star)
-
-
- def calc_first_set(self):
- """returns the list of possible next tokens
- if S -> A*:
- LAH(S) = Union( LAH(A), EmptyToken )
- if S -> A+:
- LAH(S) = LAH(A)
- """
- rule = self.args[0]
- self.first_set = rule.first_set[:]
- # self.first_set = dict(rule.first_set)
- if self.min == 0 and EmptyToken not in self.first_set:
- self.first_set.append(EmptyToken)
- # self.first_set[EmptyToken] = 1
-
-class Token(GrammarElement):
- """Represents a Token in a grammar rule (a lexer token)"""
- def __init__( self, name, value = None):
- GrammarElement.__init__( self, name )
- self.value = value
- self.first_set = [self]
- # self.first_set = {self: 1}
-
- def match(self, source, builder, level=0):
- """Matches a token.
- the default implementation is to match any token whose type
- corresponds to the object's name. You can extend Token
-        to match anything returned from the lexer. For example:
- type, value = source.next()
- if type=="integer" and int(value)>=0:
- # found
- else:
- # error unknown or negative integer
- """
- ctx = source.context()
- tk = source.next()
- # XXX: match_token
- if tk.name == self.name:
- if self.value is None:
- ret = builder.token( tk.name, tk.value, source )
- return self.debug_return( ret, tk.name )
- elif self.value == tk.value:
- ret = builder.token( tk.name, tk.value, source )
- return self.debug_return( ret, tk.name, tk.value )
- if DEBUG>1:
- print "tried tok:", self.display()
- source.restore( ctx )
- return 0
-
- def display(self, level=0):
- if self.value is None:
- return "<%s>" % self.name
- else:
- return "<%s>=='%s'" % (self.name, self.value)
-
-
- def match_token(self, other):
-        """convenience '==' implementation; this is *not* a *real* equality test
- a Token instance can be compared to:
- - another Token instance in which case all fields (name and value)
- must be equal
- - a tuple, such as those yielded by the Python lexer, in which case
- the comparison algorithm is similar to the one in match()
- XXX:
- 1/ refactor match and __eq__ ?
- 2/ make source.next and source.peek return a Token() instance
- """
- if not isinstance(other, Token):
- raise RuntimeError("Unexpected token type %r" % other)
- if other is EmptyToken:
- return False
- res = other.name == self.name and self.value in (None, other.value)
- #print "matching", self, other, res
- return res
-
- def __eq__(self, other):
- return self.name == other.name and self.value == other.value
-
-
-
- def calc_first_set(self):
- """computes the list of possible next tokens
- """
- pass
-
-EmptyToken = Token(None)
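
To make the first-set machinery above concrete, here is a hand-built
version of a rule like "expr: NAME ('+' NAME)*" (the names are made up)
and its computed lookahead::

    from grammar import Sequence, KleenStar, Token, build_first_sets

    plus_name = Sequence(":plus_name", [Token('+'), Token('NAME')])
    tail = KleenStar(":tail", 0, rule=plus_name)
    expr = Sequence("expr", [Token('NAME'), tail])

    build_first_sets([plus_name, tail, expr])
    print expr.first_set    # [<NAME>]: expr can only start with a NAME token
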
Deleted: /pypy/dist/pypy/module/recparser/pythonlexer.py
==============================================================================
--- /pypy/dist/pypy/module/recparser/pythonlexer.py Sun Jul 3 11:38:44 2005
+++ (empty file)
@@ -1,378 +0,0 @@
-"""This is a lexer for a Python recursive descent parser
-it obeys the TokenSource interface defined for the grammar
-analyser in grammar.py
-"""
-
-from grammar import TokenSource, Token
-# Don't import string for that ...
-NAMECHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
-NUMCHARS = '0123456789'
-ALNUMCHARS = NAMECHARS + NUMCHARS
-EXTENDED_ALNUMCHARS = ALNUMCHARS + '-.'
-WHITESPACES = ' \t\n\r\v\f'
-
-def match_encoding_declaration(comment):
- """returns the declared encoding or None
-
- This function is a replacement for :
- >>> py_encoding = re.compile(r"coding[:=]\s*([-\w.]+)")
- >>> py_encoding.search(comment)
- """
- index = comment.find('coding')
- if index == -1:
- return None
- next_char = comment[index + 6]
- if next_char not in ':=':
- return None
- end_of_decl = comment[index + 7:]
- index = 0
- for char in end_of_decl:
- if char not in WHITESPACES:
- break
- index += 1
- else:
- return None
- encoding = ''
- for char in end_of_decl[index:]:
- if char in EXTENDED_ALNUMCHARS:
- encoding += char
- else:
- break
- if encoding != '':
- return encoding
- return None
-
-def _normalize_encoding(encoding):
- """returns normalized name for <encoding>
-
- see dist/src/Parser/tokenizer.c 'get_normal_name()'
- for implementation details / reference
-
- NOTE: for now, parser.suite() raises a MemoryError when
- a bad encoding is used. (SF bug #979739)
- """
- # lower() + '_' / '-' conversion
- encoding = encoding.replace('_', '-').lower()
- if encoding.startswith('utf-8'):
- return 'utf-8'
- for variant in ('latin-1', 'iso-latin-1', 'iso-8859-1'):
- if encoding.startswith(variant):
- return 'iso-8859-1'
- return encoding
-
-################################################################################
-import token as tokenmod
-from pytokenize import tabsize, whiteSpaceDFA, triple_quoted, endDFAs, \
- single_quoted, pseudoDFA
-import automata
-
-# adopt pytokenize notations / values
-tokenmod.COMMENT = tokenmod.N_TOKENS
-tokenmod.NL = tokenmod.N_TOKENS + 1
-
-class TokenError(Exception):
-    """Raised when EOF is found prematurely"""
- def __init__(self, msg, strstart, token_stack):
- # Exception.__init__(self, msg)
- self.strstart = strstart
- self.token_stack = token_stack
-
-
-def generate_tokens(lines):
- """
- This is a rewrite of pypy.module.parser.pytokenize.generate_tokens since
-    the original function is not RPython (it uses yield).
- It was also slightly modified to generate Token instances instead
- of the original 5-tuples
-
- Original docstring ::
-
-        The generate_tokens() generator requires one argument, readline, which
- must be a callable object which provides the same interface as the
- readline() method of built-in file objects. Each call to the function
- should return one line of input as a string.
-
- The generator produces 5-tuples with these members: the token type; the
- token string; a 2-tuple (srow, scol) of ints specifying the row and
- column where the token begins in the source; a 2-tuple (erow, ecol) of
- ints specifying the row and column where the token ends in the source;
- and the line on which the token was found. The line passed is the
- logical line; continuation lines are included.
- """
- token_list = []
- lnum = parenlev = continued = 0
- namechars = NAMECHARS
- numchars = NUMCHARS
- contstr, needcont = '', 0
- contline = None
- indents = [0]
- last_comment = ''
- encoding = None
- strstart = (0, 0)
-
- lines.append('') # XXX HACK probably not needed
- endDFA = automata.DFA([], []) # XXX Make the translator happy
- line = '' # XXX Make the translator happy
- for line in lines:
- lnum = lnum + 1
- pos, max = 0, len(line)
-
- if contstr: # continued string
- if not line:
- raise TokenError("EOF in multi-line string", strstart, token_list)
- endmatch = endDFA.recognize(line)
- if -1 != endmatch:
- pos = end = endmatch
- tok = token_from_values(tokenmod.STRING, contstr + line[:end])
- token_list.append((tok, line))
- last_comment = ''
- # token_list.append((STRING, contstr + line[:end],
- # strstart, (lnum, end), contline + line))
- contstr, needcont = '', 0
- contline = None
- elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
- tok = token_from_values(tokenmod.ERRORTOKEN, contstr + line)
- token_list.append((tok, line))
- last_comment = ''
- # token_list.append((ERRORTOKEN, contstr + line,
- # strstart, (lnum, len(line)), contline))
- contstr = ''
- contline = None
- continue
- else:
- contstr = contstr + line
- contline = contline + line
- continue
-
- elif parenlev == 0 and not continued: # new statement
- if not line: break
- column = 0
- while pos < max: # measure leading whitespace
- if line[pos] == ' ': column = column + 1
- elif line[pos] == '\t': column = (column/tabsize + 1)*tabsize
- elif line[pos] == '\f': column = 0
- else: break
- pos = pos + 1
- if pos == max: break
-
- if line[pos] in '#\r\n': # skip comments or blank lines
- if line[pos] == '#':
- tok = token_from_values(tokenmod.COMMENT, line[pos:])
- last_comment = line[pos:]
- if lnum <= 2 and encoding is None:
- encoding = match_encoding_declaration(last_comment)
- if encoding is not None:
- encoding = _normalize_encoding(encoding)
- else:
- tok = token_from_values(tokenmod.NL, line[pos:])
- last_comment = ''
- # XXX Skip NL and COMMENT Tokens # token_list.append((tok, line))
- # token_list.append(((NL, COMMENT)[line[pos] == '#'], line[pos:],
- # (lnum, pos), (lnum, len(line)), line))
- continue
-
- if column > indents[-1]: # count indents or dedents
- indents.append(column)
- tok = token_from_values(tokenmod.INDENT, line[:pos])
- token_list.append((tok, line))
- last_comment = ''
- # token_list.append((INDENT, line[:pos], (lnum, 0), (lnum, pos), line))
- while column < indents[-1]:
- indents = indents[:-1]
- tok = token_from_values(tokenmod.DEDENT, '')
- token_list.append((tok, line))
- last_comment = ''
- # token_list.append((DEDENT, '', (lnum, pos), (lnum, pos), line))
-
- else: # continued statement
- if not line:
- raise TokenError("EOF in multi-line statement", (lnum, 0), token_list)
- continued = 0
-
- while pos < max:
- pseudomatch = pseudoDFA.recognize(line, pos)
- if -1 != pseudomatch: # scan for tokens
- # JDR: Modified
- start = whiteSpaceDFA.recognize(line, pos)
- if -1 == start:
- start = pos
- end = pseudomatch
-
- spos, epos, pos = (lnum, start), (lnum, end), end
- token, initial = line[start:end], line[start]
-
- if initial in numchars or \
- (initial == '.' and token != '.'): # ordinary number
- tok = token_from_values(tokenmod.NUMBER, token)
- token_list.append((tok, line))
- last_comment = ''
- # token_list.append((NUMBER, token, spos, epos, line))
- elif initial in '\r\n':
- if parenlev > 0:
- tok = token_from_values(tokenmod.NL, token)
- last_comment = ''
- # XXX Skip NL
- else:
- tok = token_from_values(tokenmod.NEWLINE, token)
- # XXX YUCK !
- tok.value = last_comment
- token_list.append((tok, line))
- last_comment = ''
- # token_list.append((parenlev > 0 and NL or NEWLINE, token, spos, epos, line))
- elif initial == '#':
- tok = token_from_values(tokenmod.COMMENT, token)
- last_comment = token
- if lnum <= 2 and encoding is None:
- encoding = match_encoding_declaration(last_comment)
- if encoding is not None:
- encoding = _normalize_encoding(encoding)
- # XXX Skip # token_list.append((tok, line))
- # token_list.append((COMMENT, token, spos, epos, line))
- elif token in triple_quoted:
- endDFA = endDFAs[token]
- endmatch = endDFA.recognize(line, pos)
- if -1 != endmatch: # all on one line
- pos = endmatch
- token = line[start:pos]
- tok = token_from_values(tokenmod.STRING, token)
- token_list.append((tok, line))
- last_comment = ''
- # token_list.append((STRING, token, spos, (lnum, pos), line))
- else:
- strstart = (lnum, start) # multiple lines
- contstr = line[start:]
- contline = line
- break
- elif initial in single_quoted or \
- token[:2] in single_quoted or \
- token[:3] in single_quoted:
- if token[-1] == '\n': # continued string
- strstart = (lnum, start)
- endDFA = (endDFAs[initial] or endDFAs[token[1]] or
- endDFAs[token[2]])
- contstr, needcont = line[start:], 1
- contline = line
- break
- else: # ordinary string
- tok = token_from_values(tokenmod.STRING, token)
- token_list.append((tok, line))
- last_comment = ''
- # token_list.append((STRING, token, spos, epos, line))
- elif initial in namechars: # ordinary name
- tok = token_from_values(tokenmod.NAME, token)
- token_list.append((tok, line))
- last_comment = ''
- # token_list.append((NAME, token, spos, epos, line))
- elif initial == '\\': # continued stmt
- continued = 1
- else:
- if initial in '([{': parenlev = parenlev + 1
- elif initial in ')]}': parenlev = parenlev - 1
- tok = token_from_values(tokenmod.OP, token)
- token_list.append((tok, line))
- last_comment = ''
- # token_list.append((OP, token, spos, epos, line))
- else:
- tok = token_from_values(tokenmod.ERRORTOKEN, line[pos])
- token_list.append((tok, line))
- last_comment = ''
- # token_list.append((ERRORTOKEN, line[pos],
- # (lnum, pos), (lnum, pos+1), line))
- pos = pos + 1
-
- last_comment = ''
- for indent in indents[1:]: # pop remaining indent levels
- tok = token_from_values(tokenmod.DEDENT, '')
- token_list.append((tok, line))
- # token_list.append((DEDENT, '', (lnum, 0), (lnum, 0), ''))
-
- ## <XXX> adim
- token_list.append((Token('NEWLINE', ''), line))
- ## </XXX>
- tok = token_from_values(tokenmod.ENDMARKER, '',)
- token_list.append((tok, line))
- # token_list.append((ENDMARKER, '', (lnum, 0), (lnum, 0), ''))
- return token_list, encoding
-
-class PythonSource(TokenSource):
- """This source uses Jonathan's tokenizer"""
- def __init__(self, strings):
- # TokenSource.__init__(self)
- tokens, encoding = generate_tokens(strings)
- self.token_stack = tokens
- self.encoding = encoding
- self._current_line = '' # the current line (as a string)
- self.stack_pos = 0
-
- def next(self):
- if self.stack_pos >= len(self.token_stack):
- raise StopIteration("Remove me")
- tok, line = self.token_stack[self.stack_pos]
- self.stack_pos += 1
- self._current_line = line
- return tok
-
- def current_line(self):
- return self._current_line
-
- def context(self):
- return self.stack_pos
-
- def restore(self, ctx):
- self.stack_pos = ctx
-
- def peek(self):
- """returns next token without consuming it"""
- ctx = self.context()
- token = self.next()
- self.restore(ctx)
- return token
-
- #### methods below have to be translated
- def offset(self, ctx=None):
- if ctx is None:
- return self.stack_pos
- else:
- assert type(ctx)==int
- return ctx
-
- def get_pos(self):
- if self.stack_pos >= len(self.stack):
- return self.pos
- else:
- token, line, pos = self.stack[self.stack_pos]
- return pos
-
- def get_source_text(self, pos0, pos1 ):
- return self.input[pos0:pos1]
-
- def debug(self):
- """return context for debug information"""
- return 'line %s : %s' % ('XXX', self._current_line)
-
-NONE_LIST = [tokenmod.ENDMARKER, tokenmod.INDENT, tokenmod.DEDENT,]
-NAMED_LIST = [tokenmod.OP, ]
-
-def token_from_values(tok_type, tok_string):
- """XXX Compatibility layer between both parsers"""
- if tok_type in NONE_LIST:
- return Token(tokenmod.tok_name[tok_type], None)
- if tok_type in NAMED_LIST:
- return Token(tok_string, None)
- if tok_type == tokenmod.NEWLINE:
- return Token('NEWLINE', '') # XXX pending comment ?
- return Token(tokenmod.tok_name[tok_type], tok_string)
-
-Source = PythonSource
-
-def tokenize_file(filename):
-    # Source expects a list of '\n'-terminated lines, not a raw string
-    lines = [line + '\n' for line in file(filename).read().split('\n')]
-    src = Source(lines)
-    token = src.next()
-    while token.name != 'ENDMARKER':
- print token
- token = src.next()
-
-if __name__ == '__main__':
- import sys
- tokenize_file(sys.argv[1])
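
The encoding helpers above can be exercised on their own (input strings
made up)::

    from pythonlexer import match_encoding_declaration, _normalize_encoding

    print match_encoding_declaration('# -*- coding: Latin_1 -*-')  # 'Latin_1'
    print _normalize_encoding('Latin_1')                           # 'iso-8859-1'
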
Deleted: /pypy/dist/pypy/module/recparser/pythonparse.py
==============================================================================
--- /pypy/dist/pypy/module/recparser/pythonparse.py Sun Jul 3 11:38:44 2005
+++ (empty file)
@@ -1,51 +0,0 @@
-#!/usr/bin/env python
-from pythonlexer import Source
-from ebnfparse import parse_grammar
-import sys
-import os
-import symbol
-import grammar
-
-# parse the python grammar corresponding to our CPython version
-_ver = ".".join([str(i) for i in sys.version_info[:2]])
-PYTHON_GRAMMAR = os.path.join( os.path.dirname(__file__), "data", "Grammar" + _ver )
-
-def python_grammar():
-    """returns the grammar (an EBNFVisitor) for the running CPython version"""
- level = grammar.DEBUG
- grammar.DEBUG = 0
- gram = parse_grammar( file(PYTHON_GRAMMAR) )
- grammar.DEBUG = level
- # Build first sets for each rule (including anonymous ones)
- grammar.build_first_sets(gram.items)
- return gram
-
-PYTHON_PARSER = python_grammar()
-
-
-def parse_python_source( textsrc, gram, goal, builder=None ):
-    """Parse python source (a list of '\n'-terminated lines) according to goal"""
- target = gram.rules[goal]
- src = Source(textsrc)
- if builder is None:
- builder = grammar.BaseGrammarBuilder(debug=False, rules=gram.rules)
- result = target.match(src, builder)
- # <HACK> XXX find a clean way to process encoding declarations
- builder.source_encoding = src.encoding
- # </HACK>
- if not result:
- return None
- # raise SyntaxError("at %s" % src.debug() )
- return builder
-
-def parse_file_input(pyf, gram, builder=None):
- """Parse a python file"""
-    lines = [line + '\n' for line in pyf.read().split('\n')]
-    return parse_python_source( lines, gram, "file_input", builder )
-
-def parse_single_input(textsrc, gram, builder=None):
- """Parse a python single statement"""
-    lines = [line + '\n' for line in textsrc.split('\n')]
-    return parse_python_source( lines, gram, "single_input", builder )
-
-def parse_eval_input(textsrc, gram, builder=None):
- """Parse a python expression"""
-    lines = [line + '\n' for line in textsrc.split('\n')]
-    return parse_python_source( lines, gram, "eval_input", builder )
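
A minimal sketch of the entry points above, assuming the list-of-lines
convention used throughout this commit::

    from pythonparse import PYTHON_PARSER, parse_eval_input

    builder = parse_eval_input('2 + 2', PYTHON_PARSER)
    print builder is not None    # True when the goal rule matched
    # builder.stack[-1] now holds the root syntax node of the expression
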
Deleted: /pypy/dist/pypy/module/recparser/pythonutil.py
==============================================================================
--- /pypy/dist/pypy/module/recparser/pythonutil.py Sun Jul 3 11:38:44 2005
+++ (empty file)
@@ -1,98 +0,0 @@
-__all__ = ["python_parse", "pypy_parse","ast_single_input", "ast_file_input",
- "ast_eval_input" ]
-
-import grammar
-import pythonparse
-from compiler.transformer import Transformer
-from tuplebuilder import TupleBuilder
-
-PYTHON_PARSER = pythonparse.PYTHON_PARSER
-
-def python_parse(filename):
- """parse <filename> using CPython's parser module and return nested tuples
- """
- pyf = file(filename)
- import parser
- tp2 = parser.suite(pyf.read())
- return tp2.totuple()
-
-import symbol
-def pypy_parse(filename):
- """parse <filename> using PyPy's parser module and return
-    a tuple of three elements:
-    - the encoding declaration symbol, or None if there was no encoding
-      statement
-    - the TupleBuilder's stack top element (an instance of
-      tuplebuilder.StackElement, which wraps nested tuples like those
-      returned by CPython's parser)
-    - the encoding string, or None if there was no encoding statement
- """
- pyf = file(filename)
- text = pyf.read()
- pyf.close()
- builder = TupleBuilder(PYTHON_PARSER.rules, lineno=False)
- # make the annotator life easier
- strings = [line+'\n' for line in text.split('\n')]
- pythonparse.parse_python_source(strings, PYTHON_PARSER, 'file_input', builder)
- nested_tuples = builder.stack[-1]
- if builder.source_encoding is not None:
- return (symbol.encoding_decl, nested_tuples, builder.source_encoding)
- else:
- return (None, nested_tuples, None)
-
-def annotateme(strings):
- builder = TupleBuilder(PYTHON_PARSER.rules, lineno=False)
- pythonparse.parse_python_source(strings, PYTHON_PARSER, 'file_input', builder)
- nested_tuples = builder.stack[-1]
- if builder.source_encoding is not None:
- return (symbol.encoding_decl, nested_tuples, builder.source_encoding)
- else:
- return (None, nested_tuples, None)
-
-def ast_single_input( text ):
- builder = TupleBuilder( PYTHON_PARSER.rules )
- pythonparse.parse_python_source( text, PYTHON_PARSER, "single_input", builder )
- tree = builder.stack[-1]
- trans = Transformer()
- ast = trans.transform( tree )
- return ast
-
-def ast_file_input( filename ):
- pyf = file(filename,"r")
- text = pyf.read()
- return ast_srcfile_input( text, filename )
-
-def ast_srcfile_input( srctext, filename ):
- # TODO do something with the filename
- builder = TupleBuilder( PYTHON_PARSER.rules )
- pythonparse.parse_python_source( srctext, PYTHON_PARSER, "file_input", builder )
- tree = builder.stack[-1]
- trans = Transformer()
- ast = trans.transform( tree )
- return ast
-
-def ast_eval_input( textsrc ):
- builder = TupleBuilder( PYTHON_PARSER.rules )
- pythonparse.parse_python_source( textsrc, PYTHON_PARSER, "eval_input", builder )
- tree = builder.stack[-1]
- trans = Transformer()
- ast = trans.transform( tree )
- return ast
-
-
-
-if __name__ == "__main__":
- import sys
- if len(sys.argv) < 2:
-        print "python pythonutil.py [-d N] test_file.py"
- sys.exit(1)
- if sys.argv[1] == "-d":
- debug_level = int(sys.argv[2])
- test_file = sys.argv[3]
- else:
- test_file = sys.argv[1]
- print "-"*20
- print
- print "pyparse \n", pypy_parse(test_file)
- print "parser \n", python_parse(test_file)
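
Both entry points can also be called directly; 'snippet.py' below is a
hypothetical file name::

    from pythonutil import python_parse, pypy_parse

    print python_parse('snippet.py')[0]   # symbol number of the root node
    encoding_decl, tree, encoding = pypy_parse('snippet.py')
    print encoding                        # None unless a coding: line is present
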
Deleted: /pypy/dist/pypy/module/recparser/pytokenize.py
==============================================================================
--- /pypy/dist/pypy/module/recparser/pytokenize.py Sun Jul 3 11:38:44 2005
+++ (empty file)
@@ -1,334 +0,0 @@
-#! /usr/bin/env python
-# ______________________________________________________________________
-"""Module pytokenize
-
-THIS FILE WAS COPIED FROM pypy/module/parser/pytokenize.py AND ADAPTED
-TO BE ANNOTATABLE (mainly by making lists homogeneous)
-
-This is a modified version of Ka-Ping Yee's tokenize module found in the
-Python standard library.
-
-The primary modification is the removal of the tokenizer's dependence on the
-standard Python regular expression module, which is written in C. The regular
-expressions have been replaced with hand built DFA's using the
-basil.util.automata module.
-
-XXX This now assumes that the automata module is in the Python path.
-
-$Id: pytokenize.py,v 1.3 2003/10/03 16:31:53 jriehl Exp $
-"""
-# ______________________________________________________________________
-
-from __future__ import generators
-import string
-import automata
-
-# ______________________________________________________________________
-# COPIED:
-from token import *
-
-import token
-__all__ = [x for x in dir(token) if x[0] != '_'] + ["COMMENT", "tokenize",
- "generate_tokens", "NL"]
-del x
-del token
-
-COMMENT = N_TOKENS
-tok_name[COMMENT] = 'COMMENT'
-NL = N_TOKENS + 1
-tok_name[NL] = 'NL'
-N_TOKENS += 2
-
-# ______________________________________________________________________
-# Automatically generated DFA's (with one or two hand tweaks):
-pseudoStatesAccepts = [True, True, True, True, True, True, True, True,
- True, True, False, True, True, True, False, False,
- False, False, True, False, False, True, True, False,
- True, False, True, False, True, False, True, False,
- False, False, True, False, False, False, True]
-
-pseudoStates = [
- {'\t': 0, '\n': 13, '\x0c': 0, '\r': 14, ' ': 0, '!': 10,
- '"': 16, '#': 18, '%': 12, '&': 12,
- "'": 15, '(': 13, ')': 13, '*': 7,
- '+': 12, ',': 13, '-': 12, '.': 6,
- '/': 11, '0': 4, '1': 5, '2': 5,
- '3': 5, '4': 5, '5': 5, '6': 5,
- '7': 5, '8': 5, '9': 5, ':': 13,
- ';': 13, '<': 9, '=': 12, '>': 8, 'A': 1,
- 'B': 1, 'C': 1, 'D': 1, 'E': 1,
- 'F': 1, 'G': 1, 'H': 1, 'I': 1,
- 'J': 1, 'K': 1, 'L': 1, 'M': 1,
- 'N': 1, 'O': 1, 'P': 1, 'Q': 1,
- 'R': 2, 'S': 1, 'T': 1, 'U': 3,
- 'V': 1, 'W': 1, 'X': 1, 'Y': 1,
- 'Z': 1, '[': 13, '\\': 17, ']': 13,
- '^': 12, '_': 1, '`': 13, 'a': 1,
- 'b': 1, 'c': 1, 'd': 1, 'e': 1,
- 'f': 1, 'g': 1, 'h': 1, 'i': 1,
- 'j': 1, 'k': 1, 'l': 1, 'm': 1,
- 'n': 1, 'o': 1, 'p': 1, 'q': 1,
- 'r': 2, 's': 1, 't': 1, 'u': 3,
- 'v': 1, 'w': 1, 'x': 1, 'y': 1,
- 'z': 1, '{': 13, '|': 12, '}': 13,
- '~': 13},
-
- {'0': 1, '1': 1, '2': 1, '3': 1,
- '4': 1, '5': 1, '6': 1, '7': 1,
- '8': 1, '9': 1, 'A': 1, 'B': 1,
- 'C': 1, 'D': 1, 'E': 1, 'F': 1,
- 'G': 1, 'H': 1, 'I': 1, 'J': 1,
- 'K': 1, 'L': 1, 'M': 1, 'N': 1,
- 'O': 1, 'P': 1, 'Q': 1, 'R': 1,
- 'S': 1, 'T': 1, 'U': 1, 'V': 1,
- 'W': 1, 'X': 1, 'Y': 1, 'Z': 1,
- '_': 1, 'a': 1, 'b': 1, 'c': 1,
- 'd': 1, 'e': 1, 'f': 1, 'g': 1,
- 'h': 1, 'i': 1, 'j': 1, 'k': 1,
- 'l': 1, 'm': 1, 'n': 1, 'o': 1,
- 'p': 1, 'q': 1, 'r': 1, 's': 1,
- 't': 1, 'u': 1, 'v': 1, 'w': 1,
- 'x': 1, 'y': 1, 'z': 1},
-
- {'"': 20, "'": 19, '0': 1, '1': 1,
- '2': 1, '3': 1, '4': 1, '5': 1,
- '6': 1, '7': 1, '8': 1, '9': 1,
- 'A': 1, 'B': 1, 'C': 1, 'D': 1,
- 'E': 1, 'F': 1, 'G': 1, 'H': 1,
- 'I': 1, 'J': 1, 'K': 1, 'L': 1,
- 'M': 1, 'N': 1, 'O': 1, 'P': 1,
- 'Q': 1, 'R': 1, 'S': 1, 'T': 1,
- 'U': 1, 'V': 1, 'W': 1, 'X': 1,
- 'Y': 1, 'Z': 1, '_': 1, 'a': 1,
- 'b': 1, 'c': 1, 'd': 1, 'e': 1,
- 'f': 1, 'g': 1, 'h': 1, 'i': 1,
- 'j': 1, 'k': 1, 'l': 1, 'm': 1,
- 'n': 1, 'o': 1, 'p': 1, 'q': 1,
- 'r': 1, 's': 1, 't': 1, 'u': 1,
- 'v': 1, 'w': 1, 'x': 1, 'y': 1,
- 'z': 1},
-
- {'"': 20, "'": 19, '0': 1, '1': 1,
- '2': 1, '3': 1, '4': 1, '5': 1,
- '6': 1, '7': 1, '8': 1, '9': 1,
- 'A': 1, 'B': 1, 'C': 1, 'D': 1,
- 'E': 1, 'F': 1, 'G': 1, 'H': 1,
- 'I': 1, 'J': 1, 'K': 1, 'L': 1,
- 'M': 1, 'N': 1, 'O': 1, 'P': 1,
- 'Q': 1, 'R': 2, 'S': 1, 'T': 1,
- 'U': 1, 'V': 1, 'W': 1, 'X': 1,
- 'Y': 1, 'Z': 1, '_': 1, 'a': 1,
- 'b': 1, 'c': 1, 'd': 1, 'e': 1,
- 'f': 1, 'g': 1, 'h': 1, 'i': 1,
- 'j': 1, 'k': 1, 'l': 1, 'm': 1,
- 'n': 1, 'o': 1, 'p': 1, 'q': 1,
- 'r': 2, 's': 1, 't': 1, 'u': 1,
- 'v': 1, 'w': 1, 'x': 1, 'y': 1,
- 'z': 1},
-
- {'.': 24, '0': 22, '1': 22, '2': 22,
- '3': 22, '4': 22, '5': 22, '6': 22,
- '7': 22, '8': 23, '9': 23, 'E': 25,
- 'J': 13, 'L': 13, 'X': 21, 'e': 25,
- 'j': 13, 'l': 13, 'x': 21},
-
- {'.': 24, '0': 5, '1': 5, '2': 5,
- '3': 5, '4': 5, '5': 5, '6': 5,
- '7': 5, '8': 5, '9': 5, 'E': 25,
- 'J': 13, 'L': 13, 'e': 25, 'j': 13,
- 'l': 13},
-
- {'0': 26, '1': 26, '2': 26, '3': 26,
- '4': 26, '5': 26, '6': 26, '7': 26,
- '8': 26, '9': 26},
-
- {'*': 12, '=': 13},
-
- {'=': 13, '>': 12},
-
- {'=': 13, '<': 12, '>': 13},
-
- {'=': 13},
-
- {'=': 13, '/': 12},
-
- {'=': 13},
-
- {},
-
- {'\n': 13},
-
- {automata.DEFAULT: 19, '\n': 27, '\\': 29, "'": 28},
-
- {automata.DEFAULT: 20, '"': 30, '\n': 27, '\\': 31},
-
- {'\n': 13, '\r': 14},
-
- {automata.DEFAULT: 18, '\n': 27, '\r': 27},
-
- {automata.DEFAULT: 19, '\n': 27, '\\': 29, "'": 13},
-
- {automata.DEFAULT: 20, '"': 13, '\n': 27, '\\': 31},
-
- {'0': 21, '1': 21, '2': 21, '3': 21,
- '4': 21, '5': 21, '6': 21, '7': 21,
- '8': 21, '9': 21, 'A': 21, 'B': 21,
- 'C': 21, 'D': 21, 'E': 21, 'F': 21,
- 'L': 13, 'a': 21, 'b': 21, 'c': 21,
- 'd': 21, 'e': 21, 'f': 21, 'l': 13},
-
- {'.': 24, '0': 22, '1': 22, '2': 22,
- '3': 22, '4': 22, '5': 22, '6': 22,
- '7': 22, '8': 23, '9': 23, 'E': 25,
- 'J': 13, 'L': 13, 'e': 25, 'j': 13,
- 'l': 13},
-
- {'.': 24, '0': 23, '1': 23, '2': 23,
- '3': 23, '4': 23, '5': 23, '6': 23,
- '7': 23, '8': 23, '9': 23, 'E': 25,
- 'J': 13, 'e': 25, 'j': 13},
-
- {'0': 24, '1': 24, '2': 24, '3': 24,
- '4': 24, '5': 24, '6': 24, '7': 24,
- '8': 24, '9': 24, 'E': 32, 'J': 13,
- 'e': 32, 'j': 13},
-
- {'+': 33, '-': 33, '0': 34, '1': 34,
- '2': 34, '3': 34, '4': 34, '5': 34,
- '6': 34, '7': 34, '8': 34, '9': 34},
-
- {'0': 26, '1': 26, '2': 26, '3': 26,
- '4': 26, '5': 26, '6': 26, '7': 26,
- '8': 26, '9': 26, 'E': 32, 'J': 13,
- 'e': 32, 'j': 13},
-
- {},
-
- {"'": 13},
-
- {automata.DEFAULT: 35, '\n': 13, '\r': 14},
-
- {'"': 13},
-
- {automata.DEFAULT: 36, '\n': 13, '\r': 14},
-
- {'+': 37, '-': 37, '0': 38, '1': 38,
- '2': 38, '3': 38, '4': 38, '5': 38,
- '6': 38, '7': 38, '8': 38, '9': 38},
-
-
- {'0': 34, '1': 34, '2': 34, '3': 34,
- '4': 34, '5': 34, '6': 34, '7': 34,
- '8': 34, '9': 34},
-
- {'0': 34, '1': 34, '2': 34, '3': 34,
- '4': 34, '5': 34, '6': 34, '7': 34,
- '8': 34, '9': 34, 'J': 13, 'j': 13},
-
- {automata.DEFAULT: 35, '\n': 27, '\\': 29, "'": 13},
-
- {automata.DEFAULT: 36, '"': 13, '\n': 27, '\\': 31},
-
- {'0': 38, '1': 38, '2': 38, '3': 38,
- '4': 38, '5': 38, '6': 38, '7': 38,
- '8': 38, '9': 38},
-
- {'0': 38, '1': 38, '2': 38, '3': 38,
- '4': 38, '5': 38, '6': 38, '7': 38,
- '8': 38, '9': 38, 'J': 13, 'j': 13},
- ]
-
-pseudoDFA = automata.DFA(pseudoStates, pseudoStatesAccepts)
-
-double3StatesAccepts = [False, False, False, False, False, True]
-double3States = [
- {automata.DEFAULT: 0, '"': 1, '\\': 2},
- {automata.DEFAULT: 4, '"': 3, '\\': 2},
- {automata.DEFAULT: 4},
- {automata.DEFAULT: 4, '"': 5, '\\': 2},
- {automata.DEFAULT: 4, '"': 1, '\\': 2},
- {automata.DEFAULT: 4, '"': 5, '\\': 2},
- ]
-double3DFA = automata.NonGreedyDFA(double3States, double3StatesAccepts)
-
-single3StatesAccepts = [False, False, False, False, False, True]
-single3States = [
- {automata.DEFAULT: 0, '\\': 2, "'": 1},
- {automata.DEFAULT: 4, '\\': 2, "'": 3},
- {automata.DEFAULT: 4},
- {automata.DEFAULT: 4, '\\': 2, "'": 5},
- {automata.DEFAULT: 4, '\\': 2, "'": 1},
- {automata.DEFAULT: 4, '\\': 2, "'": 5},
- ]
-single3DFA = automata.NonGreedyDFA(single3States, single3StatesAccepts)
-
-singleStatesAccepts = [False, True, False]
-singleStates = [
- {automata.DEFAULT: 0, '\\': 2, "'": 1},
- {},
- {automata.DEFAULT: 0},
- ]
-singleDFA = automata.DFA(singleStates, singleStatesAccepts)
-
-doubleStatesAccepts = [False, True, False]
-doubleStates = [
- {automata.DEFAULT: 0, '"': 1, '\\': 2},
- {},
- {automata.DEFAULT: 0},
- ]
-doubleDFA = automata.DFA(doubleStates, doubleStatesAccepts)
-
-endDFAs = {"'" : singleDFA,
- '"' : doubleDFA,
- "r" : None,
- "R" : None,
- "u" : None,
- "U" : None}
-
-for uniPrefix in ("", "u", "U"):
- for rawPrefix in ("", "r", "R"):
- prefix = uniPrefix + rawPrefix
- endDFAs[prefix + "'''"] = single3DFA
- endDFAs[prefix + '"""'] = double3DFA
-
-whiteSpaceStatesAccepts = [True]
-whiteSpaceStates = [{'\t': 0, ' ': 0, '\x0c': 0}]
-whiteSpaceDFA = automata.DFA(whiteSpaceStates, whiteSpaceStatesAccepts)
-
-# ______________________________________________________________________
-# COPIED:
-
-triple_quoted = {}
-for t in ("'''", '"""',
- "r'''", 'r"""', "R'''", 'R"""',
- "u'''", 'u"""', "U'''", 'U"""',
- "ur'''", 'ur"""', "Ur'''", 'Ur"""',
- "uR'''", 'uR"""', "UR'''", 'UR"""'):
- triple_quoted[t] = t
-single_quoted = {}
-for t in ("'", '"',
- "r'", 'r"', "R'", 'R"',
- "u'", 'u"', "U'", 'U"',
- "ur'", 'ur"', "Ur'", 'Ur"',
- "uR'", 'uR"', "UR'", 'UR"' ):
- single_quoted[t] = t
-
-tabsize = 8
-
-# PYPY MODIFICATION: removed TokenError class as it's not needed here
-
-# PYPY MODIFICATION: removed StopTokenizing class as it's not needed here
-
-# PYPY MODIFICATION: removed printtoken() as it's not needed here
-
-# PYPY MODIFICATION: removed tokenize() as it's not needed here
-
-# PYPY MODIFICATION: removed tokenize_loop() as it's not needed here
-
-# PYPY MODIFICATION: removed generate_tokens() as it was copied / modified
-# in pythonlexer.py
-
-# PYPY MODIFICATION: removed main() as it's not needed here
-
-# ______________________________________________________________________
-# End of pytokenize.py
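
For reference, a minimal sketch of how the tables above drive recognition
(assuming the automata module is importable from its new home under
interpreter/pyparser; the exact import path below is illustrative):

    from pypy.interpreter.pyparser import automata

    # The single-quoted-string DFA from the tables above: state 0 scans
    # the literal body, state 2 skips the character after a backslash,
    # and state 1 accepts once the closing quote is seen.
    singleStatesAccepts = [False, True, False]
    singleStates = [
        {automata.DEFAULT: 0, '\\': 2, "'": 1},
        {},
        {automata.DEFAULT: 0},
        ]
    singleDFA = automata.DFA(singleStates, singleStatesAccepts)

    # recognize() returns the index just past the match, or -1 on failure.
    assert singleDFA.recognize("it\\'s'") == 6   # escaped quote is skipped
    assert singleDFA.recognize("no end") == -1   # unterminated literal
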
Deleted: /pypy/dist/pypy/module/recparser/syntaxtree.py
==============================================================================
--- /pypy/dist/pypy/module/recparser/syntaxtree.py Sun Jul 3 11:38:44 2005
+++ (empty file)
@@ -1,162 +0,0 @@
-import symbol
-import token
-
-TOKEN_MAP = {
- "STRING" : token.STRING,
- "NUMBER" : token.NUMBER,
- "NAME" : token.NAME,
- "NEWLINE" : token.NEWLINE,
- "DEDENT" : token.DEDENT,
- "ENDMARKER" : token.ENDMARKER,
- "INDENT" : token.INDENT,
- "NEWLINE" : token.NEWLINE,
- "NT_OFFSET" : token.NT_OFFSET,
- "N_TOKENS" : token.N_TOKENS,
- "OP" : token.OP,
- "?ERRORTOKEN" : token.ERRORTOKEN,
- "&" : token.AMPER,
- "&=" : token.AMPEREQUAL,
- "`" : token.BACKQUOTE,
- "^" : token.CIRCUMFLEX,
- "^=" : token.CIRCUMFLEXEQUAL,
- ":" : token.COLON,
- "," : token.COMMA,
- "." : token.DOT,
- "//" : token.DOUBLESLASH,
- "//=" : token.DOUBLESLASHEQUAL,
- "**" : token.DOUBLESTAR,
- "**=" : token.DOUBLESTAREQUAL,
- "==" : token.EQEQUAL,
- "=" : token.EQUAL,
- ">" : token.GREATER,
- ">=" : token.GREATEREQUAL,
- "{" : token.LBRACE,
- "}" : token.RBRACE,
- "<<" : token.LEFTSHIFT,
- "<<=" : token.LEFTSHIFTEQUAL,
- "<" : token.LESS,
- "<=" : token.LESSEQUAL,
- "(" : token.LPAR,
- "[" : token.LSQB,
- "-=" : token.MINEQUAL,
- "-" : token.MINUS,
- "!=" : token.NOTEQUAL,
- "<>" : token.NOTEQUAL,
- "%" : token.PERCENT,
- "%=" : token.PERCENTEQUAL,
- "+" : token.PLUS,
- "+=" : token.PLUSEQUAL,
- ")" : token.RBRACE,
- ">>" : token.RIGHTSHIFT,
- ">>=" : token.RIGHTSHIFTEQUAL,
- ")" : token.RPAR,
- "]" : token.RSQB,
- ";" : token.SEMI,
- "/" : token.SLASH,
- "/=" : token.SLASHEQUAL,
- "*" : token.STAR,
- "*=" : token.STAREQUAL,
- "~" : token.TILDE,
- "|" : token.VBAR,
- "|=" : token.VBAREQUAL,
- }
-NT_OFFSET = token.NT_OFFSET
-
-SYMBOLS = {}
-# invert symbol.sym_name (number -> name) so that SYMBOLS maps each
-# symbol name to its numerical value
-for k,v in symbol.sym_name.items():
- SYMBOLS[v] = k
-SYMBOLS['UNKNOWN'] = -1
-
-
-class SyntaxNode(object):
- """A syntax node"""
- def __init__(self, name, source, args):
- self.name = name
- self.nodes = args
- self.lineno = source.current_line()
-
- def dumptree(self, treenodes, indent):
- treenodes.append(self.name)
- if len(self.nodes) > 1:
- treenodes.append(" -> (\n")
- treenodes.append(indent+" ")
- for node in self.nodes:
- node.dumptree(treenodes, indent+" ")
- treenodes.append(")\n")
- treenodes.append(indent)
- elif len(self.nodes) == 1:
- treenodes.append(" ->\n")
- treenodes.append(indent+" ")
- self.nodes[0].dumptree(treenodes, indent+" ")
-
- def dumpstr(self):
- treenodes = []
- self.dumptree(treenodes, "")
- return "".join(treenodes)
-
- def __repr__(self):
- return "<node [%s] at 0x%x>" % (self.name, id(self))
-
- def __str__(self):
- return "(%s)" % self.name
-
- def visit(self, visitor):
- """NOT RPYTHON, used only at bootstrap time anyway"""
- visit_meth = getattr(visitor, "visit_%s" % self.name, None)
- if visit_meth:
- return visit_meth(self)
- # helper function for nodes that have only one subnode:
- if len(self.nodes) == 1:
- return self.nodes[0].visit(visitor)
- raise RuntimeError("Unknonw Visitor for %r" % self.name)
-
- def expand(self):
- return [ self ]
-
- def totuple(self, lineno=False ):
- symvalue = SYMBOLS.get( self.name, (0, self.name) )
- l = [ symvalue ]
- l += [node.totuple(lineno) for node in self.nodes]
- return tuple(l)
-
-
-class TempSyntaxNode(SyntaxNode):
- """A temporary syntax node to represent intermediate rules"""
- def expand(self):
- return self.nodes
-
-class TokenNode(SyntaxNode):
- """A token node"""
- def __init__(self, name, source, value):
- SyntaxNode.__init__(self, name, source, [])
- self.value = value
-
- def dumptree(self, treenodes, indent):
- if self.value:
- treenodes.append("%s='%s' (%d) " % (self.name, self.value, self.lineno))
- else:
- treenodes.append("'%s' (%d) " % (self.name, self.lineno))
-
- def __repr__(self):
- if self.value is not None:
- return "<%s=%s>" % ( self.name, repr(self.value))
- else:
- return "<%s!>" % (self.name,)
-
- def totuple(self, lineno=False):
- num = TOKEN_MAP.get(self.name, -1)
- if num == -1:
- print "Unknown", self.name, self.value
- if self.value is not None:
- val = self.value
- else:
- if self.name not in ("NEWLINE", "INDENT", "DEDENT", "ENDMARKER"):
- val = self.name
- else:
- val = self.value or ''
- if lineno:
- return (num, val, self.lineno)
- else:
- return (num, val)
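
A short usage sketch for the classes above (FakeSource is a hypothetical
stand-in; the real parser supplies a richer source object):

    class FakeSource:
        """minimal stub: the nodes only ever ask for the current line"""
        def current_line(self):
            return 1

    src = FakeSource()
    tree = SyntaxNode('expr_stmt', src,
                      [TokenNode('NAME', src, 'x'),
                       TokenNode('=', src, None),
                       TokenNode('NUMBER', src, '1')])
    print tree.dumpstr()
    # totuple() yields CPython parser-module style nested tuples:
    # (symbol.expr_stmt, (token.NAME, 'x'), (token.EQUAL, '='), ...)
    print tree.totuple()
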
Deleted: /pypy/dist/pypy/module/recparser/test_lookahead.py
==============================================================================
--- /pypy/dist/pypy/module/recparser/test_lookahead.py Sun Jul 3 11:38:44 2005
+++ (empty file)
@@ -1,84 +0,0 @@
-from pypy.module.recparser.grammar import Alternative, Sequence, KleenStar, \
- Token, EmptyToken, build_first_sets
-
-class TestLookAheadBasics:
-
- def setup_method(self, method):
- self.tok1 = Token('t1', 'foo')
- self.tok2 = Token('t2', 'bar')
- self.tok3 = Token('t3', 'foobar')
- self.tokens = [self.tok1, self.tok2, self.tok3]
- build_first_sets(self.tokens)
-
- def test_basic_token(self):
- assert self.tok1.first_set == [self.tok1]
-
-
- def test_basic_alternative(self):
- alt = Alternative('alt', self.tokens)
- build_first_sets([alt])
- assert alt.first_set == self.tokens
-
-
- def test_basic_sequence(self):
- seq = Sequence('seq', self.tokens)
- build_first_sets([seq])
- assert seq.first_set == [self.tokens[0]]
-
- def test_basic_kleenstar(self):
- tok1, tok2, tok3 = self.tokens
- kstar = KleenStar('kstar', 1, 3, tok1)
- build_first_sets([kstar])
- assert kstar.first_set == [tok1]
- kstar = KleenStar('kstar', 0, 3, tok1)
- build_first_sets([kstar])
- assert kstar.first_set == [tok1, EmptyToken]
-
-
- def test_maybe_empty_sequence(self):
- """S -> tok1{0,2} tok2{0,2}
- ==> S.first_set = [tok1, tok2, EmptyToken]
- """
- tok1, tok2, tok3 = self.tokens
- k1 = KleenStar('k1', 0, 2, tok1)
- k2 = KleenStar('k2', 0, 2, tok2)
- seq = Sequence('seq', [k1, k2])
- build_first_sets([k1, k2, seq])
- assert seq.first_set == [tok1, tok2, EmptyToken]
-
-
- def test_not_empty_sequence(self):
- """S -> tok1{0,2} tok2{1,2}
- ==> S.first_set = [tok1, tok2]
- """
- tok1, tok2, tok3 = self.tokens
- k1 = KleenStar('k1', 0, 2, tok1)
- k2 = KleenStar('k2', 1, 2, tok2)
- seq = Sequence('seq', [k1, k2])
- build_first_sets([k1, k2, seq])
- assert seq.first_set == [tok1, tok2]
-
-def test_token_comparison():
- assert Token('t1', 'foo') == Token('t1', 'foo')
- assert Token('t1', 'foo') != Token('t2', 'foo')
- assert Token('t2', 'foo') != Token('t1', None)
-
-
-class TestLookAhead:
-
- def setup_method(self, method):
- self.LOW = Token('LOW', 'low')
- self.CAP = Token('CAP', 'cap')
- self.A = Alternative('A', [])
- k1 = KleenStar('k1', 0, rule=self.LOW)
- k2 = KleenStar('k2', 0, rule=self.CAP)
- self.B = Sequence('B', [k1, self.A])
- self.C = Sequence('C', [k2, self.A])
- self.A.args = [self.B, self.C]
- build_first_sets([self.A, self.B, self.C, self.LOW, self.CAP, k1, k2])
-
- def test_S_first_set(self):
- for s in [Token('LOW', 'low'), EmptyToken, Token('CAP', 'cap')]:
- assert s in self.A.first_set
- assert s in self.B.first_set
- assert s in self.C.first_set
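
The rule these tests pin down can be stated compactly: FIRST of a sequence
accumulates the FIRST sets of its items for as long as each item may match
the empty string. A self-contained sketch (EMPTY stands in for EmptyToken;
the names here are illustrative, not the grammar module's API):

    EMPTY = object()

    def first_of_sequence(firsts):
        """FIRST(s1 s2 ...): union item FIRSTs while items can be empty"""
        result = []
        for fs in firsts:
            result += [t for t in fs if t is not EMPTY and t not in result]
            if EMPTY not in fs:     # this item always consumes a token,
                return result       # so later items cannot start a match
        result.append(EMPTY)        # every item may be empty
        return result

    # S -> tok1{0,2} tok2{1,2}  ==>  FIRST(S) = [tok1, tok2]
    assert first_of_sequence([['t1', EMPTY], ['t2']]) == ['t1', 't2']
    # S -> tok1{0,2} tok2{0,2}  ==>  FIRST(S) = [tok1, tok2, EMPTY]
    assert first_of_sequence([['t1', EMPTY], ['t2', EMPTY]]) \
           == ['t1', 't2', EMPTY]
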
Deleted: /pypy/dist/pypy/module/recparser/tuplebuilder.py
==============================================================================
--- /pypy/dist/pypy/module/recparser/tuplebuilder.py Sun Jul 3 11:38:44 2005
+++ (empty file)
@@ -1,118 +0,0 @@
-
-from grammar import BaseGrammarBuilder
-from syntaxtree import TOKEN_MAP, SYMBOLS # , NT_OFFSET
-
-##
-## def _expand_nodes(nodes):
-## expanded = []
-## for n in nodes:
-## if n[0] == -2:
-## # expanded.extend(expand_nodes(n[1:]))
-## expanded.extend(n[1:])
-## else:
-## expanded.append(n)
-## return tuple(expanded)
-##
-## def expand_nodes(nodes):
-## r = _expand_nodes(nodes)
-## for n in nodes:
-## assert type(n[0]) == int
-## return r
-##
-
-class StackElement:
- """wraps TupleBuilder's tuples"""
-
-class Terminal(StackElement):
- def __init__(self, num, value, lineno=-1):
- self.nodes = [(num, value, lineno)]
- self.num = num
-
- def as_tuple(self, lineno=None):
- if lineno is not None:
- return self.nodes[0]
- else:
- return self.nodes[0][:-1]
-
-class NonTerminal(StackElement):
- def __init__(self, num, nodes, rulename=None):
- """rulename should always be None with regular Python grammar"""
- self.nodes = nodes
- self.num = num
-
- def as_tuple(self, lineno=None):
- l = [self.num] + [node.as_tuple(lineno) for node in self.nodes]
- return tuple(l)
-
-
-def expand_nodes(stack_elements):
- """generate a nested tuples from a list of stack elements"""
- expanded = []
- for element in stack_elements:
- if isinstance(element, NonTerminal) and element.num == -2:
- expanded.extend(element.nodes)
- else:
- expanded.append(element)
- return expanded
-
-class TupleBuilder(BaseGrammarBuilder):
- """A builder that directly produce the AST"""
-
- def __init__(self, rules=None, debug=0, lineno=True):
- BaseGrammarBuilder.__init__(self, rules, debug)
- # This attribute is here for convenience
- self.source_encoding = None
- self.lineno = lineno
- self._unknown = -10
-
- def _add_rule(self, rulename):
- SYMBOLS[rulename] = self._unknown
- self._unknown -= 1
-
- def alternative(self, rule, source):
- # For non-root rules, simply keep the node on top of the stack
- if rule.is_root():
- nodes = expand_nodes( [self.stack[-1]] )
- if rule.name in SYMBOLS:
- self.stack[-1] = NonTerminal(SYMBOLS[rule.name], nodes)
- else:
- # Using regular CPython's Grammar should not lead here
- # XXX find how self._unknown is meant to be used
- self.stack[-1] = NonTerminal(self._unknown, nodes, rule.name)
- self._add_rule(rule.name)
- return True
-
- def sequence(self, rule, source, elts_number):
- """ """
- if rule.is_root():
- if rule.name in SYMBOLS:
- num = SYMBOLS[rule.name]
- node = [num]
- else:
- num = self._unknown
- node = [num]
- self._add_rule(rule.name)
- else:
- num = -2
- node = [num]
- if elts_number > 0:
- sequence_elements = self.stack[-elts_number:]
- nodes = expand_nodes( sequence_elements )
- self.stack[-elts_number:] = [NonTerminal(num, nodes)]
- else:
- self.stack.append( NonTerminal(num, []) )
- return True
-
- def token(self, name, value, source):
- num = TOKEN_MAP.get(name, -1)
- lineno = source.current_line()
- if value is None:
- if name not in ("NEWLINE", "INDENT", "DEDENT", "ENDMARKER"):
- value = name
- else:
- value = ''
- if self.lineno:
- self.stack.append( Terminal(num, value, lineno) )
- else:
- self.stack.append( Terminal(num, value, -1) )
- return True
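
The stack elements above reduce directly to the nested tuples that
CPython's parser module produces. A minimal sketch, assuming Terminal and
NonTerminal as defined in this file are in scope:

    import symbol, token

    name = Terminal(token.NAME, 'x', 1)
    eq   = Terminal(token.EQUAL, '=', 1)
    num  = Terminal(token.NUMBER, '1', 1)
    stmt = NonTerminal(symbol.expr_stmt, [name, eq, num])

    # without line numbers: (symbol.expr_stmt, (NAME, 'x'), ...)
    print stmt.as_tuple()
    # any non-None argument makes terminals keep their line numbers
    print stmt.as_tuple(lineno=True)
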