[pypy-svn] r20942 - in pypy/dist/pypy: annotation interpreter/pyparser module/_socket/rpython module/recparser module/recparser/test rpython translator/goal
ludal at codespeak.net
Fri Dec 9 16:08:00 CET 2005
Author: ludal
Date: Fri Dec 9 16:07:56 2005
New Revision: 20942
Added:
pypy/dist/pypy/interpreter/pyparser/ebnfgrammar.py
pypy/dist/pypy/translator/goal/targetebnflexer.py
Modified:
pypy/dist/pypy/annotation/bookkeeper.py
pypy/dist/pypy/annotation/builtin.py
pypy/dist/pypy/interpreter/pyparser/ebnflexer.py
pypy/dist/pypy/interpreter/pyparser/ebnfparse.py
pypy/dist/pypy/interpreter/pyparser/grammar.py
pypy/dist/pypy/interpreter/pyparser/pysymbol.py
pypy/dist/pypy/interpreter/pyparser/syntaxtree.py
pypy/dist/pypy/module/_socket/rpython/rsocket.py
pypy/dist/pypy/module/recparser/pyparser.py
pypy/dist/pypy/module/recparser/test/test_compilehooks.py
pypy/dist/pypy/rpython/rbuiltin.py
Log:
first steps toward making the ebnf parser translatable
- the lexer now translates
- some cleanup/reorganization for the next part
- moved the TypeDefs for the grammar objects into the recparser module
- allow object.__init__ to be ignored by the annotator/rtyper
Modified: pypy/dist/pypy/annotation/bookkeeper.py
==============================================================================
--- pypy/dist/pypy/annotation/bookkeeper.py (original)
+++ pypy/dist/pypy/annotation/bookkeeper.py Fri Dec 9 16:07:56 2005
@@ -348,7 +348,8 @@
result.dictdef.generalize_key(self.immutablevalue(ek))
result.dictdef.generalize_value(self.immutablevalue(ev))
elif ishashable(x) and x in BUILTIN_ANALYZERS:
- result = SomeBuiltin(BUILTIN_ANALYZERS[x], methodname="%s.%s" % (x.__module__, x.__name__))
+ _module = getattr(x,"__module__","unknown")
+ result = SomeBuiltin(BUILTIN_ANALYZERS[x], methodname="%s.%s" % (_module, x.__name__))
elif tp in EXTERNAL_TYPE_ANALYZERS:
result = SomeExternalObject(tp)
elif isinstance(x, lltype._ptr):
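
[Editor's note] The getattr fallback added above guards against callables that do not expose a __module__ attribute; a minimal sketch of the failure mode (object.__init__, registered in builtin.py below, is the motivating case):

    # Why "%s.%s" % (x.__module__, x.__name__) can break: slot wrappers
    # such as object.__init__ carry __name__ but not necessarily
    # __module__, so a getattr default keeps annotation going.
    def methodname_for(x):
        _module = getattr(x, "__module__", "unknown")
        return "%s.%s" % (_module, x.__name__)

    print methodname_for(len)              # __builtin__.len
    print methodname_for(object.__init__)  # unknown.__init__ (if absent)
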
Modified: pypy/dist/pypy/annotation/builtin.py
==============================================================================
--- pypy/dist/pypy/annotation/builtin.py (original)
+++ pypy/dist/pypy/annotation/builtin.py Fri Dec 9 16:07:56 2005
@@ -233,6 +233,11 @@
def exception_init(s_self, *args):
pass # XXX check correctness of args, maybe
+def object_init(s_self, *args):
+ # ignore - mostly called when initializing abstract classes
+ pass
+
+
def count(s_obj):
return SomeInteger()
@@ -339,6 +344,9 @@
import unicodedata
BUILTIN_ANALYZERS[unicodedata.decimal] = unicodedata_decimal # xxx
+# object - just ignore object.__init__
+BUILTIN_ANALYZERS[object.__init__] = object_init
+
# import
BUILTIN_ANALYZERS[__import__] = import_func
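
[Editor's note] How the table is consulted is not shown in this diff; the hypothetical helper below sketches the dispatch that makes registering object_init sufficient for the annotator to ignore calls to object.__init__:

    # Hypothetical sketch, not PyPy's actual bookkeeper code: a call to a
    # known builtin is annotated by its registered analyzer; object_init
    # simply returns None, i.e. nothing to annotate.
    def annotate_builtin_call(x, s_args, BUILTIN_ANALYZERS):
        analyzer = BUILTIN_ANALYZERS.get(x)
        if analyzer is None:
            raise KeyError("no analyzer registered for %r" % (x,))
        return analyzer(*s_args)     # e.g. object_init(s_self) -> None
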
Added: pypy/dist/pypy/interpreter/pyparser/ebnfgrammar.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/interpreter/pyparser/ebnfgrammar.py Fri Dec 9 16:07:56 2005
@@ -0,0 +1,109 @@
+# This module contains the grammar parser
+# and the symbol mappings
+
+from grammar import BaseGrammarBuilder, Alternative, Sequence, Token, \
+ KleeneStar, GrammarElement, build_first_sets, EmptyToken
+
+
+sym_map = {}
+sym_rmap = {}
+_count = 0
+
+def g_add_symbol( name ):
+ global _count
+ if name in sym_rmap:
+ return sym_rmap[name]
+ val = _count
+ _count += 1
+ sym_map[val] = name
+ sym_rmap[name] = val
+ return val
+
+
+tok_map = {}
+tok_rmap = {}
+
+def g_add_token( **kwargs ):
+ global _count
+ assert len(kwargs) == 1
+ sym, name = kwargs.popitem()
+ if name in tok_rmap:
+ return tok_rmap[name]
+ val = _count
+ _count += 1
+ tok_map[val] = name
+ tok_rmap[name] = val
+ sym_map[val] = sym
+ sym_rmap[sym] = val
+ return val
+
+g_add_token( EOF='EOF' )
+
+
+def grammar_grammar():
+ """NOT RPYTHON (mostly because of g_add_token I suppose)
+ Builds the grammar for the grammar file
+
+ Here's the description of the grammar's grammar ::
+
+ grammar: rule+
+ rule: SYMDEF alternative
+
alternative: sequence ( '|' sequence )*
+ star: '*' | '+'
+ sequence: (SYMBOL star? | STRING | option | group star? )+
+ option: '[' alternative ']'
+ group: '(' alternative ')' star?
+ """
+ global sym_map
+ S = g_add_symbol
+ T = g_add_token
+ # star: '*' | '+'
+ star = Alternative( S("star"), [Token(T(TOK_STAR='*')), Token(T(TOK_ADD='+'))] )
+ star_opt = KleeneStar ( S("star_opt"), 0, 1, rule=star )
+
+ # symbol: SYMBOL star?
+ symbol = Sequence( S("symbol"), [Token(T(TOK_SYMBOL='SYMBOL')), star_opt] )
+ symboldef = Token( T(TOK_SYMDEF="SYMDEF") )
+ alternative = Sequence( S("alternative"), [])
+ rule = Sequence( S("rule"), [symboldef, alternative] )
+
+ # grammar: rule+
+ grammar = KleeneStar( S("grammar"), _min=1, rule=rule )
+
+ # alternative: sequence ( '|' sequence )*
+ sequence = KleeneStar( S("sequence"), 1 )
+ seq_cont_list = Sequence( S("seq_cont_list"), [Token(T(TOK_BAR='|')), sequence] )
+ sequence_cont = KleeneStar( S("sequence_cont"),0, rule=seq_cont_list )
+
+ alternative.args = [ sequence, sequence_cont ]
+
+ # option: '[' alternative ']'
+ option = Sequence( S("option"), [Token(T(TOK_LBRACKET='[')), alternative, Token(T(TOK_RBRACKET=']'))] )
+
+ # group: '(' alternative ')' star?
+ group = Sequence( S("group"), [Token(T(TOK_LPAR='(')), alternative, Token(T(TOK_RPAR=')')), star_opt] )
+
+ # sequence: (SYMBOL | STRING | option | group )+
+ string = Token(T(TOK_STRING='STRING'))
+ alt = Alternative( S("sequence_alt"), [symbol, string, option, group] )
+ sequence.args = [ alt ]
+
+
+ rules = [ star, star_opt, symbol, alternative, rule, grammar, sequence,
+ seq_cont_list, sequence_cont, option, group, alt ]
+ build_first_sets( rules )
+ return grammar
+
+
+GRAMMAR_GRAMMAR = grammar_grammar()
+
+for _sym, _value in sym_rmap.items():
+ globals()[_sym] = _value
+
+# cleanup
+del _sym
+del _value
+del grammar_grammar
+del g_add_symbol
+del g_add_token
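
[Editor's note] As a usage sketch, this is what the tables and the globals() injection at the bottom leave behind after import; symbol and token codes are drawn from the same _count counter, so they never collide:

    # Inspecting the generated numbering after importing the module.
    import ebnfgrammar

    print ebnfgrammar.sym_map[ebnfgrammar.EOF]       # 'EOF'
    print ebnfgrammar.sym_map[ebnfgrammar.TOK_STAR]  # 'TOK_STAR'
    print ebnfgrammar.tok_map[ebnfgrammar.TOK_STAR]  # '*'
    # rule symbols were injected as module globals too:
    print ebnfgrammar.sym_map[ebnfgrammar.star]      # 'star'
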
Modified: pypy/dist/pypy/interpreter/pyparser/ebnflexer.py
==============================================================================
--- pypy/dist/pypy/interpreter/pyparser/ebnflexer.py (original)
+++ pypy/dist/pypy/interpreter/pyparser/ebnflexer.py Fri Dec 9 16:07:56 2005
@@ -3,18 +3,21 @@
analyser in grammar.py
"""
-import re
from grammar import TokenSource, Token
+from ebnfgrammar import *
-## Lexer for Python's grammar ########################################
-g_symdef = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*:",re.M)
-g_symbol = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*",re.M)
-g_string = re.compile(r"'[^']+'",re.M)
-g_tok = re.compile(r"\[|\]|\(|\)|\*|\+|\|",re.M)
-g_skip = re.compile(r"\s*(#.*$)?",re.M)
+
+def match_symbol( input, start, stop ):
+ idx = start
+ while idx<stop:
+ if input[idx] not in "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789":
+ break
+ idx+=1
+ return idx
class GrammarSource(TokenSource):
- """The grammar tokenizer
+ """Fully RPython - see targetebnflexer.py
+ The grammar tokenizer
It knows only 5 types of tokens:
EOF: end of file
SYMDEF: a symbol definition e.g. "file_input:"
@@ -22,13 +25,12 @@
SYMBOL: a rule symbol, usually appearing right of a SYMDEF
tokens: '[', ']', '(', ')', '*', '+', '|'
"""
- def __init__(self, inpstring, tokenmap ):
+ def __init__(self, inpstring ):
TokenSource.__init__(self)
self.input = inpstring
self.pos = 0
self.begin = 0
self._peeked = None
- self.tokmap = tokenmap
self.current_line = 1
def context(self):
@@ -50,19 +52,71 @@
self.pos, self._peeked = ctx
def current_line(self):
- end = self.input.find("\n",self.pos)
- return self.input[self.begin:self.pos]
+ pos = idx = self.begin
+ inp = self.input
+ end = len(inp)
+ while idx<end:
+ chr = inp[idx]
+ if chr=="\n":
+ break
+ idx+=1
+ return self.input[pos:idx]
def current_lineno(self):
return self.current_line
+
+ def skip_empty_lines(self, input, start, end ):
+ idx = start
+ # assume beginning of a line
+ while idx<end:
+ chr = input[idx]
+ if chr not in " \t#\n":
+ break
+ idx += 1
+ if chr=="#":
+ # skip to end of line
+ while idx<end:
+ chr = input[idx]
+ idx+= 1
+ if chr=="\n":
+ self.begin = idx
+ self.current_line+=1
+ break
+ continue
+ elif chr=="\n":
+ self.begin = idx
+ self.current_line+=1
+ return idx
+
+ def match_string( self, input, start, stop ):
+ if input[start]!="'":
+ return start
+ idx = start + 1
+ while idx<stop:
+ chr = input[idx]
+ idx = idx + 1
+ if chr == "'":
+ break
+ if chr == "\n":
+ self.current_line += 1
+ self.begin = idx
+ break
+ return idx
+
+
+ def RaiseError( self, msg ):
+ errmsg = msg + " at line=%d" % self.current_line
+ errmsg += " at pos=%d" % (self.pos-self.begin)
+ errmsg += " context='" + self.input[self.pos:self.pos+20]
+ raise ValueError( errmsg )
+
def next(self):
"""returns the next token"""
# We only support 1-lookahead which
# means backtracking more than one token
# will re-tokenize the stream (but this is the
# grammar lexer so we don't care really!)
- T = self.tokmap
if self._peeked is not None:
peeked = self._peeked
self._peeked = None
@@ -70,37 +124,38 @@
pos = self.pos
inp = self.input
- m = g_skip.match(inp, pos)
- while m and pos!=m.end():
- self.current_line+=m.group().count("\n")
- pos = m.end()
- if pos==len(inp):
- self.pos = pos
- return Token(T["EOF"], None)
- m = g_skip.match(inp, pos)
- m = g_symdef.match(inp,pos)
- if m:
- tk = m.group(0)
- self.begin = self.pos
- self.pos = m.end()
- return Token(T['SYMDEF'],tk[:-1])
- m = g_tok.match(inp,pos)
- if m:
- tk = m.group(0)
- self.pos = m.end()
- return Token(T[tk],tk)
- m = g_string.match(inp,pos)
- if m:
- tk = m.group(0)
- self.pos = m.end()
- return Token(T['STRING'],tk[1:-1])
- m = g_symbol.match(inp,pos)
- if m:
- tk = m.group(0)
- self.pos = m.end()
- return Token(T['SYMBOL'],tk)
- raise ValueError("Unknown token at pos=%d context='%s'" %
- (pos,inp[pos:pos+20]) )
+ end = len(self.input)
+ pos = self.skip_empty_lines(inp,pos,end)
+ if pos==end:
+ return Token(EOF, None)
+
+ # at this point nextchar is not a white space nor \n
+ nextchr = inp[pos]
+ if nextchr=="'":
+ npos = self.match_string( inp, pos, end)
+ # could get a string terminated by EOF here
+ if npos==end and inp[end-1]!="'":
+ self.RaiseError("Unterminated string")
+ self.pos = npos
+ _endpos = npos - 1
+ assert _endpos>=0
+ return Token(TOK_STRING,inp[pos+1:_endpos])
+ else:
+ npos = match_symbol( inp, pos, end)
+ if npos!=pos:
+ self.pos = npos
+ if npos!=end and inp[npos]==":":
+ self.pos += 1
+ return Token(TOK_SYMDEF,inp[pos:npos])
+ else:
+ return Token(TOK_SYMBOL,inp[pos:npos])
+
+ # we still have pos!=end here
+ chr = inp[pos]
+ if chr in "[]()*+|":
+ self.pos = pos+1
+ return Token(tok_rmap[chr], chr)
+ self.RaiseError( "Unknown token" )
def peek(self):
"""take a peek at the next token"""
@@ -113,3 +168,21 @@
"""A simple helper function returning the stream at the last
parsed position"""
return self.input[self.pos:self.pos+N]
+
+
+# a simple target used to annotate/translate the tokenizer
+def target_parse_input( txt ):
+ lst = []
+ src = GrammarSource( txt )
+ while 1:
+ x = src.next()
+ lst.append( x )
+ if x.codename == EOF:
+ break
+ #return lst
+
+if __name__ == "__main__":
+    import sys
+    # the return of target_parse_input is disabled above, so tokenize here
+    src = GrammarSource( file(sys.argv[-1]).read() )
+    while 1:
+        tok = src.next()
+        print tok
+        if tok.codename == EOF:
+            break
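
[Editor's note] As an illustration of the five token kinds the docstring lists, here is the stream the lexer should produce for a one-rule grammar; a sketch assuming a standalone run from inside pypy/interpreter/pyparser, which the guarded imports in this commit allow:

    from ebnflexer import GrammarSource
    from ebnfgrammar import sym_map

    src = GrammarSource("atom: '(' test ')' | NAME\n")
    expected = ["TOK_SYMDEF",   # atom
                "TOK_STRING",   # '('
                "TOK_SYMBOL",   # test
                "TOK_STRING",   # ')'
                "TOK_BAR",      # |
                "TOK_SYMBOL",   # NAME
                "EOF"]
    for name in expected:
        assert sym_map[src.next().codename] == name
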
Modified: pypy/dist/pypy/interpreter/pyparser/ebnfparse.py
==============================================================================
--- pypy/dist/pypy/interpreter/pyparser/ebnfparse.py (original)
+++ pypy/dist/pypy/interpreter/pyparser/ebnfparse.py Fri Dec 9 16:07:56 2005
@@ -2,6 +2,7 @@
from grammar import BaseGrammarBuilder, Alternative, Sequence, Token, \
KleeneStar, GrammarElement, build_first_sets, EmptyToken
from ebnflexer import GrammarSource
+from ebnfgrammar import GRAMMAR_GRAMMAR, sym_map
from syntaxtree import AbstractSyntaxVisitor
import pytoken
import pysymbol
@@ -16,13 +17,6 @@
'%', '<<', '//', '\\', '', '\n\\)', '\\(', ';', ':',
'@', '\\[', '\\]', '`', '\\{', '\\}']
-py_punct = re.compile(r"""
->=|<>|!=|<|>|<=|==|~|
-\*=|//=|%=|\^=|<<=|\*\*=|\|=|\+=|>>=|=|&=|/=|-=|
-,|\^|>>|&|\+|\*|-|/|\.|\*\*|%|<<|//|\||
-\)|\(|;|:|@|\[|\]|`|\{|\}
-""", re.M | re.X)
-
TERMINALS = [
'NAME', 'NUMBER', 'STRING', 'NEWLINE', 'ENDMARKER',
@@ -188,7 +182,7 @@
rule = node.nodes[1].visit(self)
return self.repeat( node.nodes[3], rule )
- def handle_STRING( self, node ):
+ def handle_TOK_STRING( self, node ):
value = node.value
tokencode = pytoken.tok_punct.get( value )
if tokencode is None:
@@ -224,76 +218,6 @@
% tok.value)
return myrule
-rules = None
-
-sym_map = {}
-sym_rmap = {}
-sym_count = 0
-
-def g_add_symbol( name ):
- global sym_count
- if name in sym_rmap:
- return sym_rmap[name]
- val = sym_count
- sym_count += 1
- sym_map[val] = name
- sym_rmap[name] = val
- return val
-
-g_add_symbol( 'EOF' )
-
-def grammar_grammar():
- """Builds the grammar for the grammar file
-
- Here's the description of the grammar's grammar ::
-
- grammar: rule+
- rule: SYMDEF alternative
-
- alternative: sequence ( '|' sequence )+
- star: '*' | '+'
- sequence: (SYMBOL star? | STRING | option | group star? )+
- option: '[' alternative ']'
- group: '(' alternative ')' star?
- """
- global rules, sym_map
- S = g_add_symbol
- # star: '*' | '+'
- star = Alternative( S("star"), [Token(S('*')), Token(S('+'))] )
- star_opt = KleeneStar ( S("star_opt"), 0, 1, rule=star )
-
- # rule: SYMBOL ':' alternative
- symbol = Sequence( S("symbol"), [Token(S('SYMBOL')), star_opt] )
- symboldef = Token( S("SYMDEF") )
- alternative = Sequence( S("alternative"), [])
- rule = Sequence( S("rule"), [symboldef, alternative] )
-
- # grammar: rule+
- grammar = KleeneStar( S("grammar"), _min=1, rule=rule )
-
- # alternative: sequence ( '|' sequence )*
- sequence = KleeneStar( S("sequence"), 1 )
- seq_cont_list = Sequence( S("seq_cont_list"), [Token(S('|')), sequence] )
- sequence_cont = KleeneStar( S("sequence_cont"),0, rule=seq_cont_list )
-
- alternative.args = [ sequence, sequence_cont ]
-
- # option: '[' alternative ']'
- option = Sequence( S("option"), [Token(S('[')), alternative, Token(S(']'))] )
-
- # group: '(' alternative ')'
- group = Sequence( S("group"), [Token(S('(')), alternative, Token(S(')')), star_opt] )
-
- # sequence: (SYMBOL | STRING | option | group )+
- string = Token(S('STRING'))
- alt = Alternative( S("sequence_alt"), [symbol, string, option, group] )
- sequence.args = [ alt ]
-
-
- rules = [ star, star_opt, symbol, alternative, rule, grammar, sequence,
- seq_cont_list, sequence_cont, option, group, alt ]
- build_first_sets( rules )
- return grammar
def parse_grammar(stream):
@@ -301,15 +225,27 @@
stream : file-like object representing the grammar to parse
"""
- source = GrammarSource(stream.read(), sym_rmap)
- rule = grammar_grammar()
+ source = GrammarSource(stream.read())
builder = BaseGrammarBuilder()
- result = rule.match(source, builder)
+ result = GRAMMAR_GRAMMAR.match(source, builder)
node = builder.stack[-1]
vis = EBNFVisitor()
node.visit(vis)
return vis
+def parse_grammar_text(txt):
+    """parses a grammar input
+
+    txt : a string containing the grammar to parse
+    """
+ source = GrammarSource(txt)
+ builder = BaseGrammarBuilder()
+ result = GRAMMAR_GRAMMAR.match(source, builder)
+ node = builder.stack[-1]
+ vis = EBNFVisitor()
+ node.visit(vis)
+ return vis
+
from pprint import pprint
if __name__ == "__main__":
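
[Editor's note] A hedged usage sketch of the new text entry point; what the returned EBNFVisitor exposes is outside this diff, so only the call shape is shown:

    from ebnfparse import parse_grammar_text

    # parse a one-rule grammar directly from a string
    vis = parse_grammar_text("single_input: NEWLINE\n")

Since parse_grammar and parse_grammar_text now share their entire body, the former could simply return parse_grammar_text(stream.read()).
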
Modified: pypy/dist/pypy/interpreter/pyparser/grammar.py
==============================================================================
--- pypy/dist/pypy/interpreter/pyparser/grammar.py (original)
+++ pypy/dist/pypy/interpreter/pyparser/grammar.py Fri Dec 9 16:07:56 2005
@@ -7,11 +7,16 @@
KleeneStar : as in S -> A* or S -> A+
Token : a lexer token
"""
-from pypy.interpreter.baseobjspace import Wrappable
-from pypy.interpreter.typedef import TypeDef
-from pypy.interpreter.gateway import interp2app, ObjSpace, W_Root
-from pypy.interpreter.argument import Arguments
-from pypy.interpreter.error import OperationError
+try:
+ from pypy.interpreter.baseobjspace import Wrappable
+ from pypy.interpreter.pyparser.pytoken import NULLTOKEN
+except ImportError:
+ # allows standalone testing
+ Wrappable = object
+ NULLTOKEN = None
+
+from syntaxtree import SyntaxNode, TempSyntaxNode, TokenNode
+
DEBUG = 0
USE_LOOKAHEAD = True
@@ -26,6 +31,7 @@
#### Abstract interface for a lexer/tokenizer
class TokenSource(object):
"""Abstract base class for a source tokenizer"""
+
def context(self):
"""Returns a context to restore the state of the object later"""
@@ -123,7 +129,6 @@
def token(self, name, value, source):
return False
-from syntaxtree import SyntaxNode, TempSyntaxNode, TokenNode
#
# we use the term root for a grammar rule to specify rules that are given a name
# by the grammar
@@ -349,20 +354,7 @@
pass
- def descr_repr( self, space ):
- """TODO: make __repr__ RPython"""
- import pysymbol
- return space.wrap( self.display(0, pysymbol.sym_name) )
-
- def descr_get_children( self, space ):
- return space.newlist( [ space.wrap(it) for it in self.args ] )
-
-GrammarElement.typedef = TypeDef( "GrammarElement",
- #__repr__ = interp2app(GrammarElement.descr_repr,
- # unwrap_spec=['self', ObjSpace] ),
- get_children = interp2app(GrammarElement.descr_get_children,
- unwrap_spec=['self', ObjSpace] ),
- )
+
class Alternative(GrammarElement):
"""Represents an alternative in a grammar rule (as in S -> A | B | C)"""
@@ -460,47 +452,8 @@
return True
return False
- def descr_alternative_append( self, space, w_rule ):
- rule = space.interpclass_w(w_rule)
- if not isinstance( rule, GrammarElement ):
- raise OperationError( space.w_TypeError, space.wrap("Need a GrammarElement instance") )
- self.args.append( rule )
-
- def descr_alternative___getitem__(self, space, idx ):
- return space.wrap(self.args[idx])
-
- def descr_alternative___setitem__(self, space, idx, w_rule ):
- rule = space.interpclass_w(w_rule)
- if not isinstance( rule, GrammarElement ):
- raise OperationError( space.w_TypeError, space.wrap("Need a GrammarElement instance") )
- return space.wrap( self.args[idx] )
-
- def descr_alternative___delitem__(self, space, idx ):
- del self.args[idx]
-
- def descr_alternative_insert(self, space, idx, w_rule ):
- rule = space.interpclass_w(w_rule)
- if not isinstance( rule, GrammarElement ):
- raise OperationError( space.w_TypeError, space.wrap("Need a GrammarElement instance") )
- if idx<0 or idx>len(self.args):
- raise OperationError( space.w_IndexError, space.wrap("Invalid index") )
- self.args.insert( idx, rule )
-
-Alternative.typedef = TypeDef("Alternative", GrammarElement.typedef,
- __getitem__ = interp2app( Alternative.descr_alternative___getitem__,
- unwrap_spec=['self',ObjSpace,int]),
- __setitem__ = interp2app( Alternative.descr_alternative___setitem__,
- unwrap_spec=['self',ObjSpace,int,W_Root]),
- __delitem__ = interp2app( Alternative.descr_alternative___delitem__,
- unwrap_spec=['self',ObjSpace,int]),
- insert = interp2app( Alternative.descr_alternative_insert,
- unwrap_spec = ['self', ObjSpace, int, W_Root ] ),
- append = interp2app( Alternative.descr_alternative_append,
- unwrap_spec = ['self', ObjSpace, W_Root ] ),
- )
-
class Sequence(GrammarElement):
"""Reprensents a Sequence in a grammar rule (as in S -> A B C)"""
def __init__(self, name, args):
@@ -574,46 +527,9 @@
return False
return True
- def descr_alternative_append( self, space, w_rule ):
- rule = space.interpclass_w(w_rule)
- if not isinstance( rule, GrammarElement ):
- raise OperationError( space.w_TypeError, space.wrap("Need a GrammarElement instance") )
- self.args.append( rule )
-
- def descr_alternative___getitem__(self, space, idx ):
- return space.wrap(self.args[idx])
-
- def descr_alternative___setitem__(self, space, idx, w_rule ):
- rule = space.interpclass_w(w_rule)
- if not isinstance( rule, GrammarElement ):
- raise OperationError( space.w_TypeError, space.wrap("Need a GrammarElement instance") )
- return space.wrap( self.args[idx] )
-
- def descr_alternative___delitem__(self, space, idx ):
- del self.args[idx]
-
- def descr_alternative_insert(self, space, idx, w_rule ):
- rule = space.interpclass_w(w_rule)
- if not isinstance( rule, GrammarElement ):
- raise OperationError( space.w_TypeError, space.wrap("Need a GrammarElement instance") )
- if idx<0 or idx>len(self.args):
- raise OperationError( space.w_IndexError, space.wrap("Invalid index") )
- self.args.insert( idx, rule )
-Sequence.typedef = TypeDef("Sequence", GrammarElement.typedef,
- __getitem__ = interp2app( Sequence.descr_alternative___getitem__,
- unwrap_spec=['self',ObjSpace,int]),
- __setitem__ = interp2app( Sequence.descr_alternative___setitem__,
- unwrap_spec=['self',ObjSpace,int,W_Root]),
- __delitem__ = interp2app( Sequence.descr_alternative___delitem__,
- unwrap_spec=['self',ObjSpace,int]),
- insert = interp2app( Sequence.descr_alternative_insert,
- unwrap_spec = ['self', ObjSpace, int, W_Root ] ),
- append = interp2app( Sequence.descr_alternative_append,
- unwrap_spec = ['self', ObjSpace, W_Root ] ),
- )
class KleeneStar(GrammarElement):
@@ -706,28 +622,6 @@
return False
return True
- def descr_kleenestar___getitem__(self, space, idx ):
- if idx!=0:
- raise OperationError( space.w_ValueError, space.wrap("KleeneStar only support one child"))
- return space.wrap(self.args[idx])
-
- def descr_kleenestar___setitem__(self, space, idx, w_rule ):
- rule = space.interpclass_w(w_rule)
- if idx!=0:
- raise OperationError( space.w_ValueError, space.wrap("KleeneStar only support one child"))
- if not isinstance( rule, GrammarElement ):
- raise OperationError( space.w_TypeError, space.wrap("Need a GrammarElement instance") )
- self.args[idx] = rule
-
-
-
-KleeneStar.typedef = TypeDef("KleeneStar", GrammarElement.typedef,
- __getitem__ = interp2app(KleeneStar.descr_kleenestar___getitem__,
- unwrap_spec=[ 'self', ObjSpace, int]),
- __setitem__ = interp2app(KleeneStar.descr_kleenestar___setitem__,
- unwrap_spec=[ 'self', ObjSpace, int, W_Root ]),
- )
-
class Token(GrammarElement):
"""Represents a Token in a grammar rule (a lexer token)"""
@@ -804,9 +698,7 @@
return True
return False
-Token.typedef = TypeDef("Token", GrammarElement.typedef )
-from pypy.interpreter.pyparser.pytoken import NULLTOKEN
EmptyToken = Token(NULLTOKEN, None)
Modified: pypy/dist/pypy/interpreter/pyparser/pysymbol.py
==============================================================================
--- pypy/dist/pypy/interpreter/pyparser/pysymbol.py (original)
+++ pypy/dist/pypy/interpreter/pyparser/pysymbol.py Fri Dec 9 16:07:56 2005
@@ -1,5 +1,9 @@
# replacement for the CPython symbol module
-from pypy.interpreter.pyparser import symbol
+try:
+ from pypy.interpreter.pyparser import symbol
+except ImportError:
+ # for standalone testing
+ import symbol
# try to avoid numeric values conflict with tokens
# it's important for CPython, but I'm not so sure it's still
Modified: pypy/dist/pypy/interpreter/pyparser/syntaxtree.py
==============================================================================
--- pypy/dist/pypy/interpreter/pyparser/syntaxtree.py (original)
+++ pypy/dist/pypy/interpreter/pyparser/syntaxtree.py Fri Dec 9 16:07:56 2005
@@ -1,7 +1,11 @@
"""SyntaxTree class definition"""
-from pypy.interpreter.pyparser.pysymbol import sym_values
-from pypy.interpreter.pyparser.pytoken import tok_values
-
+try:
+ from pypy.interpreter.pyparser.pysymbol import sym_values
+ from pypy.interpreter.pyparser.pytoken import tok_values
+except ImportError:
+ from pysymbol import sym_values
+ from pytoken import tok_values
+
class AbstractSyntaxVisitor(object):
def visit_syntaxnode( self, node ):
pass
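
[Editor's note] grammar.py, pysymbol.py and syntaxtree.py now all use the same guarded-import fallback; reduced to its minimal shape, the pattern is:

    # Prefer the interpreter-level name; fall back to a plain stand-in so
    # the pyparser package imports standalone (targetebnflexer.py and the
    # annotation of the lexer rely on this).
    try:
        from pypy.interpreter.baseobjspace import Wrappable
    except ImportError:
        Wrappable = object   # any plain base class works untranslated
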
Modified: pypy/dist/pypy/module/_socket/rpython/rsocket.py
==============================================================================
--- pypy/dist/pypy/module/_socket/rpython/rsocket.py (original)
+++ pypy/dist/pypy/module/_socket/rpython/rsocket.py Fri Dec 9 16:07:56 2005
@@ -5,7 +5,7 @@
import socket
# HACK: We have to prevent GC to collect the socket object we create within this
-#Êmodule. Because socket.close() is called on GC this can lead to strange
+# module. Because socket.close() is called on GC this can lead to strange
# effects in corner cases where file descriptors are reused.
socket_cache = {}
keep_sockets_alive = []
Modified: pypy/dist/pypy/module/recparser/pyparser.py
==============================================================================
--- pypy/dist/pypy/module/recparser/pyparser.py (original)
+++ pypy/dist/pypy/module/recparser/pyparser.py Fri Dec 9 16:07:56 2005
@@ -11,6 +11,8 @@
from pypy.interpreter.pyparser.pythonutil import PYTHON_PARSER
from pypy.interpreter.pyparser.error import SyntaxError
from pypy.interpreter.pyparser import grammar, pysymbol, pytoken
+from pypy.interpreter.argument import Arguments
+
__all__ = [ "ASTType", "STType", "suite", "expr" ]
@@ -206,3 +208,121 @@
encoding = None
return parsestr(space, encoding, s)
decode_string_literal.unwrap_spec = [ObjSpace, str, W_Root]
+
+
+# append typedefs to the grammar objects
+from pypy.interpreter.pyparser.grammar import GrammarElement, Alternative
+from pypy.interpreter.pyparser.grammar import Sequence, KleeneStar, Token
+
+
+def descr_grammarelement_repr( self, space ):
+ """TODO: make __repr__ RPython"""
+ from pypy.interpreter.pyparser import pysymbol
+ return space.wrap( self.display(0, pysymbol.sym_name) )
+
+def descr_grammarelement_get_children( self, space ):
+ return space.newlist( [ space.wrap(it) for it in self.args ] )
+
+GrammarElement.descr_grammarelement_repr = descr_grammarelement_repr
+GrammarElement.descr_grammarelement_get_children = descr_grammarelement_get_children
+
+GrammarElement.typedef = TypeDef( "GrammarElement",
+ #__repr__ = interp2app(GrammarElement.descr_grammarelement_repr,
+ # unwrap_spec=['self', ObjSpace] ),
+ get_children = interp2app(GrammarElement.descr_grammarelement_get_children,
+ unwrap_spec=['self', ObjSpace] ),
+ )
+
+
+
+def descr_alternative_append( self, space, w_rule ):
+ rule = space.interpclass_w(w_rule)
+ if not isinstance( rule, GrammarElement ):
+ raise OperationError( space.w_TypeError, space.wrap("Need a GrammarElement instance") )
+ self.args.append( rule )
+
+
+def descr_alternative___getitem__(self, space, idx ):
+ return space.wrap(self.args[idx])
+
+def descr_alternative___setitem__(self, space, idx, w_rule ):
+ rule = space.interpclass_w(w_rule)
+ if not isinstance( rule, GrammarElement ):
+ raise OperationError( space.w_TypeError, space.wrap("Need a GrammarElement instance") )
+ return space.wrap( self.args[idx] )
+
+def descr_alternative___delitem__(self, space, idx ):
+ del self.args[idx]
+
+def descr_alternative_insert(self, space, idx, w_rule ):
+ rule = space.interpclass_w(w_rule)
+ if not isinstance( rule, GrammarElement ):
+ raise OperationError( space.w_TypeError, space.wrap("Need a GrammarElement instance") )
+ if idx<0 or idx>len(self.args):
+ raise OperationError( space.w_IndexError, space.wrap("Invalid index") )
+ self.args.insert( idx, rule )
+
+Alternative.descr_alternative_append = descr_alternative_append
+Alternative.descr_alternative_insert = descr_alternative_insert
+Alternative.descr_alternative___getitem__ = descr_alternative___getitem__
+Alternative.descr_alternative___setitem__ = descr_alternative___setitem__
+Alternative.descr_alternative___delitem__ = descr_alternative___delitem__
+
+
+Alternative.typedef = TypeDef("Alternative", GrammarElement.typedef,
+ __getitem__ = interp2app( Alternative.descr_alternative___getitem__,
+ unwrap_spec=['self',ObjSpace,int]),
+ __setitem__ = interp2app( Alternative.descr_alternative___setitem__,
+ unwrap_spec=['self',ObjSpace,int,W_Root]),
+ __delitem__ = interp2app( Alternative.descr_alternative___delitem__,
+ unwrap_spec=['self',ObjSpace,int]),
+ insert = interp2app( Alternative.descr_alternative_insert,
+ unwrap_spec = ['self', ObjSpace, int, W_Root ] ),
+ append = interp2app( Alternative.descr_alternative_append,
+ unwrap_spec = ['self', ObjSpace, W_Root ] ),
+ )
+
+Sequence.descr_alternative_append = descr_alternative_append
+Sequence.descr_alternative_insert = descr_alternative_insert
+Sequence.descr_alternative___getitem__ = descr_alternative___getitem__
+Sequence.descr_alternative___setitem__ = descr_alternative___setitem__
+Sequence.descr_alternative___delitem__ = descr_alternative___delitem__
+
+
+Sequence.typedef = TypeDef("Sequence", GrammarElement.typedef,
+ __getitem__ = interp2app( Sequence.descr_alternative___getitem__,
+ unwrap_spec=['self',ObjSpace,int]),
+ __setitem__ = interp2app( Sequence.descr_alternative___setitem__,
+ unwrap_spec=['self',ObjSpace,int,W_Root]),
+ __delitem__ = interp2app( Sequence.descr_alternative___delitem__,
+ unwrap_spec=['self',ObjSpace,int]),
+ insert = interp2app( Sequence.descr_alternative_insert,
+ unwrap_spec = ['self', ObjSpace, int, W_Root ] ),
+ append = interp2app( Sequence.descr_alternative_append,
+ unwrap_spec = ['self', ObjSpace, W_Root ] ),
+ )
+
+def descr_kleenestar___getitem__(self, space, idx ):
+ if idx!=0:
+ raise OperationError( space.w_ValueError, space.wrap("KleeneStar only support one child"))
+ return space.wrap(self.args[idx])
+
+def descr_kleenestar___setitem__(self, space, idx, w_rule ):
+ rule = space.interpclass_w(w_rule)
+ if idx!=0:
+ raise OperationError( space.w_ValueError, space.wrap("KleeneStar only support one child"))
+ if not isinstance( rule, GrammarElement ):
+ raise OperationError( space.w_TypeError, space.wrap("Need a GrammarElement instance") )
+ self.args[idx] = rule
+
+KleeneStar.descr_kleenestar___getitem__ = descr_kleenestar___getitem__
+KleeneStar.descr_kleenestar___setitem__ = descr_kleenestar___setitem__
+
+KleeneStar.typedef = TypeDef("KleeneStar", GrammarElement.typedef,
+ __getitem__ = interp2app(KleeneStar.descr_kleenestar___getitem__,
+ unwrap_spec=[ 'self', ObjSpace, int]),
+ __setitem__ = interp2app(KleeneStar.descr_kleenestar___setitem__,
+ unwrap_spec=[ 'self', ObjSpace, int, W_Root ]),
+ )
+
+Token.typedef = TypeDef("Token", GrammarElement.typedef )
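
[Editor's note] The relocation pattern used above, reduced to a sketch with a hypothetical class (the real code does this for GrammarElement, Alternative, Sequence, KleeneStar and Token, reusing this module's TypeDef/interp2app imports):

    # Define the descriptor as a plain function, attach it to the class
    # from this glue module, then expose it through a TypeDef; grammar.py
    # itself stays free of interpreter-level machinery.
    class SomeRule(object):                  # hypothetical stand-in
        def __init__(self, args):
            self.args = args

    def descr_somerule_size(self, space):
        return space.wrap(len(self.args))

    SomeRule.descr_somerule_size = descr_somerule_size
    SomeRule.typedef = TypeDef("SomeRule",
        size = interp2app(SomeRule.descr_somerule_size,
                          unwrap_spec=['self', ObjSpace]),
        )
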
Modified: pypy/dist/pypy/module/recparser/test/test_compilehooks.py
==============================================================================
--- pypy/dist/pypy/module/recparser/test/test_compilehooks.py (original)
+++ pypy/dist/pypy/module/recparser/test/test_compilehooks.py Fri Dec 9 16:07:56 2005
@@ -26,3 +26,32 @@
d = {}
exec "a = 3" in d
assert d['a'] == 2 # well, yes ...
+
+
+class DISABLEDAppTest_GlobalsAsConsts:
+ def test_ast_parser(self):
+ # define the hook
+ def change_globals(ast, enc):
+ class ChangeGlobalsVisitor:
+ def visitConst(self, node):
+ pass
+
+ def defaultvisit(self, node):
+ for child in node.getChildNodes():
+ child.accept(self)
+
+ def __getattr__(self, attrname):
+ if attrname.startswith('visit'):
+ return self.defaultvisit
+ raise AttributeError(attrname)
+
+ ast.accept(ChangeGlobalsVisitor())
+ return ast
+
+ # install the hook
+ import parser
+ parser.install_compiler_hook(change_globals)
+ # check that the visitor changed all globals
+ # in the code into Consts
+ # TODO
+ # simplest version of the test : dis(code) | grep -v LOAD_GLOBAL == dis(code)
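
[Editor's note] For reference, the hook contract the disabled test assumes (grounded only in the calls the test itself makes): a hook receives the AST and the source encoding and must return the, possibly rewritten, AST:

    # Sketch of the install_compiler_hook contract used above.
    import parser                    # PyPy's recparser, at app-level

    def identity_hook(ast, enc):
        # inspect or rewrite the AST here; always return an AST
        return ast

    parser.install_compiler_hook(identity_hook)
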
Modified: pypy/dist/pypy/rpython/rbuiltin.py
==============================================================================
--- pypy/dist/pypy/rpython/rbuiltin.py (original)
+++ pypy/dist/pypy/rpython/rbuiltin.py Fri Dec 9 16:07:56 2005
@@ -179,6 +179,9 @@
def rtype_Exception__init__(hop):
pass
+def rtype_object__init__(hop):
+ pass
+
def rtype_OSError__init__(hop):
if hop.nb_args == 2:
raise TyperError("OSError() should not be called with "
@@ -241,6 +244,7 @@
BUILTIN_TYPER[Exception.__init__.im_func] = rtype_Exception__init__
BUILTIN_TYPER[AssertionError.__init__.im_func] = rtype_Exception__init__
BUILTIN_TYPER[OSError.__init__.im_func] = rtype_OSError__init__
+BUILTIN_TYPER[object.__init__] = rtype_object__init__
# annotation of low-level types
def rtype_malloc(hop):
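
[Editor's note] The rtyper-side registration mirrors the annotator-side one in builtin.py above; together they make explicit chaining to object.__init__ a no-op during translation. A sketch of the kind of code this unblocks (hypothetical class):

    # Both tables now ignore the object.__init__ call below, so classes
    # whose __init__ chains up to object translate cleanly.
    class RuleBase(object):              # hypothetical example
        def __init__(self, name):
            object.__init__(self)        # ignored by annotator and rtyper
            self.name = name
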
Added: pypy/dist/pypy/translator/goal/targetebnflexer.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/translator/goal/targetebnflexer.py Fri Dec 9 16:07:56 2005
@@ -0,0 +1,33 @@
+from pypy.interpreter.pyparser.ebnflexer import target_parse_input
+
+
+entry_point = target_parse_input
+
+# _____ Define and setup target ___
+
+def target(*args):
+ return entry_point, [str]
+
+def get_llinterp_args():
+ return [1]
+
+# _____ Run translated _____
+def run(c_entry_point):
+ import sys
+ NBC=100
+ import time
+ src = file("../../interpreter/pyparser/data/Grammar2.4").read()
+ print "Translated:"
+ t1 = time.time()
+ for i in range(NBC):
+ c_entry_point( src )
+ t2 = time.time()
+ print "%8.5f sec/loop" % (float(t2-t1)/NBC)
+ print "CPython:"
+ t1 = time.time()
+ for i in range(NBC):
+ entry_point( src )
+ t2 = time.time()
+ print "%8.5f sec/loop" % (float(t2-t1)/NBC)
+
+
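
[Editor's note] A sketch of the driver protocol this target implements, as an assumption based on the other target*.py files in translator/goal rather than on anything in this diff: the translation tool imports the module, calls target() to obtain the entry point and its argument types, and can call run() with the compiled function afterwards:

    # Hypothetical driver-side view of targetebnflexer.py.
    import targetebnflexer

    entry_point, argtypes = targetebnflexer.target()
    assert argtypes == [str]
    # smoke-test untranslated, with the same grammar file run() uses:
    src = file("../../interpreter/pyparser/data/Grammar2.4").read()
    entry_point(src)
    # after translation: targetebnflexer.run(c_entry_point)
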