[pypy-svn] r50343 - pypy/dist/pypy/rlib/parsing
cfbolz at codespeak.net
Fri Jan 4 23:45:01 CET 2008
Author: cfbolz
Date: Fri Jan 4 23:45:01 2008
New Revision: 50343
Modified:
pypy/dist/pypy/rlib/parsing/lexer.py
Log:
Some refactoring of the lexing code to make it a bit more flexible. My Python
lexer needs this.
Modified: pypy/dist/pypy/rlib/parsing/lexer.py
==============================================================================
--- pypy/dist/pypy/rlib/parsing/lexer.py (original)
+++ pypy/dist/pypy/rlib/parsing/lexer.py Fri Jan 4 23:45:01 2008
@@ -89,15 +89,14 @@
self.ignore = ignore
self.matcher = matcher
-class LexingDFARunner(deterministic.DFARunner):
+class AbstractLexingDFARunner(deterministic.DFARunner):
i = 0
- def __init__(self, matcher, automaton, text, ignore, eof=False):
+ def __init__(self, matcher, automaton, text, eof=False):
self.automaton = automaton
self.state = 0
self.text = text
self.last_matched_state = 0
self.last_matched_index = -1
- self.ignore = ignore
self.eof = eof
self.matcher = matcher
self.lineno = 0
@@ -112,8 +111,7 @@
if i == len(self.text):
if self.eof:
self.last_matched_index += 1
- return Token("EOF", "EOF",
- SourcePos(i, self.lineno, self.columnno))
+ return self.make_token(i, -1, "", eof=True)
else:
raise StopIteration
if i >= len(self.text) + 1:
@@ -128,25 +126,22 @@
stop = self.last_matched_index + 1
assert stop >= 0
source = self.text[start: stop]
- lineno, columnno = self.adjust_position(source)
- if self.automaton.names[self.last_matched_state] in self.ignore:
+ result = self.make_token(start, self.last_matched_index, source)
+ self.adjust_position(source)
+ if self.ignore_token(self.last_matched_state):
continue
- source_pos = SourcePos(start, lineno, columnno)
- return Token(self.automaton.names[self.last_matched_state],
- source, source_pos)
+ return result
if self.last_matched_index == i - 1:
- token = self.text[start: ]
- lineno, columnno = self.adjust_position(token)
- if self.automaton.names[self.last_matched_state] in self.ignore:
+ source = self.text[start: ]
+ result = self.make_token(start, self.last_matched_index, source)
+ self.adjust_position(source)
+ if self.ignore_token(self.last_matched_state):
if self.eof:
self.last_matched_index += 1
- return Token("EOF", "EOF",
- SourcePos(i, self.lineno, self.columnno))
+ return self.make_token(i, -1, "", eof=True)
else:
raise StopIteration
- return Token(self.automaton.names[self.last_matched_state],
- self.text[start:],
- SourcePos(start, lineno, columnno))
+ return result
source_pos = SourcePos(i - 1, self.lineno, self.columnno)
raise deterministic.LexerError(self.text, self.state, source_pos)
@@ -158,7 +153,6 @@
self.columnno += len(token)
else:
self.columnno = token.rfind("\n")
- return lineno, columnno
# def inner_loop(self, i):
# while i < len(self.text):
@@ -184,3 +178,17 @@
def __iter__(self):
return self
+class LexingDFARunner(AbstractLexingDFARunner):
+ def __init__(self, matcher, automaton, text, ignore, eof=False):
+ AbstractLexingDFARunner.__init__(self, matcher, automaton, text, eof)
+ self.ignore = ignore
+
+ def ignore_token(self, state):
+ return self.automaton.names[self.last_matched_state] in self.ignore
+
+ def make_token(self, index, state, text, eof=False):
+ source_pos = SourcePos(index, self.lineno, self.columnno)
+ if eof:
+ return Token("EOF", "EOF", source_pos)
+ return Token(self.automaton.names[self.last_matched_state],
+ text, source_pos)