[Python-checkins] CVS: python/dist/src/Lib sre.py,1.18,1.19 sre_compile.py,1.20,1.21 sre_constants.py,1.14,1.15 sre_parse.py,1.21,1.22
Fredrik Lundh
python-dev@python.org
Sun, 2 Jul 2000 10:33:30 -0700
Update of /cvsroot/python/python/dist/src/Lib
In directory slayer.i.sourceforge.net:/tmp/cvs-serv31347/Lib
Modified Files:
sre.py sre_compile.py sre_constants.py sre_parse.py
Log Message:
- actually enabled charset anchors in the engine (still not
used by the code generator)
- changed max repeat value in engine (to match earlier array fix)
- added experimental "which part matched?" mechanism to sre; see
http://hem.passagen.se/eff/2000_07_01_bot-archive.htm#416954
or python-dev for details.
Index: sre.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sre.py,v
retrieving revision 1.18
retrieving revision 1.19
diff -C2 -r1.18 -r1.19
*** sre.py 2000/07/01 17:50:59 1.18
--- sre.py 2000/07/02 17:33:27 1.19
***************
*** 156,157 ****
--- 156,188 ----
copy_reg.pickle(type(_compile("")), _pickle, _compile)
+
+ # --------------------------------------------------------------------
+ # experimental stuff (see python-dev discussions for details)
+
class Scanner:
    """Experimental lexical scanner driven by one combined pattern.

    `lexicon` is a sequence of (phrase, action) pairs.  Every phrase is
    a regular expression; the phrases are joined into one alternation
    and each alternative is followed by a "(?P#n)" index marker, where
    n is the position of the phrase in the lexicon.  `scan` reads that
    position back from the match object's `index` attribute to find
    the action to run.

    An action may be:
      - a callable, invoked as action(scanner, matched_text); whatever
        it returns is appended to the result unless it is None;
      - None, in which case the matched text is dropped;
      - any other object, which is appended to the result unchanged.
    """

    def __init__(self, lexicon):
        self.lexicon = lexicon
        # Wrap each phrase in a non-capturing group and tag it with its
        # lexicon position so the match can report which one fired.
        p = []
        for phrase, action in lexicon:
            p.append("(?:%s)(?P#%d)" % (phrase, len(p)))
        # NOTE(review): this relies on the name `sre` being bound in the
        # enclosing module -- confirm against the rest of the file.
        self.scanner = sre.compile("|".join(p))

    def scan(self, string):
        """Scan `string` from its beginning.

        Returns a (result, remainder) tuple: `result` is the list of
        values produced by the actions, and `remainder` is the part of
        `string` starting at the position where scanning stopped.
        Scanning stops when no phrase matches at the current position
        or when a phrase matches the empty string.
        """
        result = []
        append = result.append
        match = self.scanner.match
        i = 0
        while 1:
            m = match(string, i)
            if not m:
                break
            j = m.end()
            if i == j:
                # An empty match would never advance; stop here.
                break
            action = self.lexicon[m.index][1]
            if callable(action):
                # Expose the current match object to the callback.  The
                # previous code stored the bound match *method* here,
                # which gave callbacks no access to the match itself.
                self.match = m
                action = action(self, m.group())
            if action is not None:
                append(action)
            i = j
        return result, string[i:]
Index: sre_compile.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sre_compile.py,v
retrieving revision 1.20
retrieving revision 1.21
diff -C2 -r1.20 -r1.21
*** sre_compile.py 2000/07/02 12:00:06 1.20
--- sre_compile.py 2000/07/02 17:33:27 1.21
***************
*** 209,213 ****
emit(OPCODES[op])
emit(av-1)
! elif op is MARK:
emit(OPCODES[op])
emit(av)
--- 209,213 ----
emit(OPCODES[op])
emit(av-1)
! elif op in (MARK, INDEX):
emit(OPCODES[op])
emit(av)
Index: sre_constants.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sre_constants.py,v
retrieving revision 1.14
retrieving revision 1.15
diff -C2 -r1.14 -r1.15
*** sre_constants.py 2000/07/02 12:00:06 1.14
--- sre_constants.py 2000/07/02 17:33:27 1.15
***************
*** 34,37 ****
--- 34,38 ----
IN = "in"
IN_IGNORE = "in_ignore"
+ INDEX = "index"
INFO = "info"
JUMP = "jump"
***************
*** 91,94 ****
--- 92,96 ----
CHARSET,
GROUP, GROUP_IGNORE,
+ INDEX,
IN, IN_IGNORE,
INFO,
Index: sre_parse.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sre_parse.py,v
retrieving revision 1.21
retrieving revision 1.22
diff -C2 -r1.21 -r1.22
*** sre_parse.py 2000/07/02 12:00:06 1.21
--- sre_parse.py 2000/07/02 17:33:27 1.22
***************
*** 452,455 ****
--- 452,472 ----
raise error, "unknown group name"
subpattern.append((GROUP, gid))
+ elif source.match("#"):
+ index = ""
+ while 1:
+ char = source.get()
+ if char is None:
+ raise error, "unterminated index"
+ if char == ")":
+ break
+ index = index + char
+ try:
+ index = int(index)
+ if index < 0 or index > MAXREPEAT:
+ raise ValueError
+ except ValueError:
+ raise error, "illegal index"
+ subpattern.append((INDEX, index))
+ continue
else:
char = source.get()