[Python-checkins] CVS: python/dist/src/Lib sre.py,1.1,1.2 sre_compile.py,1.1,1.2 sre_constants.py,1.1,1.2 sre_parse.py,1.1,1.2

Andrew M. Kuchling python-dev@python.org
Sun, 2 Apr 2000 00:22:34 -0500 (EST)


Update of /projects/cvsroot/python/dist/src/Lib
In directory amarok:/home/akuchlin/src/Python-1.5/Lib

Modified Files:
	sre.py sre_compile.py sre_constants.py sre_parse.py 
Log Message:
This patch looks large, but it just deletes the ^M characters and 
   untabifies the files.  No actual code changes were made.


Index: sre.py
===================================================================
RCS file: /projects/cvsroot/python/dist/src/Lib/sre.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -r1.1 -r1.2
*** sre.py	2000/03/31 14:58:54	1.1
--- sre.py	2000/04/02 05:22:29	1.2
***************
*** 1,46 ****
! # -*- Mode: Python; tab-width: 4 -*-
! #
! # Secret Labs' Regular Expression Engine
! # $Id: sre.py,v 1.1 2000/03/31 14:58:54 guido Exp $
! #
! # re-compatible interface for the sre matching engine
! #
! # Copyright (c) 1998-2000 by Secret Labs AB.  All rights reserved.
! #
! # This code can only be used for 1.6 alpha testing.  All other use
! # require explicit permission from Secret Labs AB.
! #
! # Portions of this engine have been developed in cooperation with
! # CNRI.  Hewlett-Packard provided funding for 1.6 integration and
! # other compatibility work.
! #
! 
! """
! this is a long string
! """
! 
! import sre_compile
! 
! # --------------------------------------------------------------------
! # public interface
! 
! def compile(pattern, flags=0):
!     return sre_compile.compile(pattern, _fixflags(flags))
! 
! def match(pattern, string, flags=0):
!     return compile(pattern, _fixflags(flags)).match(string)
! 
! def search(pattern, string, flags=0):
!     assert flags == 0
!     return compile(pattern, _fixflags(flags)).search(string)
! 
! # FIXME: etc
! 
! # --------------------------------------------------------------------
! # helpers
! 
! def _fixflags(flags):
!     # convert flag bitmask to sequence
!     assert flags == 0
!     return ()
! 
--- 1,46 ----
! # -*- Mode: Python; tab-width: 4 -*-
! #
! # Secret Labs' Regular Expression Engine
! # $Id: sre.py,v 1.2 2000/04/02 05:22:29 akuchlin Exp $
! #
! # re-compatible interface for the sre matching engine
! #
! # Copyright (c) 1998-2000 by Secret Labs AB.  All rights reserved.
! #
! # This code can only be used for 1.6 alpha testing.  All other use
! # require explicit permission from Secret Labs AB.
! #
! # Portions of this engine have been developed in cooperation with
! # CNRI.  Hewlett-Packard provided funding for 1.6 integration and
! # other compatibility work.
! #
! 
! """
! this is a long string
! """
! 
! import sre_compile
! 
! # --------------------------------------------------------------------
! # public interface
! 
! def compile(pattern, flags=0):
!     return sre_compile.compile(pattern, _fixflags(flags))
! 
! def match(pattern, string, flags=0):
!     return compile(pattern, _fixflags(flags)).match(string)
! 
! def search(pattern, string, flags=0):
!     assert flags == 0
!     return compile(pattern, _fixflags(flags)).search(string)
! 
! # FIXME: etc
! 
! # --------------------------------------------------------------------
! # helpers
! 
! def _fixflags(flags):
!     # convert flag bitmask to sequence
!     assert flags == 0
!     return ()
! 

Index: sre_compile.py
===================================================================
RCS file: /projects/cvsroot/python/dist/src/Lib/sre_compile.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -r1.1 -r1.2
*** sre_compile.py	2000/03/31 14:58:54	1.1
--- sre_compile.py	2000/04/02 05:22:30	1.2
***************
*** 1,187 ****
! #
! # Secret Labs' Regular Expression Engine
! # $Id: sre_compile.py,v 1.1 2000/03/31 14:58:54 guido Exp $
! #
! # convert template to internal format
! #
! # Copyright (c) 1997-2000 by Secret Labs AB.  All rights reserved.
! #
! # This code can only be used for 1.6 alpha testing.  All other use
! # require explicit permission from Secret Labs AB.
! #
! # Portions of this engine have been developed in cooperation with
! # CNRI.  Hewlett-Packard provided funding for 1.6 integration and
! # other compatibility work.
! #
! 
! # FIXME: <fl> formalize (objectify?) and document the compiler code
! # format, so that other frontends can use this compiler
! 
! import array, string, sys
! 
! import _sre
! 
! from sre_constants import *
! 
! # find an array type code that matches the engine's code size
! for WORDSIZE in "BHil":
!     if len(array.array(WORDSIZE, [0]).tostring()) == _sre.getcodesize():
! 	break
! else:
!     raise RuntimeError, "cannot find a useable array type"
! 
! # FIXME: <fl> should move some optimizations from the parser to here!
! 
! class Code:
!     def __init__(self):
! 	self.data = []
!     def __len__(self):
! 	return len(self.data)
!     def __getitem__(self, index):
! 	return self.data[index]
!     def __setitem__(self, index, code):
! 	self.data[index] = code
!     def append(self, code):
! 	self.data.append(code)
!     def todata(self):
! 	# print self.data
! 	return array.array(WORDSIZE, self.data).tostring()
! 
! def _lower(literal):
!     # return _sre._lower(literal) # FIXME
!     return string.lower(literal)
! 
! def _compile(code, pattern, flags):
!     append = code.append
!     for op, av in pattern:
! 	if op is ANY:
! 	    if "s" in flags:
! 		append(CODES[op]) # any character at all!
! 	    else:
! 		append(CODES[NOT_LITERAL])
! 		append(10)
! 	elif op in (SUCCESS, FAILURE):
! 	    append(CODES[op])
! 	elif op is AT:
! 	    append(CODES[op])
! 	    append(POSITIONS[av])
! 	elif op is BRANCH:
! 	    append(CODES[op])
! 	    tail = []
! 	    for av in av[1]:
! 		skip = len(code); append(0)
! 		_compile(code, av, flags)
! 		append(CODES[JUMP])
! 		tail.append(len(code)); append(0)
! 		code[skip] = len(code) - skip
! 	    append(0) # end of branch
! 	    for tail in tail:
! 		code[tail] = len(code) - tail
! 	elif op is CALL:
! 	    append(CODES[op])
! 	    skip = len(code); append(0)
! 	    _compile(code, av, flags)
! 	    append(CODES[SUCCESS])
! 	    code[skip] = len(code) - skip
! 	elif op is CATEGORY: # not used by current parser
! 	    append(CODES[op])
! 	    append(CATEGORIES[av])
! 	elif op is GROUP:
! 	    if "i" in flags:
! 		append(CODES[MAP_IGNORE[op]])
! 	    else:
! 		append(CODES[op])
! 	    append(av)
! 	elif op is IN:
! 	    if "i" in flags:
! 		append(CODES[MAP_IGNORE[op]])
! 		def fixup(literal):
! 		    return ord(_lower(literal))
! 	    else:
! 		append(CODES[op])
! 		fixup = ord
! 	    skip = len(code); append(0)
! 	    for op, av in av:
! 		append(CODES[op])
! 		if op is NEGATE:
! 		    pass
! 		elif op is LITERAL:
! 		    append(fixup(av))
! 		elif op is RANGE:
! 		    append(fixup(av[0]))
! 		    append(fixup(av[1]))
! 		elif op is CATEGORY:
! 		    append(CATEGORIES[av])
! 		else:
! 		    raise ValueError, "unsupported set operator"
! 	    append(CODES[FAILURE])
! 	    code[skip] = len(code) - skip
! 	elif op in (LITERAL, NOT_LITERAL):
! 	    if "i" in flags:
! 		append(CODES[MAP_IGNORE[op]])
! 		append(ord(_lower(av)))
! 	    else:
! 		append(CODES[op])
! 		append(ord(av))
! 	elif op is MARK:
! 	    append(CODES[op])
! 	    append(av)
!  	elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
! 	    lo, hi = av[2].getwidth()
!  	    if lo == 0:
!  		raise SyntaxError, "cannot repeat zero-width items"
! 	    if lo == hi == 1 and op is MAX_REPEAT:
! 		append(CODES[MAX_REPEAT_ONE])
! 		skip = len(code); append(0)
! 		append(av[0])
! 		append(av[1])
! 		_compile(code, av[2], flags)
! 		append(CODES[SUCCESS])
! 		code[skip] = len(code) - skip
! 	    else:
! 		append(CODES[op])
! 		skip = len(code); append(0)
! 		append(av[0])
! 		append(av[1])
! 		_compile(code, av[2], flags)
! 		if op is MIN_REPEAT:
! 		    append(CODES[MIN_UNTIL])
! 		else:
! 		    # FIXME: MAX_REPEAT PROBABLY DOESN'T WORK (?)
! 		    append(CODES[MAX_UNTIL])
! 		code[skip] = len(code) - skip
! 	elif op is SUBPATTERN:
! ## 	    group = av[0]
! ## 	    if group:
! ## 		append(CODES[MARK])
! ## 		append((group-1)*2)
! 	    _compile(code, av[1], flags)
! ## 	    if group:
! ## 		append(CODES[MARK])
! ## 		append((group-1)*2+1)
! 	else:
! 	    raise ValueError, ("unsupported operand type", op)
! 
! def compile(p, flags=()):
!     # convert pattern list to internal format
!     if type(p) is type(""):
! 	import sre_parse
! 	pattern = p
! 	p = sre_parse.parse(p)
!     else:
! 	pattern = None
!     # print p.getwidth()
!     # print p
!     code = Code()
!     _compile(code, p.data, p.pattern.flags)
!     code.append(CODES[SUCCESS])
!     # print list(code.data)
!     data = code.todata()
!     if 0: # debugging
! 	print
! 	print "-" * 68
! 	import sre_disasm
! 	sre_disasm.disasm(data)
! 	print "-" * 68
!     # print len(data), p.pattern.groups, len(p.pattern.groupdict)
!     return _sre.compile(pattern, data, p.pattern.groups-1, p.pattern.groupdict)
--- 1,187 ----
! #
! # Secret Labs' Regular Expression Engine
! # $Id: sre_compile.py,v 1.2 2000/04/02 05:22:30 akuchlin Exp $
! #
! # convert template to internal format
! #
! # Copyright (c) 1997-2000 by Secret Labs AB.  All rights reserved.
! #
! # This code can only be used for 1.6 alpha testing.  All other use
! # require explicit permission from Secret Labs AB.
! #
! # Portions of this engine have been developed in cooperation with
! # CNRI.  Hewlett-Packard provided funding for 1.6 integration and
! # other compatibility work.
! #
! 
! # FIXME: <fl> formalize (objectify?) and document the compiler code
! # format, so that other frontends can use this compiler
! 
! import array, string, sys
! 
! import _sre
! 
! from sre_constants import *
! 
! # find an array type code that matches the engine's code size
! for WORDSIZE in "BHil":
!     if len(array.array(WORDSIZE, [0]).tostring()) == _sre.getcodesize():
!         break
! else:
!     raise RuntimeError, "cannot find a useable array type"
! 
! # FIXME: <fl> should move some optimizations from the parser to here!
! 
! class Code:
!     def __init__(self):
!         self.data = []
!     def __len__(self):
!         return len(self.data)
!     def __getitem__(self, index):
!         return self.data[index]
!     def __setitem__(self, index, code):
!         self.data[index] = code
!     def append(self, code):
!         self.data.append(code)
!     def todata(self):
!         # print self.data
!         return array.array(WORDSIZE, self.data).tostring()
! 
! def _lower(literal):
!     # return _sre._lower(literal) # FIXME
!     return string.lower(literal)
! 
! def _compile(code, pattern, flags):
!     append = code.append
!     for op, av in pattern:
!         if op is ANY:
!             if "s" in flags:
!                 append(CODES[op]) # any character at all!
!             else:
!                 append(CODES[NOT_LITERAL])
!                 append(10)
!         elif op in (SUCCESS, FAILURE):
!             append(CODES[op])
!         elif op is AT:
!             append(CODES[op])
!             append(POSITIONS[av])
!         elif op is BRANCH:
!             append(CODES[op])
!             tail = []
!             for av in av[1]:
!                 skip = len(code); append(0)
!                 _compile(code, av, flags)
!                 append(CODES[JUMP])
!                 tail.append(len(code)); append(0)
!                 code[skip] = len(code) - skip
!             append(0) # end of branch
!             for tail in tail:
!                 code[tail] = len(code) - tail
!         elif op is CALL:
!             append(CODES[op])
!             skip = len(code); append(0)
!             _compile(code, av, flags)
!             append(CODES[SUCCESS])
!             code[skip] = len(code) - skip
!         elif op is CATEGORY: # not used by current parser
!             append(CODES[op])
!             append(CATEGORIES[av])
!         elif op is GROUP:
!             if "i" in flags:
!                 append(CODES[MAP_IGNORE[op]])
!             else:
!                 append(CODES[op])
!             append(av)
!         elif op is IN:
!             if "i" in flags:
!                 append(CODES[MAP_IGNORE[op]])
!                 def fixup(literal):
!                     return ord(_lower(literal))
!             else:
!                 append(CODES[op])
!                 fixup = ord
!             skip = len(code); append(0)
!             for op, av in av:
!                 append(CODES[op])
!                 if op is NEGATE:
!                     pass
!                 elif op is LITERAL:
!                     append(fixup(av))
!                 elif op is RANGE:
!                     append(fixup(av[0]))
!                     append(fixup(av[1]))
!                 elif op is CATEGORY:
!                     append(CATEGORIES[av])
!                 else:
!                     raise ValueError, "unsupported set operator"
!             append(CODES[FAILURE])
!             code[skip] = len(code) - skip
!         elif op in (LITERAL, NOT_LITERAL):
!             if "i" in flags:
!                 append(CODES[MAP_IGNORE[op]])
!                 append(ord(_lower(av)))
!             else:
!                 append(CODES[op])
!                 append(ord(av))
!         elif op is MARK:
!             append(CODES[op])
!             append(av)
!         elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
!             lo, hi = av[2].getwidth()
!             if lo == 0:
!                 raise SyntaxError, "cannot repeat zero-width items"
!             if lo == hi == 1 and op is MAX_REPEAT:
!                 append(CODES[MAX_REPEAT_ONE])
!                 skip = len(code); append(0)
!                 append(av[0])
!                 append(av[1])
!                 _compile(code, av[2], flags)
!                 append(CODES[SUCCESS])
!                 code[skip] = len(code) - skip
!             else:
!                 append(CODES[op])
!                 skip = len(code); append(0)
!                 append(av[0])
!                 append(av[1])
!                 _compile(code, av[2], flags)
!                 if op is MIN_REPEAT:
!                     append(CODES[MIN_UNTIL])
!                 else:
!                     # FIXME: MAX_REPEAT PROBABLY DOESN'T WORK (?)
!                     append(CODES[MAX_UNTIL])
!                 code[skip] = len(code) - skip
!         elif op is SUBPATTERN:
! ##          group = av[0]
! ##          if group:
! ##              append(CODES[MARK])
! ##              append((group-1)*2)
!             _compile(code, av[1], flags)
! ##          if group:
! ##              append(CODES[MARK])
! ##              append((group-1)*2+1)
!         else:
!             raise ValueError, ("unsupported operand type", op)
! 
! def compile(p, flags=()):
!     # convert pattern list to internal format
!     if type(p) is type(""):
!         import sre_parse
!         pattern = p
!         p = sre_parse.parse(p)
!     else:
!         pattern = None
!     # print p.getwidth()
!     # print p
!     code = Code()
!     _compile(code, p.data, p.pattern.flags)
!     code.append(CODES[SUCCESS])
!     # print list(code.data)
!     data = code.todata()
!     if 0: # debugging
!         print
!         print "-" * 68
!         import sre_disasm
!         sre_disasm.disasm(data)
!         print "-" * 68
!     # print len(data), p.pattern.groups, len(p.pattern.groupdict)
!     return _sre.compile(pattern, data, p.pattern.groups-1, p.pattern.groupdict)

Index: sre_constants.py
===================================================================
RCS file: /projects/cvsroot/python/dist/src/Lib/sre_constants.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -r1.1 -r1.2
*** sre_constants.py	2000/03/31 14:58:54	1.1
--- sre_constants.py	2000/04/02 05:22:30	1.2
***************
*** 1,131 ****
! #
! # Secret Labs' Regular Expression Engine
! # $Id: sre_constants.py,v 1.1 2000/03/31 14:58:54 guido Exp $
! #
! # various symbols used by the regular expression engine.
! # run this script to update the _sre include files!
! #
! # Copyright (c) 1998-2000 by Secret Labs AB.  All rights reserved.
! #
! # This code can only be used for 1.6 alpha testing.  All other use
! # require explicit permission from Secret Labs AB.
! #
! # Portions of this engine have been developed in cooperation with
! # CNRI.  Hewlett-Packard provided funding for 1.6 integration and
! # other compatibility work.
! #
! 
! # operators
! 
! FAILURE = "failure"
! SUCCESS = "success"
! 
! ANY = "any"
! ASSERT = "assert"
! AT = "at"
! BRANCH = "branch"
! CALL = "call"
! CATEGORY = "category"
! GROUP = "group"
! GROUP_IGNORE = "group_ignore"
! IN = "in"
! IN_IGNORE = "in_ignore"
! JUMP = "jump"
! LITERAL = "literal"
! LITERAL_IGNORE = "literal_ignore"
! MARK = "mark"
! MAX_REPEAT = "max_repeat"
! MAX_REPEAT_ONE = "max_repeat_one"
! MAX_UNTIL = "max_until"
! MIN_REPEAT = "min_repeat"
! MIN_UNTIL = "min_until"
! NEGATE = "negate"
! NOT_LITERAL = "not_literal"
! NOT_LITERAL_IGNORE = "not_literal_ignore"
! RANGE = "range"
! REPEAT = "repeat"
! SUBPATTERN = "subpattern"
! 
! # positions
! AT_BEGINNING = "at_beginning"
! AT_BOUNDARY = "at_boundary"
! AT_NON_BOUNDARY = "at_non_boundary"
! AT_END = "at_end"
! 
! # categories
! 
! CATEGORY_DIGIT = "category_digit"
! CATEGORY_NOT_DIGIT = "category_not_digit"
! CATEGORY_SPACE = "category_space"
! CATEGORY_NOT_SPACE = "category_not_space"
! CATEGORY_WORD = "category_word"
! CATEGORY_NOT_WORD = "category_not_word"
! 
! CODES = [
! 
!     # failure=0 success=1 (just because it looks better that way :-)
!     FAILURE, SUCCESS,
! 
!     ANY,
!     ASSERT,
!     AT,
!     BRANCH,
!     CALL,
!     CATEGORY,
!     GROUP, GROUP_IGNORE,
!     IN, IN_IGNORE,
!     JUMP,
!     LITERAL, LITERAL_IGNORE,
!     MARK,
!     MAX_REPEAT, MAX_UNTIL,
!     MAX_REPEAT_ONE,
!     MIN_REPEAT, MIN_UNTIL,
!     NOT_LITERAL, NOT_LITERAL_IGNORE,
!     NEGATE,
!     RANGE,
!     REPEAT
! 
! ]
! 
! # convert to dictionary
! c = {}
! i = 0
! for code in CODES:
!     c[code] = i
!     i = i + 1
! CODES = c
! 
! # replacement operations for "ignore case" mode
! MAP_IGNORE = {
!     GROUP: GROUP_IGNORE,
!     IN: IN_IGNORE,
!     LITERAL: LITERAL_IGNORE,
!     NOT_LITERAL: NOT_LITERAL_IGNORE
! }
! 
! POSITIONS = {
!     AT_BEGINNING: ord("a"),
!     AT_BOUNDARY: ord("b"),
!     AT_NON_BOUNDARY: ord("B"),
!     AT_END: ord("z"),
! }
! 
! CATEGORIES = {
!     CATEGORY_DIGIT: ord("d"),
!     CATEGORY_NOT_DIGIT: ord("D"),
!     CATEGORY_SPACE: ord("s"),
!     CATEGORY_NOT_SPACE: ord("S"),
!     CATEGORY_WORD: ord("w"),
!     CATEGORY_NOT_WORD: ord("W"),
! }
! 
! if __name__ == "__main__":
!     import string
!     items = CODES.items()
!     items.sort(lambda a, b: cmp(a[1], b[1]))
!     f = open("sre_constants.h", "w")
!     f.write("/* generated by sre_constants.py */\n")
!     for k, v in items:
! 	f.write("#define SRE_OP_" + string.upper(k) + " " + str(v) + "\n")
!     f.close()
!     print "done"
--- 1,131 ----
! #
! # Secret Labs' Regular Expression Engine
! # $Id: sre_constants.py,v 1.2 2000/04/02 05:22:30 akuchlin Exp $
! #
! # various symbols used by the regular expression engine.
! # run this script to update the _sre include files!
! #
! # Copyright (c) 1998-2000 by Secret Labs AB.  All rights reserved.
! #
! # This code can only be used for 1.6 alpha testing.  All other use
! # require explicit permission from Secret Labs AB.
! #
! # Portions of this engine have been developed in cooperation with
! # CNRI.  Hewlett-Packard provided funding for 1.6 integration and
! # other compatibility work.
! #
! 
! # operators
! 
! FAILURE = "failure"
! SUCCESS = "success"
! 
! ANY = "any"
! ASSERT = "assert"
! AT = "at"
! BRANCH = "branch"
! CALL = "call"
! CATEGORY = "category"
! GROUP = "group"
! GROUP_IGNORE = "group_ignore"
! IN = "in"
! IN_IGNORE = "in_ignore"
! JUMP = "jump"
! LITERAL = "literal"
! LITERAL_IGNORE = "literal_ignore"
! MARK = "mark"
! MAX_REPEAT = "max_repeat"
! MAX_REPEAT_ONE = "max_repeat_one"
! MAX_UNTIL = "max_until"
! MIN_REPEAT = "min_repeat"
! MIN_UNTIL = "min_until"
! NEGATE = "negate"
! NOT_LITERAL = "not_literal"
! NOT_LITERAL_IGNORE = "not_literal_ignore"
! RANGE = "range"
! REPEAT = "repeat"
! SUBPATTERN = "subpattern"
! 
! # positions
! AT_BEGINNING = "at_beginning"
! AT_BOUNDARY = "at_boundary"
! AT_NON_BOUNDARY = "at_non_boundary"
! AT_END = "at_end"
! 
! # categories
! 
! CATEGORY_DIGIT = "category_digit"
! CATEGORY_NOT_DIGIT = "category_not_digit"
! CATEGORY_SPACE = "category_space"
! CATEGORY_NOT_SPACE = "category_not_space"
! CATEGORY_WORD = "category_word"
! CATEGORY_NOT_WORD = "category_not_word"
! 
! CODES = [
! 
!     # failure=0 success=1 (just because it looks better that way :-)
!     FAILURE, SUCCESS,
! 
!     ANY,
!     ASSERT,
!     AT,
!     BRANCH,
!     CALL,
!     CATEGORY,
!     GROUP, GROUP_IGNORE,
!     IN, IN_IGNORE,
!     JUMP,
!     LITERAL, LITERAL_IGNORE,
!     MARK,
!     MAX_REPEAT, MAX_UNTIL,
!     MAX_REPEAT_ONE,
!     MIN_REPEAT, MIN_UNTIL,
!     NOT_LITERAL, NOT_LITERAL_IGNORE,
!     NEGATE,
!     RANGE,
!     REPEAT
! 
! ]
! 
! # convert to dictionary
! c = {}
! i = 0
! for code in CODES:
!     c[code] = i
!     i = i + 1
! CODES = c
! 
! # replacement operations for "ignore case" mode
! MAP_IGNORE = {
!     GROUP: GROUP_IGNORE,
!     IN: IN_IGNORE,
!     LITERAL: LITERAL_IGNORE,
!     NOT_LITERAL: NOT_LITERAL_IGNORE
! }
! 
! POSITIONS = {
!     AT_BEGINNING: ord("a"),
!     AT_BOUNDARY: ord("b"),
!     AT_NON_BOUNDARY: ord("B"),
!     AT_END: ord("z"),
! }
! 
! CATEGORIES = {
!     CATEGORY_DIGIT: ord("d"),
!     CATEGORY_NOT_DIGIT: ord("D"),
!     CATEGORY_SPACE: ord("s"),
!     CATEGORY_NOT_SPACE: ord("S"),
!     CATEGORY_WORD: ord("w"),
!     CATEGORY_NOT_WORD: ord("W"),
! }
! 
! if __name__ == "__main__":
!     import string
!     items = CODES.items()
!     items.sort(lambda a, b: cmp(a[1], b[1]))
!     f = open("sre_constants.h", "w")
!     f.write("/* generated by sre_constants.py */\n")
!     for k, v in items:
!         f.write("#define SRE_OP_" + string.upper(k) + " " + str(v) + "\n")
!     f.close()
!     print "done"

Index: sre_parse.py
===================================================================
RCS file: /projects/cvsroot/python/dist/src/Lib/sre_parse.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -r1.1 -r1.2
*** sre_parse.py	2000/03/31 14:58:54	1.1
--- sre_parse.py	2000/04/02 05:22:30	1.2
***************
*** 1,492 ****
! #
! # Secret Labs' Regular Expression Engine
! # $Id: sre_parse.py,v 1.1 2000/03/31 14:58:54 guido Exp $
! #
! # convert re-style regular expression to SRE template.  the current
! # implementation is somewhat incomplete, and not very fast.  should
! # definitely be rewritten before Python 1.6 goes beta.
! #
! # Copyright (c) 1998-2000 by Secret Labs AB.  All rights reserved.
! #
! # This code can only be used for 1.6 alpha testing.  All other use
! # require explicit permission from Secret Labs AB.
! #
! # Portions of this engine have been developed in cooperation with
! # CNRI.  Hewlett-Packard provided funding for 1.6 integration and
! # other compatibility work.
! #
! 
! # FIXME: comments marked with the FIXME tag are open issues.  all such
! # issues should be closed before the final beta.
! 
! import string, sys
! 
! from sre_constants import *
! 
! SPECIAL_CHARS = ".\\[{()*+?^$|"
! REPEAT_CHARS  = "*+?{"
! 
! OCTDIGITS = "01234567"
! HEXDIGITS = "0123456789abcdefABCDEF"
! 
! ESCAPES = {
!     "\\a": (LITERAL, chr(7)),
!     "\\b": (LITERAL, chr(8)),
!     "\\f": (LITERAL, chr(12)),
!     "\\n": (LITERAL, chr(10)),
!     "\\r": (LITERAL, chr(13)),
!     "\\t": (LITERAL, chr(9)),
!     "\\v": (LITERAL, chr(11))
! }
! 
! CATEGORIES = {
!     "\\A": (AT, AT_BEGINNING), # start of string
!     "\\b": (AT, AT_BOUNDARY),
!     "\\B": (AT, AT_NON_BOUNDARY),
!     "\\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),
!     "\\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]),
!     "\\s": (IN, [(CATEGORY, CATEGORY_SPACE)]),
!     "\\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
!     "\\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
!     "\\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
!     "\\Z": (AT, AT_END), # end of string
! }
! 
! class Pattern:
!     # FIXME: <fl> rename class, and store flags in here too!
!     def __init__(self):
! 	self.flags = []
! 	self.groups = 1
! 	self.groupdict = {}
!     def getgroup(self, name=None):
! 	gid = self.groups
! 	self.groups = gid + 1
! 	if name:
! 	    self.groupdict[name] = gid
! 	return gid
!     def setflag(self, flag):
! 	if flag not in self.flags:
! 	    self.flags.append(flag)
! 
! class SubPattern:
!     # a subpattern, in intermediate form
!     def __init__(self, pattern, data=None):
! 	self.pattern = pattern
! 	if not data:
! 	    data = []
! 	self.data = data
! 	self.flags = []
! 	self.width = None
!     def __repr__(self):
! 	return repr(self.data)
!     def __len__(self):
! 	return len(self.data)
!     def __delitem__(self, index):
! 	del self.data[index]
!     def __getitem__(self, index):
! 	return self.data[index]
!     def __setitem__(self, index, code):
! 	self.data[index] = code
!     def __getslice__(self, start, stop):
! 	return SubPattern(self.pattern, self.data[start:stop])
!     def insert(self, index, code):
! 	self.data.insert(index, code)
!     def append(self, code):
! 	self.data.append(code)
!     def getwidth(self):
! 	# determine the width (min, max) for this subpattern
! 	if self.width:
! 	    return self.width
! 	lo = hi = 0L
! 	for op, av in self.data:
! 	    if op is BRANCH:
! 		l = sys.maxint
! 		h = 0
! 		for av in av[1]:
! 		    i, j = av.getwidth()
! 		    l = min(l, i)
! 		    h = min(h, j)
! 		lo = lo + i
! 		hi = hi + j
! 	    elif op is CALL:
! 		i, j = av.getwidth()
! 		lo = lo + i
! 		hi = hi + j
! 	    elif op is SUBPATTERN:
! 		i, j = av[1].getwidth()
! 		lo = lo + i
! 		hi = hi + j
! 	    elif op in (MIN_REPEAT, MAX_REPEAT):
! 		i, j = av[2].getwidth()
! 		lo = lo + i * av[0]
! 		hi = hi + j * av[1]
! 	    elif op in (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY):
! 		lo = lo + 1
! 		hi = hi + 1
! 	    elif op == SUCCESS:
! 		break
! 	self.width = int(min(lo, sys.maxint)), int(min(hi, sys.maxint))
! 	return self.width
!     def set(self, flag):
! 	if not flag in self.flags:
! 	    self.flags.append(flag)
!     def reset(self, flag):
! 	if flag in self.flags:
! 	    self.flags.remove(flag)
! 
! class Tokenizer:
!     def __init__(self, string):
! 	self.string = list(string)
! 	self.next = self.__next()
!     def __next(self):
! 	if not self.string:
! 	    return None
! 	char = self.string[0]
! 	if char[0] == "\\":
! 	    try:
! 		c = self.string[1]
! 	    except IndexError:
! 		raise SyntaxError, "bogus escape"
! 	    char = char + c
! 	    try:
! 		if c == "x":
! 		    # hexadecimal constant
! 		    for i in xrange(2, sys.maxint):
! 			c = self.string[i]
! 			if c not in HEXDIGITS:
! 			    break
! 			char = char + c
! 		elif c in string.digits:
! 		    # decimal (or octal) number
! 		    for i in xrange(2, sys.maxint):
! 			c = self.string[i]
! 			# FIXME: if larger than current number of
! 			# groups, interpret as an octal number 
! 			if c not in string.digits:
! 			    break
! 			char = char + c
! 	    except IndexError:
! 		pass # use what we've got this far
! 	del self.string[0:len(char)]
! 	return char
!     def match(self, char):
! 	if char == self.next:
! 	    self.next = self.__next()
! 	    return 1
! 	return 0
!     def match_set(self, set):
! 	if self.next in set:
! 	    self.next = self.__next()
! 	    return 1
! 	return 0
!     def get(self):
! 	this = self.next
! 	self.next = self.__next()
! 	return this
! 
! def _fixescape(escape, character_class=0):
!     # convert escape to (type, value)
!     if character_class:
! 	# inside a character class, we'll look in the character
! 	# escapes dictionary first
! 	code = ESCAPES.get(escape)
! 	if code:
! 	    return code
! 	code = CATEGORIES.get(escape)
!     else:
! 	code = CATEGORIES.get(escape)
! 	if code:
! 	    return code
! 	code = ESCAPES.get(escape)
!     if code:
! 	return code
!     if not character_class:
! 	try:
! 	    group = int(escape[1:])
! 	    # FIXME: only valid if group <= current number of groups
! 	    return GROUP, group
! 	except ValueError:
! 	    pass
!     try:
! 	if escape[1:2] == "x":
! 	    escape = escape[2:]
! 	    return LITERAL, chr(string.atoi(escape[-2:], 16) & 0xff)
! 	elif escape[1:2] in string.digits:
! 	    return LITERAL, chr(string.atoi(escape[1:], 8) & 0xff)
! 	elif len(escape) == 2:
! 	    return LITERAL, escape[1]
!     except ValueError:
! 	pass
!     raise SyntaxError, "bogus escape: %s" % repr(escape)
! 
! def _branch(subpattern, items):
! 
!     # form a branch operator from a set of items (FIXME: move this
!     # optimization to the compiler module!)
! 
!     # check if all items share a common prefix
!     while 1:
! 	prefix = None
! 	for item in items:
! 	    if not item:
! 		break
! 	    if prefix is None:
! 		prefix = item[0]
! 	    elif item[0] != prefix:
! 		break
! 	else:
! 	    # all subitems start with a common "prefix".
! 	    # move it out of the branch
! 	    for item in items:
! 		del item[0]
! 	    subpattern.append(prefix)
! 	    continue # check next one
! 	break
! 
!     # check if the branch can be replaced by a character set
!     for item in items:
! 	if len(item) != 1 or item[0][0] != LITERAL:
! 	    break
!     else:
! 	# we can store this as a character set instead of a
! 	# branch (FIXME: use a range if possible)
! 	set = []
! 	for item in items:
! 	    set.append(item[0])
! 	subpattern.append((IN, set))
! 	return
! 
!     subpattern.append((BRANCH, (None, items)))
! 
! def _parse(source, pattern, flags=()):
! 
!     # parse regular expression pattern into an operator list.
! 
!     subpattern = SubPattern(pattern)
! 
!     this = None
! 
!     while 1:
! 
! 	if source.next in ("|", ")"):
! 	    break # end of subpattern
! 	this = source.get()
! 	if this is None:
! 	    break # end of pattern
! 
! 	if this and this[0] not in SPECIAL_CHARS:
! 	    subpattern.append((LITERAL, this))
! 
! 	elif this == "[":
! 	    # character set
! 	    set = []
! ## 	    if source.match(":"):
! ## 		pass # handle character classes
! 	    if source.match("^"):
! 		set.append((NEGATE, None))
! 	    # check remaining characters
! 	    start = set[:]
! 	    while 1:
! 		this = source.get()
! 		if this == "]" and set != start:
! 		    break
! 		elif this and this[0] == "\\":
! 		    code1 = _fixescape(this, 1)
! 		elif this:
! 		    code1 = LITERAL, this
! 		else:
! 		    raise SyntaxError, "unexpected end of regular expression"
! 		if source.match("-"):
! 		    # potential range
! 		    this = source.get()
! 		    if this == "]":
! 			set.append(code1)
! 			set.append((LITERAL, "-"))
! 			break
! 		    else:
! 			if this[0] == "\\":
! 			    code2 = _fixescape(this, 1)
! 			else:
! 			    code2 = LITERAL, this
! 			if code1[0] != LITERAL or code2[0] != LITERAL:
! 			    raise SyntaxError, "illegal range"
! 			if len(code1[1]) != 1 or len(code2[1]) != 1:
! 			    raise SyntaxError, "illegal range"
! 			set.append((RANGE, (code1[1], code2[1])))
! 		else:
! 		    if code1[0] is IN:
! 			code1 = code1[1][0]
! 		    set.append(code1)
! 
! 	    # FIXME: <fl> move set optimization to support function
! 	    if len(set)==1 and set[0][0] is LITERAL:
! 		subpattern.append(set[0]) # optimization
! 	    elif len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL:
! 		subpattern.append((NOT_LITERAL, set[1][1])) # optimization
! 	    else:
! 		# FIXME: <fl> add charmap optimization
! 		subpattern.append((IN, set))
! 
! 	elif this and this[0] in REPEAT_CHARS:
! 	    # repeat previous item
! 	    if this == "?":
! 		min, max = 0, 1
! 	    elif this == "*":
! 		min, max = 0, sys.maxint
! 	    elif this == "+":
! 		min, max = 1, sys.maxint
! 	    elif this == "{":
! 		min, max = 0, sys.maxint
! 		lo = hi = ""
! 		while source.next in string.digits:
! 		    lo = lo + source.get()
! 		if source.match(","):
! 		    while source.next in string.digits:
! 			hi = hi + source.get()
! 		else:
! 		    hi = lo
! 		if not source.match("}"):
! 		    raise SyntaxError, "bogus range"
! 		if lo:
! 		    min = int(lo)
! 		if hi:
! 		    max = int(hi)
! 		# FIXME: <fl> check that hi >= lo!
! 	    else:
! 		raise SyntaxError, "not supported"
! 	    # figure out which item to repeat
! 	    # FIXME: should back up to the right mark, right?
! 	    if subpattern:
! 		index = len(subpattern)-1
! 		while subpattern[index][0] is MARK:
! 		    index = index - 1
! 		item = subpattern[index:index+1]
! 	    else:
! 		raise SyntaxError, "nothing to repeat"
! 	    if source.match("?"):
! 		subpattern[index] = (MIN_REPEAT, (min, max, item))
! 	    else:
! 		subpattern[index] = (MAX_REPEAT, (min, max, item))
! 	elif this == ".":
! 	    subpattern.append((ANY, None))
! 	elif this == "(":
! 	    group = 1
! 	    name = None
! 	    if source.match("?"):
! 		group = 0
! 		# options
! 		if source.match("P"):
! 		    # named group: skip forward to end of name
! 		    if source.match("<"):
! 			name = ""
! 			while 1:
! 			    char = source.get()
! 			    if char in (">", None):
! 				break
! 			    name = name + char
! 			group = 1
! 		elif source.match(":"):
! 		    # non-capturing group
! 		    group = 2
! 		elif source.match_set("iI"):
! 		    pattern.setflag("i")
! 		elif source.match_set("lL"):
! 		    pattern.setflag("l")
! 		elif source.match_set("mM"):
! 		    pattern.setflag("m")
! 		elif source.match_set("sS"):
! 		    pattern.setflag("s")
! 		elif source.match_set("xX"):
! 		    pattern.setflag("x")
! 	    if group:
! 		# parse group contents
! 		b = []
! 		if group == 2:
! 		    # anonymous group
! 		    group = None
! 		else:
! 		    group = pattern.getgroup(name)
!  		if group:
!  		    subpattern.append((MARK, (group-1)*2))
! 		while 1:
! 		    p = _parse(source, pattern, flags)
! 		    if source.match(")"):
! 			if b:
! 			    b.append(p)
! 			    _branch(subpattern, b)
! 			else:
! 			    subpattern.append((SUBPATTERN, (group, p)))
! 			break
! 		    elif source.match("|"):
! 			b.append(p)
! 		    else:
! 			raise SyntaxError, "group not properly closed"
!  		if group:
!  		    subpattern.append((MARK, (group-1)*2+1))
! 	    else:
! 		# FIXME: should this really be a while loop?
!  		while source.get() not in (")", None):
!  		    pass
! 
! 	elif this == "^":
! 	    subpattern.append((AT, AT_BEGINNING))
! 
! 	elif this == "$":
! 	    subpattern.append((AT, AT_END))
! 
! 	elif this and this[0] == "\\":
! 	    code =_fixescape(this)
! 	    subpattern.append(code)
! 
! 	else:
! 	    raise SyntaxError, "parser error"
! 
!     return subpattern
! 
! def parse(source, flags=()):
!     s = Tokenizer(source)
!     g = Pattern()
!     b = []
!     while 1:
! 	p = _parse(s, g, flags)
! 	tail = s.get()
! 	if tail == "|":
! 	    b.append(p)
! 	elif tail == ")":
! 	    raise SyntaxError, "unbalanced parenthesis"
! 	elif tail is None:
! 	    if b:
! 		b.append(p)
! 		p = SubPattern(g)
! 		_branch(p, b)
! 	    break
! 	else:
! 	    raise SyntaxError, "bogus characters at end of regular expression"
!     return p
! 
! if __name__ == "__main__":
!     from pprint import pprint
!     from testpatterns import PATTERNS
!     a = b = c = 0
!     for pattern, flags in PATTERNS:
! 	if flags:
! 	    continue
! 	print "-"*68
! 	try:
! 	    p = parse(pattern)
! 	    print repr(pattern), "->"
! 	    pprint(p.data)
! 	    import sre_compile
! 	    try:
! 		code = sre_compile.compile(p)
! 		c = c + 1
! 	    except:
! 		pass
! 	    a = a + 1
! 	except SyntaxError, v:
! 	    print "**", repr(pattern), v
! 	b = b + 1
!     print "-"*68
!     print a, "of", b, "patterns successfully parsed"
!     print c, "of", b, "patterns successfully compiled"
! 
--- 1,492 ----
! #
! # Secret Labs' Regular Expression Engine
! # $Id: sre_parse.py,v 1.2 2000/04/02 05:22:30 akuchlin Exp $
! #
! # convert re-style regular expression to SRE template.  the current
! # implementation is somewhat incomplete, and not very fast.  should
! # definitely be rewritten before Python 1.6 goes beta.
! #
! # Copyright (c) 1998-2000 by Secret Labs AB.  All rights reserved.
! #
! # This code can only be used for 1.6 alpha testing.  All other use
! # require explicit permission from Secret Labs AB.
! #
! # Portions of this engine have been developed in cooperation with
! # CNRI.  Hewlett-Packard provided funding for 1.6 integration and
! # other compatibility work.
! #
! 
! # FIXME: comments marked with the FIXME tag are open issues.  all such
! # issues should be closed before the final beta.
! 
! import string, sys
! 
! from sre_constants import *
! 
! SPECIAL_CHARS = ".\\[{()*+?^$|"
! REPEAT_CHARS  = "*+?{"
! 
! OCTDIGITS = "01234567"
! HEXDIGITS = "0123456789abcdefABCDEF"
! 
! ESCAPES = {
!     "\\a": (LITERAL, chr(7)),
!     "\\b": (LITERAL, chr(8)),
!     "\\f": (LITERAL, chr(12)),
!     "\\n": (LITERAL, chr(10)),
!     "\\r": (LITERAL, chr(13)),
!     "\\t": (LITERAL, chr(9)),
!     "\\v": (LITERAL, chr(11))
! }
! 
! CATEGORIES = {
!     "\\A": (AT, AT_BEGINNING), # start of string
!     "\\b": (AT, AT_BOUNDARY),
!     "\\B": (AT, AT_NON_BOUNDARY),
!     "\\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),
!     "\\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]),
!     "\\s": (IN, [(CATEGORY, CATEGORY_SPACE)]),
!     "\\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
!     "\\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
!     "\\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
!     "\\Z": (AT, AT_END), # end of string
! }
! 
! class Pattern:
!     # FIXME: <fl> rename class, and store flags in here too!
!     def __init__(self):
!         self.flags = []
!         self.groups = 1
!         self.groupdict = {}
!     def getgroup(self, name=None):
!         gid = self.groups
!         self.groups = gid + 1
!         if name:
!             self.groupdict[name] = gid
!         return gid
!     def setflag(self, flag):
!         if flag not in self.flags:
!             self.flags.append(flag)
! 
! class SubPattern:
!     # a subpattern, in intermediate form
!     def __init__(self, pattern, data=None):
!         self.pattern = pattern
!         if not data:
!             data = []
!         self.data = data
!         self.flags = []
!         self.width = None
!     def __repr__(self):
!         return repr(self.data)
!     def __len__(self):
!         return len(self.data)
!     def __delitem__(self, index):
!         del self.data[index]
!     def __getitem__(self, index):
!         return self.data[index]
!     def __setitem__(self, index, code):
!         self.data[index] = code
!     def __getslice__(self, start, stop):
!         return SubPattern(self.pattern, self.data[start:stop])
!     def insert(self, index, code):
!         self.data.insert(index, code)
!     def append(self, code):
!         self.data.append(code)
!     def getwidth(self):
!         # determine the width (min, max) for this subpattern
!         if self.width:
!             return self.width
!         lo = hi = 0L
!         for op, av in self.data:
!             if op is BRANCH:
!                 l = sys.maxint
!                 h = 0
!                 for av in av[1]:
!                     i, j = av.getwidth()
!                     l = min(l, i)
!                     h = min(h, j)
!                 lo = lo + i
!                 hi = hi + j
!             elif op is CALL:
!                 i, j = av.getwidth()
!                 lo = lo + i
!                 hi = hi + j
!             elif op is SUBPATTERN:
!                 i, j = av[1].getwidth()
!                 lo = lo + i
!                 hi = hi + j
!             elif op in (MIN_REPEAT, MAX_REPEAT):
!                 i, j = av[2].getwidth()
!                 lo = lo + i * av[0]
!                 hi = hi + j * av[1]
!             elif op in (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY):
!                 lo = lo + 1
!                 hi = hi + 1
!             elif op == SUCCESS:
!                 break
!         self.width = int(min(lo, sys.maxint)), int(min(hi, sys.maxint))
!         return self.width
!     def set(self, flag):
!         if not flag in self.flags:
!             self.flags.append(flag)
!     def reset(self, flag):
!         if flag in self.flags:
!             self.flags.remove(flag)
! 
! class Tokenizer:
!     def __init__(self, string):
!         self.string = list(string)
!         self.next = self.__next()
!     def __next(self):
!         if not self.string:
!             return None
!         char = self.string[0]
!         if char[0] == "\\":
!             try:
!                 c = self.string[1]
!             except IndexError:
!                 raise SyntaxError, "bogus escape"
!             char = char + c
!             try:
!                 if c == "x":
!                     # hexadecimal constant
!                     for i in xrange(2, sys.maxint):
!                         c = self.string[i]
!                         if c not in HEXDIGITS:
!                             break
!                         char = char + c
!                 elif c in string.digits:
!                     # decimal (or octal) number
!                     for i in xrange(2, sys.maxint):
!                         c = self.string[i]
!                         # FIXME: if larger than current number of
!                         # groups, interpret as an octal number 
!                         if c not in string.digits:
!                             break
!                         char = char + c
!             except IndexError:
!                 pass # use what we've got this far
!         del self.string[0:len(char)]
!         return char
!     def match(self, char):
!         if char == self.next:
!             self.next = self.__next()
!             return 1
!         return 0
!     def match_set(self, set):
!         if self.next in set:
!             self.next = self.__next()
!             return 1
!         return 0
!     def get(self):
!         this = self.next
!         self.next = self.__next()
!         return this
! 
! def _fixescape(escape, character_class=0):
!     # convert escape to (type, value)
!     if character_class:
!         # inside a character class, we'll look in the character
!         # escapes dictionary first
!         code = ESCAPES.get(escape)
!         if code:
!             return code
!         code = CATEGORIES.get(escape)
!     else:
!         code = CATEGORIES.get(escape)
!         if code:
!             return code
!         code = ESCAPES.get(escape)
!     if code:
!         return code
!     if not character_class:
!         try:
!             group = int(escape[1:])
!             # FIXME: only valid if group <= current number of groups
!             return GROUP, group
!         except ValueError:
!             pass
!     try:
!         if escape[1:2] == "x":
!             escape = escape[2:]
!             return LITERAL, chr(string.atoi(escape[-2:], 16) & 0xff)
!         elif escape[1:2] in string.digits:
!             return LITERAL, chr(string.atoi(escape[1:], 8) & 0xff)
!         elif len(escape) == 2:
!             return LITERAL, escape[1]
!     except ValueError:
!         pass
!     raise SyntaxError, "bogus escape: %s" % repr(escape)
! 
! def _branch(subpattern, items):
! 
!     # form a branch operator from a set of items (FIXME: move this
!     # optimization to the compiler module!)
! 
!     # check if all items share a common prefix
!     while 1:
!         prefix = None
!         for item in items:
!             if not item:
!                 break
!             if prefix is None:
!                 prefix = item[0]
!             elif item[0] != prefix:
!                 break
!         else:
!             # all subitems start with a common "prefix".
!             # move it out of the branch
!             for item in items:
!                 del item[0]
!             subpattern.append(prefix)
!             continue # check next one
!         break
! 
!     # check if the branch can be replaced by a character set
!     for item in items:
!         if len(item) != 1 or item[0][0] != LITERAL:
!             break
!     else:
!         # we can store this as a character set instead of a
!         # branch (FIXME: use a range if possible)
!         set = []
!         for item in items:
!             set.append(item[0])
!         subpattern.append((IN, set))
!         return
! 
!     subpattern.append((BRANCH, (None, items)))
! 
! def _parse(source, pattern, flags=()):
! 
!     # parse regular expression pattern into an operator list.
! 
!     subpattern = SubPattern(pattern)
! 
!     this = None
! 
!     while 1:
! 
!         if source.next in ("|", ")"):
!             break # end of subpattern
!         this = source.get()
!         if this is None:
!             break # end of pattern
! 
!         if this and this[0] not in SPECIAL_CHARS:
!             subpattern.append((LITERAL, this))
! 
!         elif this == "[":
!             # character set
!             set = []
! ##          if source.match(":"):
! ##              pass # handle character classes
!             if source.match("^"):
!                 set.append((NEGATE, None))
!             # check remaining characters
!             start = set[:]
!             while 1:
!                 this = source.get()
!                 if this == "]" and set != start:
!                     break
!                 elif this and this[0] == "\\":
!                     code1 = _fixescape(this, 1)
!                 elif this:
!                     code1 = LITERAL, this
!                 else:
!                     raise SyntaxError, "unexpected end of regular expression"
!                 if source.match("-"):
!                     # potential range
!                     this = source.get()
!                     if this == "]":
!                         set.append(code1)
!                         set.append((LITERAL, "-"))
!                         break
!                     else:
!                         if this[0] == "\\":
!                             code2 = _fixescape(this, 1)
!                         else:
!                             code2 = LITERAL, this
!                         if code1[0] != LITERAL or code2[0] != LITERAL:
!                             raise SyntaxError, "illegal range"
!                         if len(code1[1]) != 1 or len(code2[1]) != 1:
!                             raise SyntaxError, "illegal range"
!                         set.append((RANGE, (code1[1], code2[1])))
!                 else:
!                     if code1[0] is IN:
!                         code1 = code1[1][0]
!                     set.append(code1)
! 
!             # FIXME: <fl> move set optimization to support function
!             if len(set)==1 and set[0][0] is LITERAL:
!                 subpattern.append(set[0]) # optimization
!             elif len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL:
!                 subpattern.append((NOT_LITERAL, set[1][1])) # optimization
!             else:
!                 # FIXME: <fl> add charmap optimization
!                 subpattern.append((IN, set))
! 
!         elif this and this[0] in REPEAT_CHARS:
!             # repeat previous item
!             if this == "?":
!                 min, max = 0, 1
!             elif this == "*":
!                 min, max = 0, sys.maxint
!             elif this == "+":
!                 min, max = 1, sys.maxint
!             elif this == "{":
!                 min, max = 0, sys.maxint
!                 lo = hi = ""
!                 while source.next in string.digits:
!                     lo = lo + source.get()
!                 if source.match(","):
!                     while source.next in string.digits:
!                         hi = hi + source.get()
!                 else:
!                     hi = lo
!                 if not source.match("}"):
!                     raise SyntaxError, "bogus range"
!                 if lo:
!                     min = int(lo)
!                 if hi:
!                     max = int(hi)
!                 # FIXME: <fl> check that hi >= lo!
!             else:
!                 raise SyntaxError, "not supported"
!             # figure out which item to repeat
!             # FIXME: should back up to the right mark, right?
!             if subpattern:
!                 index = len(subpattern)-1
!                 while subpattern[index][0] is MARK:
!                     index = index - 1
!                 item = subpattern[index:index+1]
!             else:
!                 raise SyntaxError, "nothing to repeat"
!             if source.match("?"):
!                 subpattern[index] = (MIN_REPEAT, (min, max, item))
!             else:
!                 subpattern[index] = (MAX_REPEAT, (min, max, item))
!         elif this == ".":
!             subpattern.append((ANY, None))
!         elif this == "(":
!             group = 1
!             name = None
!             if source.match("?"):
!                 group = 0
!                 # options
!                 if source.match("P"):
!                     # named group: skip forward to end of name
!                     if source.match("<"):
!                         name = ""
!                         while 1:
!                             char = source.get()
!                             if char in (">", None):
!                                 break
!                             name = name + char
!                         group = 1
!                 elif source.match(":"):
!                     # non-capturing group
!                     group = 2
!                 elif source.match_set("iI"):
!                     pattern.setflag("i")
!                 elif source.match_set("lL"):
!                     pattern.setflag("l")
!                 elif source.match_set("mM"):
!                     pattern.setflag("m")
!                 elif source.match_set("sS"):
!                     pattern.setflag("s")
!                 elif source.match_set("xX"):
!                     pattern.setflag("x")
!             if group:
!                 # parse group contents
!                 b = []
!                 if group == 2:
!                     # anonymous group
!                     group = None
!                 else:
!                     group = pattern.getgroup(name)
!                 if group:
!                     subpattern.append((MARK, (group-1)*2))
!                 while 1:
!                     p = _parse(source, pattern, flags)
!                     if source.match(")"):
!                         if b:
!                             b.append(p)
!                             _branch(subpattern, b)
!                         else:
!                             subpattern.append((SUBPATTERN, (group, p)))
!                         break
!                     elif source.match("|"):
!                         b.append(p)
!                     else:
!                         raise SyntaxError, "group not properly closed"
!                 if group:
!                     subpattern.append((MARK, (group-1)*2+1))
!             else:
!                 # FIXME: should this really be a while loop?
!                 while source.get() not in (")", None):
!                     pass
! 
!         elif this == "^":
!             subpattern.append((AT, AT_BEGINNING))
! 
!         elif this == "$":
!             subpattern.append((AT, AT_END))
! 
!         elif this and this[0] == "\\":
!             code =_fixescape(this)
!             subpattern.append(code)
! 
!         else:
!             raise SyntaxError, "parser error"
! 
!     return subpattern
! 
! def parse(source, flags=()):
!     s = Tokenizer(source)
!     g = Pattern()
!     b = []
!     while 1:
!         p = _parse(s, g, flags)
!         tail = s.get()
!         if tail == "|":
!             b.append(p)
!         elif tail == ")":
!             raise SyntaxError, "unbalanced parenthesis"
!         elif tail is None:
!             if b:
!                 b.append(p)
!                 p = SubPattern(g)
!                 _branch(p, b)
!             break
!         else:
!             raise SyntaxError, "bogus characters at end of regular expression"
!     return p
! 
! if __name__ == "__main__":
!     from pprint import pprint
!     from testpatterns import PATTERNS
!     a = b = c = 0
!     for pattern, flags in PATTERNS:
!         if flags:
!             continue
!         print "-"*68
!         try:
!             p = parse(pattern)
!             print repr(pattern), "->"
!             pprint(p.data)
!             import sre_compile
!             try:
!                 code = sre_compile.compile(p)
!                 c = c + 1
!             except:
!                 pass
!             a = a + 1
!         except SyntaxError, v:
!             print "**", repr(pattern), v
!         b = b + 1
!     print "-"*68
!     print a, "of", b, "patterns successfully parsed"
!     print c, "of", b, "patterns successfully compiled"
!