[Python-checkins] CVS: python/dist/src/Lib sre.py,1.25,1.25.2.1 sre_compile.py,1.31,1.31.2.1 sre_constants.py,1.21,1.21.2.1 sre_parse.py,1.37,1.37.2.1

Guido van Rossum gvanrossum@users.sourceforge.net
Wed, 13 Jun 2001 08:15:04 -0700


Update of /cvsroot/python/python/dist/src/Lib
In directory usw-pr-cvs1:/tmp/cvs-serv31452/Lib

Modified Files:
      Tag: release20-maint
	sre.py sre_compile.py sre_constants.py sre_parse.py 
Log Message:
Bring SRE up to date with Python 2.1

Index: sre.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sre.py,v
retrieving revision 1.25
retrieving revision 1.25.2.1
diff -C2 -r1.25 -r1.25.2.1
*** sre.py	2000/09/21 17:03:24	1.25
--- sre.py	2001/06/13 15:15:02	1.25.2.1
***************
*** 4,8 ****
  # re-compatible interface for the sre matching engine
  #
! # Copyright (c) 1998-2000 by Secret Labs AB.  All rights reserved.
  #
  # This version of the SRE library can be redistributed under CNRI's
--- 4,8 ----
  # re-compatible interface for the sre matching engine
  #
! # Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.
  #
  # This version of the SRE library can be redistributed under CNRI's
***************
*** 15,35 ****
  #
  
- # FIXME: change all FIXME's to XXX ;-)
- 
  import sre_compile
  import sre_parse
  
  import string
  
  # flags
! I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE
! L = LOCALE = sre_compile.SRE_FLAG_LOCALE
! M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE
! S = DOTALL = sre_compile.SRE_FLAG_DOTALL
! X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE
! 
! # sre extensions (may or may not be in 1.6/2.0 final)
! T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE
! U = UNICODE = sre_compile.SRE_FLAG_UNICODE
  
  # sre exception
--- 15,43 ----
  #
  
  import sre_compile
  import sre_parse
  
+ # public symbols
+ __all__ = [ "match", "search", "sub", "subn", "split", "findall",
+     "compile", "purge", "template", "escape", "I", "L", "M", "S", "X",
+     "U", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE",
+     "UNICODE", "error" ]
+ 
+ __version__ = "2.1b2"
+ 
+ # this module works under 1.5.2 and later.  don't use string methods
  import string
  
  # flags
! I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE # ignore case
! L = LOCALE = sre_compile.SRE_FLAG_LOCALE # assume current 8-bit locale
! U = UNICODE = sre_compile.SRE_FLAG_UNICODE # assume unicode locale
! M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE # make anchors look for newline
! S = DOTALL = sre_compile.SRE_FLAG_DOTALL # make dot match newline
! X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE # ignore whitespace and comments
! 
! # sre extensions (experimental, don't rely on these)
! T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE # disable backtracking
! DEBUG = sre_compile.SRE_FLAG_DEBUG # dump pattern after compilation
  
  # sre exception
***************
*** 39,72 ****
  # public interface
  
- # FIXME: add docstrings
- 
  def match(pattern, string, flags=0):
      return _compile(pattern, flags).match(string)
  
  def search(pattern, string, flags=0):
      return _compile(pattern, flags).search(string)
  
  def sub(pattern, repl, string, count=0):
      return _compile(pattern, 0).sub(repl, string, count)
  
  def subn(pattern, repl, string, count=0):
      return _compile(pattern, 0).subn(repl, string, count)
  
  def split(pattern, string, maxsplit=0):
      return _compile(pattern, 0).split(string, maxsplit)
  
  def findall(pattern, string, maxsplit=0):
      return _compile(pattern, 0).findall(string, maxsplit)
  
  def compile(pattern, flags=0):
      return _compile(pattern, flags)
  
  def purge():
      _cache.clear()
  
  def template(pattern, flags=0):
      return _compile(pattern, flags|T)
  
  def escape(pattern):
      s = list(pattern)
      for i in range(len(pattern)):
--- 47,104 ----
  # public interface
  
  def match(pattern, string, flags=0):
+     """Try to apply the pattern at the start of the string, returning
+     a match object, or None if no match was found."""
      return _compile(pattern, flags).match(string)
  
  def search(pattern, string, flags=0):
+     """Scan through string looking for a match to the pattern, returning
+     a match object, or None if no match was found."""
      return _compile(pattern, flags).search(string)
  
  def sub(pattern, repl, string, count=0):
+     """Return the string obtained by replacing the leftmost
+     non-overlapping occurrences of the pattern in string by the
+     replacement repl"""
      return _compile(pattern, 0).sub(repl, string, count)
  
  def subn(pattern, repl, string, count=0):
+     """Return a 2-tuple containing (new_string, number).
+     new_string is the string obtained by replacing the leftmost
+     non-overlapping occurrences of the pattern in the source
+     string by the replacement repl.  number is the number of
+     substitutions that were made."""
      return _compile(pattern, 0).subn(repl, string, count)
  
  def split(pattern, string, maxsplit=0):
+     """Split the source string by the occurrences of the pattern,
+     returning a list containing the resulting substrings."""
      return _compile(pattern, 0).split(string, maxsplit)
  
  def findall(pattern, string, maxsplit=0):
+     """Return a list of all non-overlapping matches in the string.
+ 
+     If one or more groups are present in the pattern, return a
+     list of groups; this will be a list of tuples if the pattern
+     has more than one group.
+ 
+     Empty matches are included in the result."""
      return _compile(pattern, 0).findall(string, maxsplit)
  
  def compile(pattern, flags=0):
+     "Compile a regular expression pattern, returning a pattern object."
      return _compile(pattern, flags)
  
  def purge():
+     "Clear the regular expression cache"
      _cache.clear()
+     _cache_repl.clear()
  
  def template(pattern, flags=0):
+     "Compile a template pattern, returning a pattern object"
      return _compile(pattern, flags|T)
  
  def escape(pattern):
+     "Escape all non-alphanumeric characters in pattern."
      s = list(pattern)
      for i in range(len(pattern)):
***************
*** 83,86 ****
--- 115,120 ----
  
  _cache = {}
+ _cache_repl = {}
+ 
  _MAXCACHE = 100
  
***************
*** 106,109 ****
--- 140,158 ----
      return p
  
+ def _compile_repl(*key):
+     # internal: compile replacement pattern
+     p = _cache_repl.get(key)
+     if p is not None:
+         return p
+     repl, pattern = key
+     try:
+         p = sre_parse.parse_template(repl, pattern)
+     except error, v:
+         raise error, v # invalid expression
+     if len(_cache_repl) >= _MAXCACHE:
+         _cache_repl.clear()
+     _cache_repl[key] = p
+     return p
+ 
  def _expand(pattern, match, template):
      # internal: match.expand implementation hook
***************
*** 120,124 ****
          filter = template
      else:
!         template = sre_parse.parse_template(template, pattern)
          def filter(match, template=template):
              return sre_parse.expand_template(template, match)
--- 169,173 ----
          filter = template
      else:
!         template = _compile_repl(template, pattern)
          def filter(match, template=template):
              return sre_parse.expand_template(template, match)
***************
*** 159,163 ****
          append(string[i:b])
          if g and b != e:
!             extend(m.groups())
          i = e
          n = n + 1
--- 208,212 ----
          append(string[i:b])
          if g and b != e:
!             extend(list(m.groups()))
          i = e
          n = n + 1
***************
*** 205,209 ****
              action = self.lexicon[m.lastindex][1]
              if callable(action):
!                 self.match = match
                  action = action(self, m.group())
              if action is not None:
--- 254,258 ----
              action = self.lexicon[m.lastindex][1]
              if callable(action):
!                 self.match = m
                  action = action(self, m.group())
              if action is not None:

Index: sre_compile.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sre_compile.py,v
retrieving revision 1.31
retrieving revision 1.31.2.1
diff -C2 -r1.31 -r1.31.2.1
*** sre_compile.py	2000/10/07 17:38:22	1.31
--- sre_compile.py	2001/06/13 15:15:02	1.31.2.1
***************
*** 4,8 ****
  # convert template to internal format
  #
! # Copyright (c) 1997-2000 by Secret Labs AB.  All rights reserved.
  #
  # See the sre.py file for information on usage and redistribution.
--- 4,8 ----
  # convert template to internal format
  #
! # Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
  #
  # See the sre.py file for information on usage and redistribution.
***************
*** 13,16 ****
--- 13,18 ----
  from sre_constants import *
  
+ assert _sre.MAGIC == MAGIC, "SRE module mismatch"
+ 
  MAXCODE = 65535
  
***************
*** 22,28 ****
              if flags & SRE_FLAG_IGNORECASE:
                  emit(OPCODES[OP_IGNORE[op]])
              else:
                  emit(OPCODES[op])
!             emit(av)
          elif op is IN:
              if flags & SRE_FLAG_IGNORECASE:
--- 24,31 ----
              if flags & SRE_FLAG_IGNORECASE:
                  emit(OPCODES[OP_IGNORE[op]])
+                 emit(_sre.getlower(av, flags))
              else:
                  emit(OPCODES[op])
!                 emit(av)
          elif op is IN:
              if flags & SRE_FLAG_IGNORECASE:
***************
*** 103,109 ****
              emit(OPCODES[op])
              if flags & SRE_FLAG_MULTILINE:
!                 emit(ATCODES[AT_MULTILINE.get(av, av)])
!             else:
!                 emit(ATCODES[av])
          elif op is BRANCH:
              emit(OPCODES[op])
--- 106,115 ----
              emit(OPCODES[op])
              if flags & SRE_FLAG_MULTILINE:
!                 av = AT_MULTILINE.get(av, av)
!             if flags & SRE_FLAG_LOCALE:
!                 av = AT_LOCALE.get(av, av)
!             elif flags & SRE_FLAG_UNICODE:
!                 av = AT_UNICODE.get(av, av)
!             emit(ATCODES[av])
          elif op is BRANCH:
              emit(OPCODES[op])
***************
*** 122,130 ****
              emit(OPCODES[op])
              if flags & SRE_FLAG_LOCALE:
!                 emit(CHCODES[CH_LOCALE[av]])
              elif flags & SRE_FLAG_UNICODE:
!                 emit(CHCODES[CH_UNICODE[av]])
!             else:
!                 emit(CHCODES[av])
          elif op is GROUPREF:
              if flags & SRE_FLAG_IGNORECASE:
--- 128,135 ----
              emit(OPCODES[op])
              if flags & SRE_FLAG_LOCALE:
!                 av = CH_LOCALE[av]
              elif flags & SRE_FLAG_UNICODE:
!                 av = CH_UNICODE[av]
!             emit(CHCODES[av])
          elif op is GROUPREF:
              if flags & SRE_FLAG_IGNORECASE:
***************
*** 177,181 ****
                      charmap[i] = 1
              elif op is CATEGORY:
!                 # FIXME: could append to charmap tail
                  return charset # cannot compress
      except IndexError:
--- 182,186 ----
                      charmap[i] = 1
              elif op is CATEGORY:
!                 # XXX: could append to charmap tail
                  return charset # cannot compress
      except IndexError:
***************
*** 365,369 ****
      # print code
  
!     # FIXME: <fl> get rid of this limitation!
      assert p.pattern.groups <= 100,\
             "sorry, but this version only supports 100 named groups"
--- 370,374 ----
      # print code
  
!     # XXX: <fl> get rid of this limitation!
      assert p.pattern.groups <= 100,\
             "sorry, but this version only supports 100 named groups"

Index: sre_constants.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sre_constants.py,v
retrieving revision 1.21
retrieving revision 1.21.2.1
diff -C2 -r1.21 -r1.21.2.1
*** sre_constants.py	2000/10/07 17:38:22	1.21
--- sre_constants.py	2001/06/13 15:15:02	1.21.2.1
***************
*** 5,15 ****
  # run this script to update the _sre include files!
  #
! # Copyright (c) 1998-2000 by Secret Labs AB.  All rights reserved.
  #
  # See the sre.py file for information on usage and redistribution.
  #
  
  MAXREPEAT = 65535
  
  # should this really be here?
  
--- 5,22 ----
  # run this script to update the _sre include files!
  #
! # Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.
  #
  # See the sre.py file for information on usage and redistribution.
  #
  
+ # update when constants are added or removed
+ 
+ MAGIC = 20010320
+ 
+ # max code word in this release
+ 
  MAXREPEAT = 65535
  
+ # SRE standard exception (access as sre.error)
  # should this really be here?
  
***************
*** 55,62 ****
--- 62,75 ----
  AT_BEGINNING = "at_beginning"
  AT_BEGINNING_LINE = "at_beginning_line"
+ AT_BEGINNING_STRING = "at_beginning_string"
  AT_BOUNDARY = "at_boundary"
  AT_NON_BOUNDARY = "at_non_boundary"
  AT_END = "at_end"
  AT_END_LINE = "at_end_line"
+ AT_END_STRING = "at_end_string"
+ AT_LOC_BOUNDARY = "at_loc_boundary"
+ AT_LOC_NON_BOUNDARY = "at_loc_non_boundary"
+ AT_UNI_BOUNDARY = "at_uni_boundary"
+ AT_UNI_NON_BOUNDARY = "at_uni_non_boundary"
  
  # categories
***************
*** 110,115 ****
  
  ATCODES = [
!     AT_BEGINNING, AT_BEGINNING_LINE, AT_BOUNDARY,
!     AT_NON_BOUNDARY, AT_END, AT_END_LINE
  ]
  
--- 123,130 ----
  
  ATCODES = [
!     AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY,
!     AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING,
!     AT_LOC_BOUNDARY, AT_LOC_NON_BOUNDARY, AT_UNI_BOUNDARY,
!     AT_UNI_NON_BOUNDARY
  ]
  
***************
*** 149,152 ****
--- 164,177 ----
  }
  
+ AT_LOCALE = {
+     AT_BOUNDARY: AT_LOC_BOUNDARY,
+     AT_NON_BOUNDARY: AT_LOC_NON_BOUNDARY
+ }
+ 
+ AT_UNICODE = {
+     AT_BOUNDARY: AT_UNI_BOUNDARY,
+     AT_NON_BOUNDARY: AT_UNI_NON_BOUNDARY
+ }
+ 
  CH_LOCALE = {
      CATEGORY_DIGIT: CATEGORY_DIGIT,
***************
*** 179,182 ****
--- 204,208 ----
  SRE_FLAG_UNICODE = 32 # use unicode locale
  SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments
+ SRE_FLAG_DEBUG = 128 # debugging
  
  # flags for INFO primitive
***************
*** 202,206 ****
   * to change anything in here, edit sre_constants.py and run it.
   *
!  * Copyright (c) 1997-2000 by Secret Labs AB.  All rights reserved.
   *
   * See the _sre.c file for information on usage and redistribution.
--- 228,232 ----
   * to change anything in here, edit sre_constants.py and run it.
   *
!  * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
   *
   * See the _sre.c file for information on usage and redistribution.
***************
*** 208,211 ****
--- 234,239 ----
  
  """)
+ 
+     f.write("#define SRE_MAGIC %d\n" % MAGIC)
  
      dump(f, OPCODES, "SRE_OP")

Index: sre_parse.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sre_parse.py,v
retrieving revision 1.37
retrieving revision 1.37.2.1
diff -C2 -r1.37 -r1.37.2.1
*** sre_parse.py	2000/10/07 17:38:22	1.37
--- sre_parse.py	2001/06/13 15:15:02	1.37.2.1
***************
*** 4,12 ****
  # convert re-style regular expression to sre pattern
  #
! # Copyright (c) 1998-2000 by Secret Labs AB.  All rights reserved.
  #
  # See the sre.py file for information on usage and redistribution.
  #
  
  import string, sys
  
--- 4,15 ----
  # convert re-style regular expression to sre pattern
  #
! # Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.
  #
  # See the sre.py file for information on usage and redistribution.
  #
  
+ # XXX: show string offset and offending character for all errors
+ 
+ # this module works under 1.5.2 and later.  don't use string methods
  import string, sys
  
***************
*** 24,39 ****
  
  ESCAPES = {
!     r"\a": (LITERAL, 7),
!     r"\b": (LITERAL, 8),
!     r"\f": (LITERAL, 12),
!     r"\n": (LITERAL, 10),
!     r"\r": (LITERAL, 13),
!     r"\t": (LITERAL, 9),
!     r"\v": (LITERAL, 11),
      r"\\": (LITERAL, ord("\\"))
  }
  
  CATEGORIES = {
!     r"\A": (AT, AT_BEGINNING), # start of string
      r"\b": (AT, AT_BOUNDARY),
      r"\B": (AT, AT_NON_BOUNDARY),
--- 27,42 ----
  
  ESCAPES = {
!     r"\a": (LITERAL, ord("\a")),
!     r"\b": (LITERAL, ord("\b")),
!     r"\f": (LITERAL, ord("\f")),
!     r"\n": (LITERAL, ord("\n")),
!     r"\r": (LITERAL, ord("\r")),
!     r"\t": (LITERAL, ord("\t")),
!     r"\v": (LITERAL, ord("\v")),
      r"\\": (LITERAL, ord("\\"))
  }
  
  CATEGORIES = {
!     r"\A": (AT, AT_BEGINNING_STRING), # start of string
      r"\b": (AT, AT_BOUNDARY),
      r"\B": (AT, AT_NON_BOUNDARY),
***************
*** 44,48 ****
      r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
      r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
!     r"\Z": (AT, AT_END), # end of string
  }
  
--- 47,51 ----
      r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
      r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
!     r"\Z": (AT, AT_END_STRING), # end of string
  }
  
***************
*** 59,74 ****
  }
  
  class Pattern:
      # master pattern object.  keeps track of global attributes
      def __init__(self):
          self.flags = 0
          self.groups = 1
          self.groupdict = {}
!     def getgroup(self, name=None):
          gid = self.groups
          self.groups = gid + 1
          if name:
              self.groupdict[name] = gid
          return gid
  
  class SubPattern:
--- 62,90 ----
  }
  
+ # figure out best way to convert hex/octal numbers to integers
+ try:
+     int("10", 8)
+     atoi = int # 2.0 and later
+ except TypeError:
+     atoi = string.atoi # 1.5.2
+ 
  class Pattern:
      # master pattern object.  keeps track of global attributes
      def __init__(self):
          self.flags = 0
+         self.open = []
          self.groups = 1
          self.groupdict = {}
!     def opengroup(self, name=None):
          gid = self.groups
          self.groups = gid + 1
          if name:
              self.groupdict[name] = gid
+         self.open.append(gid)
          return gid
+     def closegroup(self, gid):
+         self.open.remove(gid)
+     def checkgroup(self, gid):
+         return gid < self.groups and gid not in self.open
  
  class SubPattern:
***************
*** 209,213 ****
      # check if the escape string represents a valid group
      try:
!         gid = int(escape[1:])
          if gid and gid < groups:
              return gid
--- 225,229 ----
      # check if the escape string represents a valid group
      try:
!         gid = atoi(escape[1:])
          if gid and gid < groups:
              return gid
***************
*** 232,236 ****
              if len(escape) != 2:
                  raise error, "bogus escape: %s" % repr("\\" + escape)
!             return LITERAL, int(escape, 16) & 0xff
          elif str(escape[1:2]) in OCTDIGITS:
              # octal escape (up to three digits)
--- 248,252 ----
              if len(escape) != 2:
                  raise error, "bogus escape: %s" % repr("\\" + escape)
!             return LITERAL, atoi(escape, 16) & 0xff
          elif str(escape[1:2]) in OCTDIGITS:
              # octal escape (up to three digits)
***************
*** 238,242 ****
                  escape = escape + source.get()
              escape = escape[1:]
!             return LITERAL, int(escape, 8) & 0xff
          if len(escape) == 2:
              return LITERAL, ord(escape[1])
--- 254,258 ----
                  escape = escape + source.get()
              escape = escape[1:]
!             return LITERAL, atoi(escape, 8) & 0xff
          if len(escape) == 2:
              return LITERAL, ord(escape[1])
***************
*** 260,269 ****
              if len(escape) != 4:
                  raise ValueError
!             return LITERAL, int(escape[2:], 16) & 0xff
          elif escape[1:2] == "0":
              # octal escape
              while source.next in OCTDIGITS and len(escape) < 4:
                  escape = escape + source.get()
!             return LITERAL, int(escape[1:], 8) & 0xff
          elif escape[1:2] in DIGITS:
              # octal escape *or* decimal group reference (sigh)
--- 276,285 ----
              if len(escape) != 4:
                  raise ValueError
!             return LITERAL, atoi(escape[2:], 16) & 0xff
          elif escape[1:2] == "0":
              # octal escape
              while source.next in OCTDIGITS and len(escape) < 4:
                  escape = escape + source.get()
!             return LITERAL, atoi(escape[1:], 8) & 0xff
          elif escape[1:2] in DIGITS:
              # octal escape *or* decimal group reference (sigh)
***************
*** 275,282 ****
                      # got three octal digits; this is an octal escape
                      escape = escape + source.get()
!                     return LITERAL, int(escape[1:], 8) & 0xff
              # got at least one decimal digit; this is a group reference
              group = _group(escape, state.groups)
              if group:
                  return GROUPREF, group
              raise ValueError
--- 291,300 ----
                      # got three octal digits; this is an octal escape
                      escape = escape + source.get()
!                     return LITERAL, atoi(escape[1:], 8) & 0xff
              # got at least one decimal digit; this is a group reference
              group = _group(escape, state.groups)
              if group:
+                 if not state.checkgroup(group):
+                     raise error, "cannot refer to open group"
                  return GROUPREF, group
              raise ValueError
***************
*** 403,411 ****
                              code2 = LITERAL, ord(this)
                          if code1[0] != LITERAL or code2[0] != LITERAL:
!                             raise error, "illegal range"
                          lo = code1[1]
                          hi = code2[1]
                          if hi < lo:
!                             raise error, "illegal range"
                          set.append((RANGE, (lo, hi)))
                  else:
--- 421,429 ----
                              code2 = LITERAL, ord(this)
                          if code1[0] != LITERAL or code2[0] != LITERAL:
!                             raise error, "bad character range"
                          lo = code1[1]
                          hi = code2[1]
                          if hi < lo:
!                             raise error, "bad character range"
                          set.append((RANGE, (lo, hi)))
                  else:
***************
*** 414,418 ****
                      set.append(code1)
  
!             # FIXME: <fl> move set optimization to compiler!
              if len(set)==1 and set[0][0] is LITERAL:
                  subpattern.append(set[0]) # optimization
--- 432,436 ----
                      set.append(code1)
  
!             # XXX: <fl> should move set optimization to compiler!
              if len(set)==1 and set[0][0] is LITERAL:
                  subpattern.append(set[0]) # optimization
***************
*** 420,424 ****
                  subpattern.append((NOT_LITERAL, set[1][1])) # optimization
              else:
!                 # FIXME: <fl> add charmap optimization
                  subpattern.append((IN, set))
  
--- 438,442 ----
                  subpattern.append((NOT_LITERAL, set[1][1])) # optimization
              else:
!                 # XXX: <fl> should add charmap optimization here
                  subpattern.append((IN, set))
  
***************
*** 429,432 ****
--- 447,451 ----
              elif this == "*":
                  min, max = 0, MAXREPEAT
+ 
              elif this == "+":
                  min, max = 1, MAXREPEAT
***************
*** 447,454 ****
                      continue
                  if lo:
!                     min = int(lo)
                  if hi:
!                     max = int(hi)
!                 # FIXME: <fl> check that hi >= lo!
              else:
                  raise error, "not supported"
--- 466,474 ----
                      continue
                  if lo:
!                     min = atoi(lo)
                  if hi:
!                     max = atoi(hi)
!                 if max < min:
!                     raise error, "bad repeat interval"
              else:
                  raise error, "not supported"
***************
*** 457,461 ****
--- 477,485 ----
                  item = subpattern[-1:]
              else:
+                 item = None
+             if not item or (len(item) == 1 and item[0][0] == AT):
                  raise error, "nothing to repeat"
+             if item[0][0] in (MIN_REPEAT, MAX_REPEAT):
+                 raise error, "multiple repeat"
              if source.match("?"):
                  subpattern[-1] = (MIN_REPEAT, (min, max, item))
***************
*** 486,490 ****
                          group = 1
                          if not isname(name):
!                             raise error, "illegal character in group name"
                      elif source.match("="):
                          # named backreference
--- 510,514 ----
                          group = 1
                          if not isname(name):
!                             raise error, "bad character in group name"
                      elif source.match("="):
                          # named backreference
***************
*** 498,502 ****
                              name = name + char
                          if not isname(name):
!                             raise error, "illegal character in group name"
                          gid = state.groupdict.get(name)
                          if gid is None:
--- 522,526 ----
                              name = name + char
                          if not isname(name):
!                             raise error, "bad character in group name"
                          gid = state.groupdict.get(name)
                          if gid is None:
***************
*** 540,543 ****
--- 564,569 ----
                  else:
                      # flags
+                     if not FLAGS.has_key(source.next):
+                         raise error, "unexpected end of pattern"
                      while FLAGS.has_key(source.next):
                          state.flags = state.flags | FLAGS[source.get()]
***************
*** 548,560 ****
                      group = None
                  else:
!                     group = state.getgroup(name)
                  p = _parse_sub(source, state)
                  if not source.match(")"):
                      raise error, "unbalanced parenthesis"
                  subpattern.append((SUBPATTERN, (group, p)))
              else:
                  while 1:
                      char = source.get()
!                     if char is None or char == ")":
                          break
                      raise error, "unknown extension"
--- 574,590 ----
                      group = None
                  else:
!                     group = state.opengroup(name)
                  p = _parse_sub(source, state)
                  if not source.match(")"):
                      raise error, "unbalanced parenthesis"
+                 if group is not None:
+                     state.closegroup(group)
                  subpattern.append((SUBPATTERN, (group, p)))
              else:
                  while 1:
                      char = source.get()
!                     if char is None:
!                         raise error, "unexpected end of pattern"
!                     if char == ")":
                          break
                      raise error, "unknown extension"
***************
*** 583,586 ****
--- 613,617 ----
          pattern = Pattern()
      pattern.flags = flags
+     pattern.str = str
  
      p = _parse_sub(source, pattern, 0)
***************
*** 592,596 ****
          raise error, "bogus characters at end of regular expression"
  
!     # p.dump()
  
      if not (flags & SRE_FLAG_VERBOSE) and p.pattern.flags & SRE_FLAG_VERBOSE:
--- 623,628 ----
          raise error, "bogus characters at end of regular expression"
  
!     if flags & SRE_FLAG_DEBUG:
!         p.dump()
  
      if not (flags & SRE_FLAG_VERBOSE) and p.pattern.flags & SRE_FLAG_VERBOSE:
***************
*** 607,610 ****
--- 639,652 ----
      p = []
      a = p.append
+     def literal(literal, p=p):
+         if p and p[-1][0] is LITERAL:
+             p[-1] = LITERAL, p[-1][1] + literal
+         else:
+             p.append((LITERAL, literal))
+     sep = source[:0]
+     if type(sep) is type(""):
+         char = chr
+     else:
+         char = unichr
      while 1:
          this = s.get()
***************
*** 626,633 ****
                      raise error, "bad group name"
                  try:
!                     index = int(name)
                  except ValueError:
                      if not isname(name):
!                         raise error, "illegal character in group name"
                      try:
                          index = pattern.groupindex[name]
--- 668,675 ----
                      raise error, "bad group name"
                  try:
!                     index = atoi(name)
                  except ValueError:
                      if not isname(name):
!                         raise error, "bad character in group name"
                      try:
                          index = pattern.groupindex[name]
***************
*** 642,646 ****
                          if (s.next not in DIGITS or
                              not _group(this + s.next, pattern.groups+1)):
!                             code = MARK, int(group)
                              break
                      elif s.next in OCTDIGITS:
--- 684,688 ----
                          if (s.next not in DIGITS or
                              not _group(this + s.next, pattern.groups+1)):
!                             code = MARK, group
                              break
                      elif s.next in OCTDIGITS:
***************
*** 650,682 ****
                  if not code:
                      this = this[1:]
!                     code = LITERAL, int(this[-6:], 8) & 0xff
!                 a(code)
              else:
                  try:
!                     a(ESCAPES[this])
                  except KeyError:
!                     for c in this:
!                         a((LITERAL, ord(c)))
          else:
!             a((LITERAL, ord(this)))
!     return p
  
  def expand_template(template, match):
!     # FIXME: <fl> this is sooooo slow.  drop in the slicelist
!     # code instead
!     p = []
!     a = p.append
      sep = match.string[:0]
!     if type(sep) is type(""):
!         char = chr
!     else:
!         char = unichr
!     for c, s in template:
!         if c is LITERAL:
!             a(char(s))
!         elif c is MARK:
!             s = match.group(s)
              if s is None:
!                 raise error, "empty group"
!             a(s)
!     return string.join(p, sep)
--- 692,732 ----
                  if not code:
                      this = this[1:]
!                     code = LITERAL, char(atoi(this[-6:], 8) & 0xff)
!                 if code[0] is LITERAL:
!                     literal(code[1])
!                 else:
!                     a(code)
              else:
                  try:
!                     this = char(ESCAPES[this][1])
                  except KeyError:
!                     pass
!                 literal(this)
          else:
!             literal(this)
!     # convert template to groups and literals lists
!     i = 0
!     groups = []
!     literals = []
!     for c, s in p:
!         if c is MARK:
!             groups.append((i, s))
!             literals.append(None)
!         else:
!             literals.append(s)
!         i = i + 1
!     return groups, literals
  
  def expand_template(template, match):
!     g = match.group
      sep = match.string[:0]
!     groups, literals = template
!     literals = literals[:]
!     try:
!         for index, group in groups:
!             literals[index] = s = g(group)
              if s is None:
!                 raise IndexError
!     except IndexError:
!         raise error, "empty group"
!     return string.join(literals, sep)