[Python-checkins] CVS: python/dist/src/Lib sre.py,1.13,1.14 sre_compile.py,1.12,1.13 sre_parse.py,1.11,1.12

Fredrik Lundh python-dev@python.org
Thu, 29 Jun 2000 17:27:48 -0700


Update of /cvsroot/python/python/dist/src/Lib
In directory slayer.i.sourceforge.net:/tmp/cvs-serv22449/Lib

Modified Files:
	sre.py sre_compile.py sre_parse.py 
Log Message:

- fixed split behaviour on empty matches

- fixed compiler problems when using locale/unicode flags (the category
  is now mapped through CH_LOCALE/CH_UNICODE before the CHCODES lookup,
  not after it)

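- fixed group/octal code parsing in sub/subn templates (a digit escape is
  treated as a group reference only if it names a group that exists in the
  pattern; otherwise it is parsed as an octal character code)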

Index: sre.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sre.py,v
retrieving revision 1.13
retrieving revision 1.14
diff -C2 -r1.13 -r1.14
*** sre.py	2000/06/29 18:03:25	1.13
--- sre.py	2000/06/30 00:27:45	1.14
***************
*** 110,123 ****
          if not m:
              break
!         j = m.start()
!         if j > i:
!             append(string[i:j])
          append(filter(m))
!         i = m.end()
! 	if i <= j:
! 	    break
          n = n + 1
!     if i < len(string):
!         append(string[i:])
      return string[:0].join(s), n
  
--- 110,120 ----
          if not m:
              break
! 	b, e = m.span()
!         if i < b:
!             append(string[i:b])
          append(filter(m))
! 	i = e
          n = n + 1
!     append(string[i:])
      return string[:0].join(s), n
  
***************
*** 129,133 ****
      extend = s.extend
      c = pattern.scanner(string)
!     g = c.groups
      while not maxsplit or n < maxsplit:
          m = c.search()
--- 126,130 ----
      extend = s.extend
      c = pattern.scanner(string)
!     g = pattern.groups
      while not maxsplit or n < maxsplit:
          m = c.search()

Index: sre_compile.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sre_compile.py,v
retrieving revision 1.12
retrieving revision 1.13
diff -C2 -r1.12 -r1.13
*** sre_compile.py	2000/06/29 23:33:11	1.12
--- sre_compile.py	2000/06/30 00:27:45	1.13
***************
*** 62,68 ****
  	    emit(OPCODES[op])
  	    if flags & SRE_FLAG_LOCALE:
! 		emit(CH_LOCALE[CHCODES[av]])
  	    elif flags & SRE_FLAG_UNICODE:
! 		emit(CH_UNICODE[CHCODES[av]])
  	    else:
  		emit(CHCODES[av])
--- 62,68 ----
  	    emit(OPCODES[op])
  	    if flags & SRE_FLAG_LOCALE:
! 		emit(CHCODES[CH_LOCALE[av]])
  	    elif flags & SRE_FLAG_UNICODE:
! 		emit(CHCODES[CH_UNICODE[av]])
  	    else:
  		emit(CHCODES[av])
***************
*** 93,99 ****
  		elif op is CATEGORY:
  		    if flags & SRE_FLAG_LOCALE:
! 			emit(CH_LOCALE[CHCODES[av]])
  		    elif flags & SRE_FLAG_UNICODE:
! 			emit(CH_UNICODE[CHCODES[av]])
  		    else:
  			emit(CHCODES[av])
--- 93,99 ----
  		elif op is CATEGORY:
  		    if flags & SRE_FLAG_LOCALE:
! 			emit(CHCODES[CH_LOCALE[av]])
  		    elif flags & SRE_FLAG_UNICODE:
! 			emit(CHCODES[CH_UNICODE[av]])
  		    else:
  			emit(CHCODES[av])

Index: sre_parse.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sre_parse.py,v
retrieving revision 1.11
retrieving revision 1.12
diff -C2 -r1.11 -r1.12
*** sre_parse.py	2000/06/29 18:03:25	1.11
--- sre_parse.py	2000/06/30 00:27:45	1.12
***************
*** 31,54 ****
  
  ESCAPES = {
!     "\\a": (LITERAL, chr(7)),
!     "\\b": (LITERAL, chr(8)),
!     "\\f": (LITERAL, chr(12)),
!     "\\n": (LITERAL, chr(10)),
!     "\\r": (LITERAL, chr(13)),
!     "\\t": (LITERAL, chr(9)),
!     "\\v": (LITERAL, chr(11))
  }
  
  CATEGORIES = {
!     "\\A": (AT, AT_BEGINNING), # start of string
!     "\\b": (AT, AT_BOUNDARY),
!     "\\B": (AT, AT_NON_BOUNDARY),
!     "\\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),
!     "\\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]),
!     "\\s": (IN, [(CATEGORY, CATEGORY_SPACE)]),
!     "\\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
!     "\\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
!     "\\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
!     "\\Z": (AT, AT_END), # end of string
  }
  
--- 31,55 ----
  
  ESCAPES = {
!     r"\a": (LITERAL, chr(7)),
!     r"\b": (LITERAL, chr(8)),
!     r"\f": (LITERAL, chr(12)),
!     r"\n": (LITERAL, chr(10)),
!     r"\r": (LITERAL, chr(13)),
!     r"\t": (LITERAL, chr(9)),
!     r"\v": (LITERAL, chr(11)),
!     r"\\": (LITERAL, "\\")
  }
  
  CATEGORIES = {
!     r"\A": (AT, AT_BEGINNING), # start of string
!     r"\b": (AT, AT_BOUNDARY),
!     r"\B": (AT, AT_NON_BOUNDARY),
!     r"\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),
!     r"\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]),
!     r"\s": (IN, [(CATEGORY, CATEGORY_SPACE)]),
!     r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
!     r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
!     r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
!     r"\Z": (AT, AT_END), # end of string
  }
  
***************
*** 186,194 ****
      return 1
  
! def _group(escape, state):
      # check if the escape string represents a valid group
      try:
  	group = int(escape[1:])
! 	if group and group < state.groups:
  	    return group
      except ValueError:
--- 187,195 ----
      return 1
  
! def _group(escape, groups):
      # check if the escape string represents a valid group
      try:
  	group = int(escape[1:])
! 	if group and group < groups:
  	    return group
      except ValueError:
***************
*** 240,247 ****
  	elif escape[1:2] in DIGITS:
  	    while 1:
! 		group = _group(escape, state)
  		if group:
  		    if (not source.next or
! 			not _group(escape + source.next, state)):
  		        return GROUP, group
  		    escape = escape + source.get()
--- 241,248 ----
  	elif escape[1:2] in DIGITS:
  	    while 1:
! 		group = _group(escape, state.groups)
  		if group:
  		    if (not source.next or
! 			not _group(escape + source.next, state.groups)):
  		        return GROUP, group
  		    escape = escape + source.get()
***************
*** 535,538 ****
--- 536,540 ----
  	    break # end of replacement string
  	if this and this[0] == "\\":
+ 	    # group
  	    if this == "\\g":
  		name = ""
***************
*** 558,570 ****
  		a((MARK, index))
  	    elif len(this) > 1 and this[1] in DIGITS:
! 		while s.next in DIGITS:
! 		    this = this + s.get()
! 		a((MARK, int(this[1:])))
  	    else:
  		try:
  		    a(ESCAPES[this])
  		except KeyError:
! 		    for char in this:
! 			a((LITERAL, char))
  	else:
  	    a((LITERAL, this))
--- 560,586 ----
  		a((MARK, index))
  	    elif len(this) > 1 and this[1] in DIGITS:
! 		code = None
! 		while 1:
! 		    group = _group(this, pattern.groups+1)
! 		    if group:
! 			if (not s.next or
! 			    not _group(this + s.next, pattern.groups+1)):
! 		            code = MARK, int(group)
! 			    break
! 		    elif s.next in OCTDIGITS:
! 			this = this + s.get()
! 		    else:
! 			break
! 		if not code:
! 		    this = this[1:]
! 		    # FIXME: support unicode characters!
! 		    code = LITERAL, chr(int(this[-6:], 8) & 0xff)
! 		a(code)
  	    else:
  		try:
  		    a(ESCAPES[this])
  		except KeyError:
! 		    for c in this:
! 			a((LITERAL, c))
  	else:
  	    a((LITERAL, this))