[Python-checkins] CVS: python/dist/src/Objects unicodeobject.c,2.130,2.131

Thu, 21 Mar 2002 00:55:30 -0800

Update of /cvsroot/python/python/dist/src/Objects
In directory usw-pr-cvs1:/tmp/cvs-serv3002/Objects

Modified Files:
	unicodeobject.c 
Log Message:
Do not insert characters in the unicode-escape decoders when the error
mode is "ignore". Fixes #529104.


Index: unicodeobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/unicodeobject.c,v
retrieving revision 2.130
retrieving revision 2.131
diff -C2 -d -r2.130 -r2.131
*** unicodeobject.c	28 Feb 2002 11:38:24 -0000	2.130
--- unicodeobject.c	21 Mar 2002 08:55:28 -0000	2.131
***************
*** 1515,1520 ****
  
  static
! int unicodeescape_decoding_error(const char **source,
!                                  Py_UNICODE *x,
                                   const char *errors,
                                   const char *details) 
--- 1515,1519 ----
  
  static
! int unicodeescape_decoding_error(Py_UNICODE **x,
                                   const char *errors,
                                   const char *details) 
***************
*** 1531,1535 ****
      }
      else if (strcmp(errors,"replace") == 0) {
!         *x = Py_UNICODE_REPLACEMENT_CHARACTER;
          return 0;
      }
--- 1530,1535 ----
      }
      else if (strcmp(errors,"replace") == 0) {
!         **x = Py_UNICODE_REPLACEMENT_CHARACTER;
! 	(*x)++;
          return 0;
      }
***************
*** 1629,1635 ****
                  c = (unsigned char) s[i];
                  if (!isxdigit(c)) {
!                     if (unicodeescape_decoding_error(&s, &x, errors, message))
                          goto onError;
!                     chr = x;
                      i++;
                      break;
--- 1629,1635 ----
                  c = (unsigned char) s[i];
                  if (!isxdigit(c)) {
!                     if (unicodeescape_decoding_error(&p, errors, message))
                          goto onError;
!                     chr = 0xffffffff;
                      i++;
                      break;
***************
*** 1644,1647 ****
--- 1644,1651 ----
              }
              s += i;
+ 	    if (chr == 0xffffffff)
+ 		    /* _decoding_error will have already written into the
+ 		       target buffer. */
+ 		    break;
          store:
              /* when we get here, chr is a 32-bit unicode character */
***************
*** 1661,1669 ****
              } else {
                  if (unicodeescape_decoding_error(
!                     &s, &x, errors,
                      "illegal Unicode character")
                      )
                      goto onError;
-                 *p++ = x; /* store replacement character */
              }
              break;
--- 1665,1672 ----
              } else {
                  if (unicodeescape_decoding_error(
!                     &p, errors,
                      "illegal Unicode character")
                      )
                      goto onError;
              }
              break;
***************
*** 1700,1711 ****
                  }
              }
!             if (unicodeescape_decoding_error(&s, &x, errors, message))
                  goto onError;
-             *p++ = x;
              break;
  
          default:
!             *p++ = '\\';
!             *p++ = (unsigned char)s[-1];
              break;
          }
--- 1703,1719 ----
                  }
              }
!             if (unicodeescape_decoding_error(&p, errors, message))
                  goto onError;
              break;
  
          default:
! 	    if (s > end) {
! 		if (unicodeescape_decoding_error(&p, errors, "\\ at end of string"))
! 		    goto onError;
! 	    }
! 	    else {
! 		*p++ = '\\';
! 		*p++ = (unsigned char)s[-1];
! 	    }
              break;
          }
***************
*** 1910,1914 ****
      while (s < end) {
  	unsigned char c;
! 	Py_UNICODE x;
  	int i;
  
--- 1918,1922 ----
      while (s < end) {
  	unsigned char c;
! 	Py_UCS4 x;
  	int i;
  
***************
*** 1939,1945 ****
  	    c = (unsigned char)s[i];
  	    if (!isxdigit(c)) {
! 		if (unicodeescape_decoding_error(&s, &x, errors,
  						 "truncated \\uXXXX"))
  		    goto onError;
  		i++;
  		break;
--- 1947,1954 ----
  	    c = (unsigned char)s[i];
  	    if (!isxdigit(c)) {
! 		if (unicodeescape_decoding_error(&p, errors,
  						 "truncated \\uXXXX"))
  		    goto onError;
+ 		x = 0xffffffff;
  		i++;
  		break;
***************
*** 1954,1958 ****
  	}
  	s += i;
! 	*p++ = x;
      }
      if (_PyUnicode_Resize(&v, (int)(p - buf)))
--- 1963,1968 ----
  	}
  	s += i;
! 	if (x != 0xffffffff)
! 		*p++ = x;
      }
      if (_PyUnicode_Resize(&v, (int)(p - buf)))