[Python-checkins] python/dist/src/Objects unicodeobject.c,2.187,2.188

loewis@users.sourceforge.net loewis@users.sourceforge.net
Sun, 18 May 2003 05:31:11 -0700


Update of /cvsroot/python/python/dist/src/Objects
In directory sc8-pr-cvs1:/tmp/cvs-serv31612/Objects

Modified Files:
	unicodeobject.c 
Log Message:
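Handle \Uxxxxxxxx escapes in the raw-unicode-escape codec: the decoder now accepts them, and wide (UCS-4) builds emit them when encoding characters >= 0x10000. Fixes #444514.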


Index: unicodeobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/unicodeobject.c,v
retrieving revision 2.187
retrieving revision 2.188
diff -C2 -d -r2.187 -r2.188
*** unicodeobject.c	10 Apr 2003 22:35:32 -0000	2.187
--- unicodeobject.c	18 May 2003 12:31:09 -0000	2.188
***************
*** 2031,2034 ****
--- 2031,2035 ----
  	Py_UCS4 x;
  	int i;
+         int count;
  
  	/* Non-escape characters are interpreted as Unicode ordinals */
***************
*** 2049,2061 ****
  	if (((s - bs) & 1) == 0 ||
  	    s >= end ||
! 	    *s != 'u') {
  	    continue;
  	}
  	p--;
  	s++;
  
! 	/* \uXXXX with 4 hex digits */
  	outpos = p-PyUnicode_AS_UNICODE(v);
! 	for (x = 0, i = 0; i < 4; ++i, ++s) {
  	    c = (unsigned char)*s;
  	    if (!isxdigit(c)) {
--- 2050,2063 ----
  	if (((s - bs) & 1) == 0 ||
  	    s >= end ||
! 	    (*s != 'u' && *s != 'U')) {
  	    continue;
  	}
  	p--;
+         count = *s=='u' ? 4 : 8;
  	s++;
  
! 	/* \uXXXX with 4 hex digits, \Uxxxxxxxx with 8 */
  	outpos = p-PyUnicode_AS_UNICODE(v);
! 	for (x = 0, i = 0; i < count; ++i, ++s) {
  	    c = (unsigned char)*s;
  	    if (!isxdigit(c)) {
***************
*** 2077,2080 ****
--- 2079,2092 ----
  		x += 10 + c - 'A';
  	}
+ #ifndef Py_UNICODE_WIDE
+         if (x > 0x10000) {
+             if (unicode_decode_call_errorhandler(
+                     errors, &errorHandler,
+                     "rawunicodeescape", "\\Uxxxxxxxx out of range",
+ 		    starts, size, &startinpos, &endinpos, &exc, &s,
+ 		    (PyObject **)&v, &outpos, &p))
+ 		    goto onError;
+         }
+ #endif
  	*p++ = x;
  	nextByte:
***************
*** 2103,2107 ****
--- 2115,2123 ----
      static const char *hexdigit = "0123456789abcdef";
  
+ #ifdef Py_UNICODE_WIDE
+     repr = PyString_FromStringAndSize(NULL, 10 * size);
+ #else
      repr = PyString_FromStringAndSize(NULL, 6 * size);
+ #endif
      if (repr == NULL)
          return NULL;
***************
*** 2112,2115 ****
--- 2128,2147 ----
      while (size-- > 0) {
          Py_UNICODE ch = *s++;
+ #ifdef Py_UNICODE_WIDE
+ 	/* Map 32-bit characters to '\Uxxxxxxxx' */
+ 	if (ch >= 0x10000) {
+             *p++ = '\\';
+             *p++ = 'U';
+             *p++ = hexdigit[(ch >> 28) & 0xf];
+             *p++ = hexdigit[(ch >> 24) & 0xf];
+             *p++ = hexdigit[(ch >> 20) & 0xf];
+             *p++ = hexdigit[(ch >> 16) & 0xf];
+             *p++ = hexdigit[(ch >> 12) & 0xf];
+             *p++ = hexdigit[(ch >> 8) & 0xf];
+             *p++ = hexdigit[(ch >> 4) & 0xf];
+             *p++ = hexdigit[ch & 15];
+         } 
+         else
+ #endif
  	/* Map 16-bit characters to '\uxxxx' */
  	if (ch >= 256) {
***************
*** 6770,6771 ****
--- 6802,6810 ----
      unicode_freelist_size = 0;
  }
+ 
+ /*
+ Local variables:
+ c-basic-offset: 4
+ indent-tabs-mode: nil
+ End:
+ */