[Python-checkins] python/dist/src/Objects unicodeobject.c,2.187,2.188
loewis@users.sourceforge.net
loewis@users.sourceforge.net
Sun, 18 May 2003 05:31:11 -0700
Update of /cvsroot/python/python/dist/src/Objects
In directory sc8-pr-cvs1:/tmp/cvs-serv31612/Objects
Modified Files:
unicodeobject.c
Log Message:
Handle \U-escapes (\Uxxxxxxxx) in the raw-unicode-escape codec. Fixes #444514.
Index: unicodeobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/unicodeobject.c,v
retrieving revision 2.187
retrieving revision 2.188
diff -C2 -d -r2.187 -r2.188
*** unicodeobject.c 10 Apr 2003 22:35:32 -0000 2.187
--- unicodeobject.c 18 May 2003 12:31:09 -0000 2.188
***************
*** 2031,2034 ****
--- 2031,2035 ----
Py_UCS4 x;
int i;
+ int count;
/* Non-escape characters are interpreted as Unicode ordinals */
***************
*** 2049,2061 ****
if (((s - bs) & 1) == 0 ||
s >= end ||
! *s != 'u') {
continue;
}
p--;
s++;
! /* \uXXXX with 4 hex digits */
outpos = p-PyUnicode_AS_UNICODE(v);
! for (x = 0, i = 0; i < 4; ++i, ++s) {
c = (unsigned char)*s;
if (!isxdigit(c)) {
--- 2050,2063 ----
if (((s - bs) & 1) == 0 ||
s >= end ||
! (*s != 'u' && *s != 'U')) {
continue;
}
p--;
+ count = *s=='u' ? 4 : 8;
s++;
! /* \uXXXX with 4 hex digits, \Uxxxxxxxx with 8 */
outpos = p-PyUnicode_AS_UNICODE(v);
! for (x = 0, i = 0; i < count; ++i, ++s) {
c = (unsigned char)*s;
if (!isxdigit(c)) {
***************
*** 2077,2080 ****
--- 2079,2092 ----
x += 10 + c - 'A';
}
+ #ifndef Py_UNICODE_WIDE
+ if (x > 0x10000) {
+ if (unicode_decode_call_errorhandler(
+ errors, &errorHandler,
+ "rawunicodeescape", "\\Uxxxxxxxx out of range",
+ starts, size, &startinpos, &endinpos, &exc, &s,
+ (PyObject **)&v, &outpos, &p))
+ goto onError;
+ }
+ #endif
*p++ = x;
nextByte:
***************
*** 2103,2107 ****
--- 2115,2123 ----
static const char *hexdigit = "0123456789abcdef";
+ #ifdef Py_UNICODE_WIDE
+ repr = PyString_FromStringAndSize(NULL, 10 * size);
+ #else
repr = PyString_FromStringAndSize(NULL, 6 * size);
+ #endif
if (repr == NULL)
return NULL;
***************
*** 2112,2115 ****
--- 2128,2147 ----
while (size-- > 0) {
Py_UNICODE ch = *s++;
+ #ifdef Py_UNICODE_WIDE
+ /* Map 32-bit characters to '\Uxxxxxxxx' */
+ if (ch >= 0x10000) {
+ *p++ = '\\';
+ *p++ = 'U';
+ *p++ = hexdigit[(ch >> 28) & 0xf];
+ *p++ = hexdigit[(ch >> 24) & 0xf];
+ *p++ = hexdigit[(ch >> 20) & 0xf];
+ *p++ = hexdigit[(ch >> 16) & 0xf];
+ *p++ = hexdigit[(ch >> 12) & 0xf];
+ *p++ = hexdigit[(ch >> 8) & 0xf];
+ *p++ = hexdigit[(ch >> 4) & 0xf];
+ *p++ = hexdigit[ch & 15];
+ }
+ else
+ #endif
/* Map 16-bit characters to '\uxxxx' */
if (ch >= 256) {
***************
*** 6770,6771 ****
--- 6802,6810 ----
unicode_freelist_size = 0;
}
+
+ /*
+ Local variables:
+ c-basic-offset: 4
+ indent-tabs-mode: nil
+ End:
+ */