[Python-checkins] CVS: python/dist/src/Objects unicodeobject.c,2.50,2.51
M.-A. Lemburg
python-dev@python.org
Mon, 17 Jul 2000 11:23:20 -0700
Update of /cvsroot/python/python/dist/src/Objects
In directory slayer.i.sourceforge.net:/tmp/cvs-serv8024/Objects
Modified Files:
unicodeobject.c
Log Message:
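Fixed problems with the UTF-8 and UTF-16 error reporting macros (UTF8_ERROR and UTF16_ERROR are replaced by an errmsg variable and explicit utf8Error/utf16Error labels) and fixed some formatting bugs.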
Index: unicodeobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/unicodeobject.c,v
retrieving revision 2.50
retrieving revision 2.51
diff -C2 -r2.50 -r2.51
*** unicodeobject.c 2000/07/17 09:04:43 2.50
--- unicodeobject.c 2000/07/17 18:23:13 2.51
***************
*** 634,644 ****
}
- #define UTF8_ERROR(details) \
- do { \
- if (utf8_decoding_error(&s, &p, errors, (details))) \
- goto onError; \
- goto nextchar; \
- } while (0)
-
PyObject *PyUnicode_DecodeUTF8(const char *s,
int size,
--- 634,637 ----
***************
*** 649,652 ****
--- 642,646 ----
PyUnicodeObject *unicode;
Py_UNICODE *p;
+ const char *errmsg = "";
/* Note: size will always be longer than the resulting Unicode
***************
*** 673,706 ****
n = utf8_code_length[ch];
! if (s + n > e)
! UTF8_ERROR("unexpected end of data");
switch (n) {
case 0:
! UTF8_ERROR("unexpected code byte");
break;
case 1:
! UTF8_ERROR("internal error");
break;
case 2:
! if ((s[1] & 0xc0) != 0x80)
! UTF8_ERROR("invalid data");
ch = ((s[0] & 0x1f) << 6) + (s[1] & 0x3f);
! if (ch < 0x80)
! UTF8_ERROR("illegal encoding");
else
! *p++ = (Py_UNICODE)ch;
break;
case 3:
if ((s[1] & 0xc0) != 0x80 ||
! (s[2] & 0xc0) != 0x80)
! UTF8_ERROR("invalid data");
ch = ((s[0] & 0x0f) << 12) + ((s[1] & 0x3f) << 6) + (s[2] & 0x3f);
! if (ch < 0x800 || (ch >= 0xd800 && ch < 0xe000))
! UTF8_ERROR("illegal encoding");
else
*p++ = (Py_UNICODE)ch;
--- 667,712 ----
n = utf8_code_length[ch];
! if (s + n > e) {
! errmsg = "unexpected end of data";
! goto utf8Error;
! }
switch (n) {
case 0:
! errmsg = "unexpected code byte";
! goto utf8Error;
break;
case 1:
! errmsg = "internal error";
! goto utf8Error;
break;
case 2:
! if ((s[1] & 0xc0) != 0x80) {
! errmsg = "invalid data";
! goto utf8Error;
! }
ch = ((s[0] & 0x1f) << 6) + (s[1] & 0x3f);
! if (ch < 0x80) {
! errmsg = "illegal encoding";
! goto utf8Error;
! }
else
! *p++ = (Py_UNICODE)ch;
break;
case 3:
if ((s[1] & 0xc0) != 0x80 ||
! (s[2] & 0xc0) != 0x80) {
! errmsg = "invalid data";
! goto utf8Error;
! }
ch = ((s[0] & 0x0f) << 12) + ((s[1] & 0x3f) << 6) + (s[2] & 0x3f);
! if (ch < 0x800 || (ch >= 0xd800 && ch < 0xe000)) {
! errmsg = "illegal encoding";
! goto utf8Error;
! }
else
*p++ = (Py_UNICODE)ch;
***************
*** 710,721 ****
if ((s[1] & 0xc0) != 0x80 ||
(s[2] & 0xc0) != 0x80 ||
! (s[3] & 0xc0) != 0x80)
! UTF8_ERROR("invalid data");
ch = ((s[0] & 0x7) << 18) + ((s[1] & 0x3f) << 12) +
((s[2] & 0x3f) << 6) + (s[3] & 0x3f);
/* validate and convert to UTF-16 */
! if ((ch < 0x10000) || /* minimum value allowed for 4 byte encoding */
! (ch > 0x10ffff)) /* maximum value allowed for UTF-16 */
! UTF8_ERROR("illegal encoding");
/* compute and append the two surrogates: */
--- 716,733 ----
if ((s[1] & 0xc0) != 0x80 ||
(s[2] & 0xc0) != 0x80 ||
! (s[3] & 0xc0) != 0x80) {
! errmsg = "invalid data";
! goto utf8Error;
! }
ch = ((s[0] & 0x7) << 18) + ((s[1] & 0x3f) << 12) +
((s[2] & 0x3f) << 6) + (s[3] & 0x3f);
/* validate and convert to UTF-16 */
! if ((ch < 0x10000) || /* minimum value allowed for 4
! byte encoding */
! (ch > 0x10ffff)) { /* maximum value allowed for
! UTF-16 */
! errmsg = "illegal encoding";
! goto utf8Error;
! }
/* compute and append the two surrogates: */
***************
*** 732,741 ****
default:
/* Other sizes are only needed for UCS-4 */
! UTF8_ERROR("unsupported Unicode code range");
}
s += n;
!
! nextchar:
! ;
}
--- 744,757 ----
default:
/* Other sizes are only needed for UCS-4 */
! errmsg = "unsupported Unicode code range";
! goto utf8Error;
! break;
}
s += n;
! continue;
!
! utf8Error:
! if (utf8_decoding_error(&s, &p, errors, errmsg))
! goto onError;
}
***************
*** 750,757 ****
return NULL;
}
-
- #undef UTF8_ERROR
! /* NOT USED */
#if 0
static
--- 766,772 ----
return NULL;
}
! /* Not used anymore, now that the encoder supports UTF-16
! surrogates. */
#if 0
static
***************
*** 784,788 ****
}
}
! #endif /* NOT USED */
PyObject *PyUnicode_EncodeUTF8(const Py_UNICODE *s,
--- 799,803 ----
}
}
! #endif
PyObject *PyUnicode_EncodeUTF8(const Py_UNICODE *s,
***************
*** 828,832 ****
cbAllocated += 4*10;
if (_PyString_Resize(&v, cbAllocated))
! goto onError;
}
--- 843,847 ----
cbAllocated += 4*10;
if (_PyString_Resize(&v, cbAllocated))
! goto onError;
}
***************
*** 939,948 ****
}
- #define UTF16_ERROR(details) do { \
- if (utf16_decoding_error(&q, &p, errors, details)) \
- goto onError; \
- continue; \
- } while(0)
-
PyObject *PyUnicode_DecodeUTF16(const char *s,
int size,
--- 954,957 ----
***************
*** 954,957 ****
--- 963,967 ----
const Py_UNICODE *q, *e;
int bo = 0;
+ const char *errmsg = "";
/* size should be an even number */
***************
*** 1013,1030 ****
/* UTF-16 code pair: */
! if (q >= e)
! UTF16_ERROR("unexpected end of data");
if (0xDC00 <= *q && *q <= 0xDFFF) {
q++;
! if (0xD800 <= *q && *q <= 0xDBFF)
/* This is valid data (a UTF-16 surrogate pair), but
we are not able to store this information since our
Py_UNICODE type only has 16 bits... this might
change someday, even though it's unlikely. */
! UTF16_ERROR("code pairs are not supported");
else
continue;
}
! UTF16_ERROR("illegal encoding");
}
--- 1023,1049 ----
/* UTF-16 code pair: */
! if (q >= e) {
! errmsg = "unexpected end of data";
! goto utf16Error;
! }
if (0xDC00 <= *q && *q <= 0xDFFF) {
q++;
! if (0xD800 <= *q && *q <= 0xDBFF) {
/* This is valid data (a UTF-16 surrogate pair), but
we are not able to store this information since our
Py_UNICODE type only has 16 bits... this might
change someday, even though it's unlikely. */
! errmsg = "code pairs are not supported";
! goto utf16Error;
! }
else
continue;
}
! errmsg = "illegal encoding";
! /* Fall through to report the error */
!
! utf16Error:
! if (utf16_decoding_error(&q, &p, errors, errmsg))
! goto onError;
}