[Python-checkins] cpython: The locale decoder raises a UnicodeDecodeError instead of an OSError
victor.stinner
python-checkins at python.org
Sat Dec 17 07:07:48 CET 2011
http://hg.python.org/cpython/rev/ea421c534305
changeset: 74010:ea421c534305
user: Victor Stinner <victor.stinner at haypocalc.com>
date: Sat Dec 17 07:08:30 2011 +0100
summary:
The locale decoder raises a UnicodeDecodeError instead of an OSError
Search for the invalid character using mbrtowc().
files:
Objects/unicodeobject.c | 105 +++++++++++++++++++++++----
1 files changed, 87 insertions(+), 18 deletions(-)
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -3084,9 +3084,7 @@
#endif
char outbuf[MB_LEN_MAX];
const wchar_t *start, *previous;
- int save_errno;
-
- save_errno = errno;
+
#if SIZEOF_WCHAR_T == 2
buf[2] = 0;
#else
@@ -3114,14 +3112,11 @@
wstr++;
#endif
len = wcstombs(outbuf, buf, sizeof(outbuf));
- if (len == (size_t)-1) {
- errno = save_errno;
+ if (len == (size_t)-1)
return previous - start;
- }
}
/* failed to find the unencodable character */
- errno = save_errno;
return 0;
}
@@ -3199,7 +3194,7 @@
len = wcstombs(NULL, wstr, 0);
if (len == (size_t)-1) {
- error_pos = wcstombs_errorpos(wstr);
+ error_pos = (size_t)-1;
goto encode_error;
}
@@ -3211,7 +3206,7 @@
len2 = wcstombs(PyBytes_AS_STRING(bytes), wstr, len+1);
if (len2 == (size_t)-1 || len2 > len) {
- error_pos = wcstombs_errorpos(wstr);
+ error_pos = (size_t)-1;
goto encode_error;
}
PyMem_Free(wstr);
@@ -3221,12 +3216,23 @@
encode_error:
errmsg = strerror(errno);
assert(errmsg != NULL);
+
+ if (error_pos == (size_t)-1)
+ error_pos = wcstombs_errorpos(wstr);
+
PyMem_Free(wstr);
Py_XDECREF(bytes);
- if (errmsg != NULL)
- reason = PyUnicode_DecodeLocale(errmsg, "surrogateescape");
- else
+ if (errmsg != NULL) {
+ size_t errlen;
+ wstr = _Py_char2wchar(errmsg, &errlen);
+ if (wstr != NULL) {
+ reason = PyUnicode_FromWideChar(wstr, errlen);
+ PyMem_Free(wstr);
+ } else
+ errmsg = NULL;
+ }
+ if (errmsg == NULL)
reason = PyUnicode_FromString(
"wcstombs() encountered an unencodable "
"wide character");
@@ -3376,6 +3382,37 @@
return NULL;
}
+static size_t
+mbstowcs_errorpos(const char *str, size_t len)
+{
+#ifdef HAVE_MBRTOWC
+ const char *start = str;
+ mbstate_t mbs;
+ size_t converted;
+ wchar_t ch;
+
+ memset(&mbs, 0, sizeof mbs);
+ while (len)
+ {
+ converted = mbrtowc(&ch, (char*)str, len, &mbs);
+ if (converted == 0)
+ /* Reached end of string */
+ break;
+ if (converted == (size_t)-1 || converted == (size_t)-2) {
+ /* Conversion error or incomplete character */
+ return str - start;
+ }
+ else {
+ str += converted;
+ len -= converted;
+ }
+ }
+ /* failed to find the undecodable byte sequence */
+ return 0;
+#endif
+ return 0;
+}
+
PyObject*
PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
const char *errors)
@@ -3386,6 +3423,9 @@
size_t wlen, wlen2;
PyObject *unicode;
int surrogateescape;
+ size_t error_pos;
+ char *errmsg;
+ PyObject *reason, *exc;
if (locale_error_handler(errors, &surrogateescape) < 0)
return NULL;
@@ -3415,10 +3455,8 @@
#else
wlen = len;
#endif
- if (wlen == (size_t)-1) {
- PyErr_SetFromErrno(PyExc_OSError);
- return NULL;
- }
+ if (wlen == (size_t)-1)
+ goto decode_error;
if (wlen+1 <= smallbuf_len) {
wstr = smallbuf;
}
@@ -3436,8 +3474,7 @@
if (wlen2 == (size_t)-1) {
if (wstr != smallbuf)
PyMem_Free(wstr);
- PyErr_SetFromErrno(PyExc_OSError);
- return NULL;
+ goto decode_error;
}
#ifdef HAVE_BROKEN_MBSTOWCS
assert(wlen2 == wlen);
@@ -3447,6 +3484,38 @@
PyMem_Free(wstr);
}
return unicode;
+
+decode_error:
+ errmsg = strerror(errno);
+ assert(errmsg != NULL);
+
+ error_pos = mbstowcs_errorpos(str, len);
+ if (errmsg != NULL) {
+ size_t errlen;
+ wstr = _Py_char2wchar(errmsg, &errlen);
+ if (wstr != NULL) {
+ reason = PyUnicode_FromWideChar(wstr, errlen);
+ PyMem_Free(wstr);
+ } else
+ errmsg = NULL;
+ }
+ if (errmsg == NULL)
+ reason = PyUnicode_FromString(
+ "mbstowcs() encountered an invalid multibyte sequence");
+ if (reason == NULL)
+ return NULL;
+
+ exc = PyObject_CallFunction(PyExc_UnicodeDecodeError, "sy#nnO",
+ "locale", str, len,
+ (Py_ssize_t)error_pos,
+ (Py_ssize_t)(error_pos+1),
+ reason);
+ Py_DECREF(reason);
+ if (exc != NULL) {
+ PyCodec_StrictErrors(exc);
+ Py_XDECREF(exc);
+ }
+ return NULL;
}
PyObject*
--
Repository URL: http://hg.python.org/cpython
More information about the Python-checkins
mailing list