Update of /cvsroot/python/python/dist/src/Objects In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv5498/Objects Modified Files: Tag: release24-maint unicodeobject.c Log Message: Backport checkin: SF bug #1251300: On UCS-4 builds the "unicode-internal" codec will now complain about illegal code points. The codec now supports PEP 293 style error handlers. (This is a variant of the patch by Nik Haldimann that detects truncated data) Index: unicodeobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/unicodeobject.c,v retrieving revision 2.230 retrieving revision 2.230.2.1 diff -u -d -r2.230 -r2.230.2.1 --- unicodeobject.c 22 Nov 2004 13:02:30 -0000 2.230 +++ unicodeobject.c 30 Aug 2005 10:46:06 -0000 2.230.2.1 @@ -2273,6 +2273,81 @@ PyUnicode_GET_SIZE(unicode)); } +/* --- Unicode Internal Codec ------------------------------------------- */ + +PyObject *_PyUnicode_DecodeUnicodeInternal(const char *s, + int size, + const char *errors) +{ + const char *starts = s; + int startinpos; + int endinpos; + int outpos; + Py_UNICODE unimax; + PyUnicodeObject *v; + Py_UNICODE *p; + const char *end; + const char *reason; + PyObject *errorHandler = NULL; + PyObject *exc = NULL; + + unimax = PyUnicode_GetMax(); + v = _PyUnicode_New((size+Py_UNICODE_SIZE-1)/ Py_UNICODE_SIZE); + if (v == NULL) + goto onError; + if (PyUnicode_GetSize((PyObject *)v) == 0) + return (PyObject *)v; + p = PyUnicode_AS_UNICODE(v); + end = s + size; + + while (s < end) { + *p = *(Py_UNICODE *)s; + /* We have to sanity check the raw data, otherwise doom looms for + some malformed UCS-4 data. */ + if ( + #ifdef Py_UNICODE_WIDE + *p > unimax || *p < 0 || + #endif + end-s < Py_UNICODE_SIZE + ) + { + startinpos = s - starts; + if (end-s < Py_UNICODE_SIZE) { + endinpos = end-starts; + reason = "truncated input"; + } + else { + endinpos = s - starts + Py_UNICODE_SIZE; + reason = "illegal code point (> 0x10FFFF)"; + } + outpos = p - PyUnicode_AS_UNICODE(v); + if (unicode_decode_call_errorhandler( + errors, &errorHandler, + "unicode_internal", reason, + starts, size, &startinpos, &endinpos, &exc, &s, + (PyObject **)&v, &outpos, &p)) { + goto onError; + } + } + else { + p++; + s += Py_UNICODE_SIZE; + } + } + + if (_PyUnicode_Resize(&v, (int)(p - PyUnicode_AS_UNICODE(v))) < 0) + goto onError; + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + return (PyObject *)v; + + onError: + Py_XDECREF(v); + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + return NULL; +} + /* --- Latin-1 Codec ------------------------------------------------------ */ PyObject *PyUnicode_DecodeLatin1(const char *s,