[Python-checkins] python/dist/src/Objects unicodeobject.c, 2.231, 2.232
Neal Norwitz
nnorwitz at gmail.com
Wed Oct 12 05:50:53 CEST 2005
I don't have a problem with this checkin, but while reviewing it I
noticed something. It seems that the exceptionObject (exc) is
handed back from unicode_decode_call_errorhandler() (via the &exc
argument), but it is never used other than in a call to Py_XDECREF(exc).
It looks like a goto onError always follows a failure of
unicode_decode_call_errorhandler().
Is this the case? Can exceptionObject be removed? I didn't look at
the other parameters to determine whether each one is necessary, and I
didn't study the code long enough to be sure exc can be removed, but it
certainly looked that way from a moderate inspection.
If we could simplify this code it would be nice.
n
--
On 10/6/05, doerwalter at users.sourceforge.net
<doerwalter at users.sourceforge.net> wrote:
> Update of /cvsroot/python/python/dist/src/Objects
> In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv6122/Objects
>
> Modified Files:
> unicodeobject.c
> Log Message:
> Part of SF patch #1313939: Speedup charmap decoding by extending
> PyUnicode_DecodeCharmap() to accept a unicode string as the mapping
> argument which is used as a mapping table.
>
> This code isn't used by any of the codecs yet.
>
>
> Index: unicodeobject.c
> ===================================================================
> RCS file: /cvsroot/python/python/dist/src/Objects/unicodeobject.c,v
> retrieving revision 2.231
> retrieving revision 2.232
> diff -u -d -r2.231 -r2.232
> --- unicodeobject.c 30 Aug 2005 10:23:14 -0000 2.231
> +++ unicodeobject.c 6 Oct 2005 20:29:57 -0000 2.232
> @@ -2833,6 +2833,8 @@
> int extrachars = 0;
> PyObject *errorHandler = NULL;
> PyObject *exc = NULL;
> + Py_UNICODE *mapstring = NULL;
> + int maplen = 0;
>
> /* Default to Latin-1 */
> if (mapping == NULL)
> @@ -2845,91 +2847,121 @@
> return (PyObject *)v;
> p = PyUnicode_AS_UNICODE(v);
> e = s + size;
> - while (s < e) {
> - unsigned char ch = *s;
> - PyObject *w, *x;
> + if (PyUnicode_CheckExact(mapping)) {
> + mapstring = PyUnicode_AS_UNICODE(mapping);
> + maplen = PyUnicode_GET_SIZE(mapping);
> + while (s < e) {
> + unsigned char ch = *s;
> + Py_UNICODE x = 0xfffe; /* illegal value */
>
> - /* Get mapping (char ordinal -> integer, Unicode char or None) */
> - w = PyInt_FromLong((long)ch);
> - if (w == NULL)
> - goto onError;
> - x = PyObject_GetItem(mapping, w);
> - Py_DECREF(w);
> - if (x == NULL) {
> - if (PyErr_ExceptionMatches(PyExc_LookupError)) {
> - /* No mapping found means: mapping is undefined. */
> - PyErr_Clear();
> - x = Py_None;
> - Py_INCREF(x);
> - } else
> - goto onError;
> - }
> + if (ch < maplen)
> + x = mapstring[ch];
>
> - /* Apply mapping */
> - if (PyInt_Check(x)) {
> - long value = PyInt_AS_LONG(x);
> - if (value < 0 || value > 65535) {
> - PyErr_SetString(PyExc_TypeError,
> - "character mapping must be in range(65536)");
> - Py_DECREF(x);
> - goto onError;
> + if (x == 0xfffe) {
> + /* undefined mapping */
> + outpos = p-PyUnicode_AS_UNICODE(v);
> + startinpos = s-starts;
> + endinpos = startinpos+1;
> + if (unicode_decode_call_errorhandler(
> + errors, &errorHandler,
> + "charmap", "character maps to <undefined>",
> + starts, size, &startinpos, &endinpos, &exc, &s,
> + (PyObject **)&v, &outpos, &p)) {
> + goto onError;
> + }
> + continue;
> }
> - *p++ = (Py_UNICODE)value;
> + *p++ = x;
> + ++s;
> }
> - else if (x == Py_None) {
> - /* undefined mapping */
> - outpos = p-PyUnicode_AS_UNICODE(v);
> - startinpos = s-starts;
> - endinpos = startinpos+1;
> - if (unicode_decode_call_errorhandler(
> - errors, &errorHandler,
> - "charmap", "character maps to <undefined>",
> - starts, size, &startinpos, &endinpos, &exc, &s,
> - (PyObject **)&v, &outpos, &p)) {
> - Py_DECREF(x);
> + }
> + else {
> + while (s < e) {
> + unsigned char ch = *s;
> + PyObject *w, *x;
> +
> + /* Get mapping (char ordinal -> integer, Unicode char or None) */
> + w = PyInt_FromLong((long)ch);
> + if (w == NULL)
> goto onError;
> + x = PyObject_GetItem(mapping, w);
> + Py_DECREF(w);
> + if (x == NULL) {
> + if (PyErr_ExceptionMatches(PyExc_LookupError)) {
> + /* No mapping found means: mapping is undefined. */
> + PyErr_Clear();
> + x = Py_None;
> + Py_INCREF(x);
> + } else
> + goto onError;
> }
> - continue;
> - }
> - else if (PyUnicode_Check(x)) {
> - int targetsize = PyUnicode_GET_SIZE(x);
> -
> - if (targetsize == 1)
> - /* 1-1 mapping */
> - *p++ = *PyUnicode_AS_UNICODE(x);
> -
> - else if (targetsize > 1) {
> - /* 1-n mapping */
> - if (targetsize > extrachars) {
> - /* resize first */
> - int oldpos = (int)(p - PyUnicode_AS_UNICODE(v));
> - int needed = (targetsize - extrachars) + \
> - (targetsize << 2);
> - extrachars += needed;
> - if (_PyUnicode_Resize(&v,
> - PyUnicode_GET_SIZE(v) + needed) < 0) {
> - Py_DECREF(x);
> - goto onError;
> +
> + /* Apply mapping */
> + if (PyInt_Check(x)) {
> + long value = PyInt_AS_LONG(x);
> + if (value < 0 || value > 65535) {
> + PyErr_SetString(PyExc_TypeError,
> + "character mapping must be in range(65536)");
> + Py_DECREF(x);
> + goto onError;
> + }
> + *p++ = (Py_UNICODE)value;
> + }
> + else if (x == Py_None) {
> + /* undefined mapping */
> + outpos = p-PyUnicode_AS_UNICODE(v);
> + startinpos = s-starts;
> + endinpos = startinpos+1;
> + if (unicode_decode_call_errorhandler(
> + errors, &errorHandler,
> + "charmap", "character maps to <undefined>",
> + starts, size, &startinpos, &endinpos, &exc, &s,
> + (PyObject **)&v, &outpos, &p)) {
> + Py_DECREF(x);
> + goto onError;
> + }
> + continue;
> + }
> + else if (PyUnicode_Check(x)) {
> + int targetsize = PyUnicode_GET_SIZE(x);
> +
> + if (targetsize == 1)
> + /* 1-1 mapping */
> + *p++ = *PyUnicode_AS_UNICODE(x);
> +
> + else if (targetsize > 1) {
> + /* 1-n mapping */
> + if (targetsize > extrachars) {
> + /* resize first */
> + int oldpos = (int)(p - PyUnicode_AS_UNICODE(v));
> + int needed = (targetsize - extrachars) + \
> + (targetsize << 2);
> + extrachars += needed;
> + if (_PyUnicode_Resize(&v,
> + PyUnicode_GET_SIZE(v) + needed) < 0) {
> + Py_DECREF(x);
> + goto onError;
> + }
> + p = PyUnicode_AS_UNICODE(v) + oldpos;
> }
> - p = PyUnicode_AS_UNICODE(v) + oldpos;
> + Py_UNICODE_COPY(p,
> + PyUnicode_AS_UNICODE(x),
> + targetsize);
> + p += targetsize;
> + extrachars -= targetsize;
> }
> - Py_UNICODE_COPY(p,
> - PyUnicode_AS_UNICODE(x),
> - targetsize);
> - p += targetsize;
> - extrachars -= targetsize;
> + /* 1-0 mapping: skip the character */
> + }
> + else {
> + /* wrong return value */
> + PyErr_SetString(PyExc_TypeError,
> + "character mapping must return integer, None or unicode");
> + Py_DECREF(x);
> + goto onError;
> }
> - /* 1-0 mapping: skip the character */
> - }
> - else {
> - /* wrong return value */
> - PyErr_SetString(PyExc_TypeError,
> - "character mapping must return integer, None or unicode");
> Py_DECREF(x);
> - goto onError;
> + ++s;
> }
> - Py_DECREF(x);
> - ++s;
> }
> if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v))
> if (_PyUnicode_Resize(&v, (int)(p - PyUnicode_AS_UNICODE(v))) < 0)
>
> _______________________________________________
> Python-checkins mailing list
> Python-checkins at python.org
> http://mail.python.org/mailman/listinfo/python-checkins
>
More information about the Python-checkins
mailing list