[Python-checkins] python/dist/src/Objects unicodeobject.c, 2.231, 2.232

Neal Norwitz nnorwitz at gmail.com
Wed Oct 12 05:50:53 CEST 2005


I don't have a problem with this checkin, but in reviewing it I
noticed something.  It seems that the exceptionObject (exc) is
returned from unicode_decode_call_errorhandler(), but it is never used
other than to call Py_XDECREF(exc).  It looks like goto onError
always follows a failure of unicode_decode_call_errorhandler().

Is this the case?  Can exceptionObject be removed?  I didn't look at
any other parameter to determine if each was necessary.  I didn't
study it long enough to be sure exc can be removed, but it sure looked
like it from a moderate inspection.

If we could simplify this code it would be nice.

n
--

On 10/6/05, doerwalter at users.sourceforge.net
<doerwalter at users.sourceforge.net> wrote:
> Update of /cvsroot/python/python/dist/src/Objects
> In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv6122/Objects
>
> Modified Files:
>         unicodeobject.c
> Log Message:
> Part of SF patch #1313939: Speedup charmap decoding by extending
> PyUnicode_DecodeCharmap() to accept a unicode string as the mapping
> argument which is used as a mapping table.
>
> This code isn't used by any of the codecs yet.
>
>
> Index: unicodeobject.c
> ===================================================================
> RCS file: /cvsroot/python/python/dist/src/Objects/unicodeobject.c,v
> retrieving revision 2.231
> retrieving revision 2.232
> diff -u -d -r2.231 -r2.232
> --- unicodeobject.c     30 Aug 2005 10:23:14 -0000      2.231
> +++ unicodeobject.c     6 Oct 2005 20:29:57 -0000       2.232
> @@ -2833,6 +2833,8 @@
>      int extrachars = 0;
>      PyObject *errorHandler = NULL;
>      PyObject *exc = NULL;
> +    Py_UNICODE *mapstring = NULL;
> +    int maplen = 0;
>
>      /* Default to Latin-1 */
>      if (mapping == NULL)
> @@ -2845,91 +2847,121 @@
>         return (PyObject *)v;
>      p = PyUnicode_AS_UNICODE(v);
>      e = s + size;
> -    while (s < e) {
> -       unsigned char ch = *s;
> -       PyObject *w, *x;
> +    if (PyUnicode_CheckExact(mapping)) {
> +       mapstring = PyUnicode_AS_UNICODE(mapping);
> +       maplen = PyUnicode_GET_SIZE(mapping);
> +       while (s < e) {
> +           unsigned char ch = *s;
> +           Py_UNICODE x = 0xfffe; /* illegal value */
>
> -       /* Get mapping (char ordinal -> integer, Unicode char or None) */
> -       w = PyInt_FromLong((long)ch);
> -       if (w == NULL)
> -           goto onError;
> -       x = PyObject_GetItem(mapping, w);
> -       Py_DECREF(w);
> -       if (x == NULL) {
> -           if (PyErr_ExceptionMatches(PyExc_LookupError)) {
> -               /* No mapping found means: mapping is undefined. */
> -               PyErr_Clear();
> -               x = Py_None;
> -               Py_INCREF(x);
> -           } else
> -               goto onError;
> -       }
> +           if (ch < maplen)
> +               x = mapstring[ch];
>
> -       /* Apply mapping */
> -       if (PyInt_Check(x)) {
> -           long value = PyInt_AS_LONG(x);
> -           if (value < 0 || value > 65535) {
> -               PyErr_SetString(PyExc_TypeError,
> -                               "character mapping must be in range(65536)");
> -               Py_DECREF(x);
> -               goto onError;
> +           if (x == 0xfffe) {
> +               /* undefined mapping */
> +               outpos = p-PyUnicode_AS_UNICODE(v);
> +               startinpos = s-starts;
> +               endinpos = startinpos+1;
> +               if (unicode_decode_call_errorhandler(
> +                    errors, &errorHandler,
> +                    "charmap", "character maps to <undefined>",
> +                    starts, size, &startinpos, &endinpos, &exc, &s,
> +                    (PyObject **)&v, &outpos, &p)) {
> +                   goto onError;
> +               }
> +               continue;
>             }
> -           *p++ = (Py_UNICODE)value;
> +           *p++ = x;
> +           ++s;
>         }
> -       else if (x == Py_None) {
> -           /* undefined mapping */
> -           outpos = p-PyUnicode_AS_UNICODE(v);
> -           startinpos = s-starts;
> -           endinpos = startinpos+1;
> -           if (unicode_decode_call_errorhandler(
> -                errors, &errorHandler,
> -                "charmap", "character maps to <undefined>",
> -                starts, size, &startinpos, &endinpos, &exc, &s,
> -                (PyObject **)&v, &outpos, &p)) {
> -               Py_DECREF(x);
> +    }
> +    else {
> +       while (s < e) {
> +           unsigned char ch = *s;
> +           PyObject *w, *x;
> +
> +           /* Get mapping (char ordinal -> integer, Unicode char or None) */
> +           w = PyInt_FromLong((long)ch);
> +           if (w == NULL)
>                 goto onError;
> +           x = PyObject_GetItem(mapping, w);
> +           Py_DECREF(w);
> +           if (x == NULL) {
> +               if (PyErr_ExceptionMatches(PyExc_LookupError)) {
> +                   /* No mapping found means: mapping is undefined. */
> +                   PyErr_Clear();
> +                   x = Py_None;
> +                   Py_INCREF(x);
> +               } else
> +                   goto onError;
>             }
> -           continue;
> -       }
> -       else if (PyUnicode_Check(x)) {
> -           int targetsize = PyUnicode_GET_SIZE(x);
> -
> -           if (targetsize == 1)
> -               /* 1-1 mapping */
> -               *p++ = *PyUnicode_AS_UNICODE(x);
> -
> -           else if (targetsize > 1) {
> -               /* 1-n mapping */
> -               if (targetsize > extrachars) {
> -                   /* resize first */
> -                   int oldpos = (int)(p - PyUnicode_AS_UNICODE(v));
> -                   int needed = (targetsize - extrachars) + \
> -                                (targetsize << 2);
> -                   extrachars += needed;
> -                   if (_PyUnicode_Resize(&v,
> -                                        PyUnicode_GET_SIZE(v) + needed) < 0) {
> -                       Py_DECREF(x);
> -                       goto onError;
> +
> +           /* Apply mapping */
> +           if (PyInt_Check(x)) {
> +               long value = PyInt_AS_LONG(x);
> +               if (value < 0 || value > 65535) {
> +                   PyErr_SetString(PyExc_TypeError,
> +                                   "character mapping must be in range(65536)");
> +                   Py_DECREF(x);
> +                   goto onError;
> +               }
> +               *p++ = (Py_UNICODE)value;
> +           }
> +           else if (x == Py_None) {
> +               /* undefined mapping */
> +               outpos = p-PyUnicode_AS_UNICODE(v);
> +               startinpos = s-starts;
> +               endinpos = startinpos+1;
> +               if (unicode_decode_call_errorhandler(
> +                    errors, &errorHandler,
> +                    "charmap", "character maps to <undefined>",
> +                    starts, size, &startinpos, &endinpos, &exc, &s,
> +                    (PyObject **)&v, &outpos, &p)) {
> +                   Py_DECREF(x);
> +                   goto onError;
> +               }
> +               continue;
> +           }
> +           else if (PyUnicode_Check(x)) {
> +               int targetsize = PyUnicode_GET_SIZE(x);
> +
> +               if (targetsize == 1)
> +                   /* 1-1 mapping */
> +                   *p++ = *PyUnicode_AS_UNICODE(x);
> +
> +               else if (targetsize > 1) {
> +                   /* 1-n mapping */
> +                   if (targetsize > extrachars) {
> +                       /* resize first */
> +                       int oldpos = (int)(p - PyUnicode_AS_UNICODE(v));
> +                       int needed = (targetsize - extrachars) + \
> +                                    (targetsize << 2);
> +                       extrachars += needed;
> +                       if (_PyUnicode_Resize(&v,
> +                                            PyUnicode_GET_SIZE(v) + needed) < 0) {
> +                           Py_DECREF(x);
> +                           goto onError;
> +                       }
> +                       p = PyUnicode_AS_UNICODE(v) + oldpos;
>                     }
> -                   p = PyUnicode_AS_UNICODE(v) + oldpos;
> +                   Py_UNICODE_COPY(p,
> +                                   PyUnicode_AS_UNICODE(x),
> +                                   targetsize);
> +                   p += targetsize;
> +                   extrachars -= targetsize;
>                 }
> -               Py_UNICODE_COPY(p,
> -                               PyUnicode_AS_UNICODE(x),
> -                               targetsize);
> -               p += targetsize;
> -               extrachars -= targetsize;
> +               /* 1-0 mapping: skip the character */
> +           }
> +           else {
> +               /* wrong return value */
> +               PyErr_SetString(PyExc_TypeError,
> +                     "character mapping must return integer, None or unicode");
> +               Py_DECREF(x);
> +               goto onError;
>             }
> -           /* 1-0 mapping: skip the character */
> -       }
> -       else {
> -           /* wrong return value */
> -           PyErr_SetString(PyExc_TypeError,
> -                 "character mapping must return integer, None or unicode");
>             Py_DECREF(x);
> -           goto onError;
> +           ++s;
>         }
> -       Py_DECREF(x);
> -       ++s;
>      }
>      if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v))
>         if (_PyUnicode_Resize(&v, (int)(p - PyUnicode_AS_UNICODE(v))) < 0)
>
> _______________________________________________
> Python-checkins mailing list
> Python-checkins at python.org
> http://mail.python.org/mailman/listinfo/python-checkins
>


More information about the Python-checkins mailing list