[Python-checkins] r72266 - in python/branches/release30-maint: Misc/NEWS Objects/unicodeobject.c
walter.doerwald
python-checkins at python.org
Mon May 4 00:57:39 CEST 2009
Author: walter.doerwald
Date: Mon May 4 00:57:39 2009
New Revision: 72266
Log:
Merged revisions 72265 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/py3k
................
r72265 | walter.doerwald | 2009-05-04 00:55:55 +0200 (Mo, 04 Mai 2009) | 12 lines
Merged revisions 72260 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk
........
r72260 | walter.doerwald | 2009-05-04 00:36:33 +0200 (Mo, 04 Mai 2009) | 5 lines
Issue #5108: Handle %s like %S and %R in PyUnicode_FromFormatV(): Call
PyUnicode_DecodeUTF8() once, remember the result and output it in a second
step. This avoids problems with the UTF-8 byte counting, which ignored the
effect of using the "replace" error handler in PyUnicode_DecodeUTF8().
........
................
Modified:
python/branches/release30-maint/ (props changed)
python/branches/release30-maint/Misc/NEWS
python/branches/release30-maint/Objects/unicodeobject.c
Modified: python/branches/release30-maint/Misc/NEWS
==============================================================================
--- python/branches/release30-maint/Misc/NEWS (original)
+++ python/branches/release30-maint/Misc/NEWS Mon May 4 00:57:39 2009
@@ -39,6 +39,11 @@
- Issue #5249: time.strftime returned malformed string when format string
contained non ascii character on windows.
+- Issue #5108: Handle %s like %S, %R and %A in PyUnicode_FromFormatV(): Call
+ PyUnicode_DecodeUTF8() once, remember the result and output it in a second
+ step. This avoids problems with counting UTF-8 bytes that ignores the effect
+ of using the replace error handler in PyUnicode_DecodeUTF8().
+
Library
-------
Modified: python/branches/release30-maint/Objects/unicodeobject.c
==============================================================================
--- python/branches/release30-maint/Objects/unicodeobject.c (original)
+++ python/branches/release30-maint/Objects/unicodeobject.c Mon May 4 00:57:39 2009
@@ -654,16 +654,26 @@
count = vargs;
#endif
#endif
- /* step 1: count the number of %S/%R/%A format specifications
- * (we call PyObject_Str()/PyObject_Repr()/PyObject_ASCII() for
- * these objects once during step 3 and put the result in
- an array) */
+ /* step 1: count the number of %S/%R/%A/%s format specifications
+ * (we call PyObject_Str()/PyObject_Repr()/PyObject_ASCII()/
+ * PyUnicode_DecodeUTF8() for these objects once during step 3 and put the
+ * result in an array) */
for (f = format; *f; f++) {
- if (*f == '%' && (*(f+1)=='S' || *(f+1)=='R' || *(f+1)=='A'))
- ++callcount;
+ if (*f == '%') {
+ if (*(f+1)=='%')
+ continue;
+ if (*(f+1)=='S' || *(f+1)=='R' || *(f+1)=='A')
+ ++callcount;
+ while (ISDIGIT((unsigned)*f))
+ width = (width*10) + *f++ - '0';
+ while (*++f && *f != '%' && !ISALPHA((unsigned)*f))
+ ;
+ if (*f == 's')
+ ++callcount;
+ }
}
/* step 2: allocate memory for the results of
- * PyObject_Str()/PyObject_Repr() calls */
+ * PyObject_Str()/PyObject_Repr()/PyUnicode_DecodeUTF8() calls */
if (callcount) {
callresults = PyObject_Malloc(sizeof(PyObject *)*callcount);
if (!callresults) {
@@ -712,35 +722,13 @@
case 's':
{
/* UTF-8 */
- unsigned char*s;
- s = va_arg(count, unsigned char*);
- while (*s) {
- if (*s < 128) {
- n++; s++;
- } else if (*s < 0xc0) {
- /* invalid UTF-8 */
- n++; s++;
- } else if (*s < 0xc0) {
- n++;
- s++; if(!*s)break;
- s++;
- } else if (*s < 0xe0) {
- n++;
- s++; if(!*s)break;
- s++; if(!*s)break;
- s++;
- } else {
-#ifdef Py_UNICODE_WIDE
- n++;
-#else
- n+=2;
-#endif
- s++; if(!*s)break;
- s++; if(!*s)break;
- s++; if(!*s)break;
- s++;
- }
- }
+ unsigned char *s = va_arg(count, unsigned char*);
+ PyObject *str = PyUnicode_DecodeUTF8(s, strlen(s), "replace");
+ if (!str)
+ goto fail;
+ n += PyUnicode_GET_SIZE(str);
+ /* Remember the str and switch to the next slot */
+ *callresult++ = str;
break;
}
case 'U':
@@ -909,19 +897,15 @@
break;
case 's':
{
- /* Parameter must be UTF-8 encoded.
- In case of encoding errors, use
- the replacement character. */
- PyObject *u;
- p = va_arg(vargs, char*);
- u = PyUnicode_DecodeUTF8(p, strlen(p),
- "replace");
- if (!u)
- goto fail;
- Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(u),
- PyUnicode_GET_SIZE(u));
- s += PyUnicode_GET_SIZE(u);
- Py_DECREF(u);
+ /* unused, since we already have the result */
+ (void) va_arg(vargs, char *);
+ Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(*callresult),
+ PyUnicode_GET_SIZE(*callresult));
+ s += PyUnicode_GET_SIZE(*callresult);
+ /* We're done with the unicode()/repr() => forget it */
+ Py_DECREF(*callresult);
+ /* switch to next unicode()/repr() result */
+ ++callresult;
break;
}
case 'U':
More information about the Python-checkins
mailing list