[issue23055] PyUnicode_FromFormatV crasher
Serhiy Storchaka
report at bugs.python.org
Tue Dec 16 13:31:37 CET 2014
Serhiy Storchaka added the comment:
Here is an updated patch for 2.7 (with tests backported from 3.5), along with
patches for 3.2 and 3.3.
----------
Added file: http://bugs.python.org/file37466/issue23055-2.7-2.patch
Added file: http://bugs.python.org/file37467/issue23055-3.2.patch
Added file: http://bugs.python.org/file37468/issue23055-3.3.patch
_______________________________________
Python tracker <report at bugs.python.org>
<http://bugs.python.org/issue23055>
_______________________________________
-------------- next part --------------
diff -r 9927781e457f Lib/test/test_unicode.py
--- a/Lib/test/test_unicode.py Mon Dec 15 14:02:43 2014 +0200
+++ b/Lib/test/test_unicode.py Tue Dec 16 13:52:24 2014 +0200
@@ -1700,6 +1700,9 @@ class UnicodeTest(
if sys.maxunicode > 0xffff:
check_format(u'\U0010ffff',
b'%c', c_int(0x10ffff))
+ else:
+ with self.assertRaises(OverflowError):
+ PyUnicode_FromFormat(b'%c', c_int(0x10000))
with self.assertRaises(OverflowError):
PyUnicode_FromFormat(b'%c', c_int(0x110000))
# Issue #18183
@@ -1750,8 +1753,45 @@ class UnicodeTest(
b'%zu', c_size_t(123))
# test long output
+ min_long = -(2 ** (8 * sizeof(c_long) - 1))
+ max_long = -min_long - 1
+ check_format(unicode(min_long),
+ b'%ld', c_long(min_long))
+ check_format(unicode(max_long),
+ b'%ld', c_long(max_long))
+ max_ulong = 2 ** (8 * sizeof(c_ulong)) - 1
+ check_format(unicode(max_ulong),
+ b'%lu', c_ulong(max_ulong))
PyUnicode_FromFormat(b'%p', c_void_p(-1))
+ # test padding (width and/or precision)
+ check_format(u'123'.rjust(10, u'0'),
+ b'%010i', c_int(123))
+ check_format(u'123'.rjust(100),
+ b'%100i', c_int(123))
+ check_format(u'123'.rjust(100, u'0'),
+ b'%.100i', c_int(123))
+ check_format(u'123'.rjust(80, u'0').rjust(100),
+ b'%100.80i', c_int(123))
+
+ check_format(u'123'.rjust(10, u'0'),
+ b'%010u', c_uint(123))
+ check_format(u'123'.rjust(100),
+ b'%100u', c_uint(123))
+ check_format(u'123'.rjust(100, u'0'),
+ b'%.100u', c_uint(123))
+ check_format(u'123'.rjust(80, u'0').rjust(100),
+ b'%100.80u', c_uint(123))
+
+ check_format(u'123'.rjust(10, u'0'),
+ b'%010x', c_int(0x123))
+ check_format(u'123'.rjust(100),
+ b'%100x', c_int(0x123))
+ check_format(u'123'.rjust(100, u'0'),
+ b'%.100x', c_int(0x123))
+ check_format(u'123'.rjust(80, u'0').rjust(100),
+ b'%100.80x', c_int(0x123))
+
# test %V
check_format(u'repr=abc',
b'repr=%V', u'abc', b'xyz')
diff -r 9927781e457f Misc/NEWS
--- a/Misc/NEWS Mon Dec 15 14:02:43 2014 +0200
+++ b/Misc/NEWS Tue Dec 16 13:52:24 2014 +0200
@@ -10,6 +10,9 @@ What's New in Python 2.7.10?
Core and Builtins
-----------------
+- Issue #23055: Fixed a buffer overflow in PyUnicode_FromFormatV. Analysis
+ and fix by Guido Vranken.
+
- Issue #23048: Fix jumping out of an infinite while loop in the pdb.
Library
diff -r 9927781e457f Objects/unicodeobject.c
--- a/Objects/unicodeobject.c Mon Dec 15 14:02:43 2014 +0200
+++ b/Objects/unicodeobject.c Tue Dec 16 13:52:24 2014 +0200
@@ -735,15 +735,10 @@ PyUnicode_FromFormatV(const char *format
* objects once during step 3 and put the result in an array) */
for (f = format; *f; f++) {
if (*f == '%') {
- if (*(f+1)=='%')
- continue;
- if (*(f+1)=='S' || *(f+1)=='R')
- ++callcount;
- while (isdigit((unsigned)*f))
- width = (width*10) + *f++ - '0';
- while (*++f && *f != '%' && !isalpha((unsigned)*f))
- ;
- if (*f == 's')
+ f++;
+ while (*f && *f != '%' && !isalpha((unsigned)*f))
+ f++;
+ if (*f == 's' || *f=='S' || *f=='R')
++callcount;
}
}
@@ -760,12 +755,16 @@ PyUnicode_FromFormatV(const char *format
/* step 3: figure out how large a buffer we need */
for (f = format; *f; f++) {
if (*f == '%') {
- const char* p = f;
+ const char* p = f++;
width = 0;
while (isdigit((unsigned)*f))
width = (width*10) + *f++ - '0';
- while (*++f && *f != '%' && !isalpha((unsigned)*f))
- ;
+ precision = 0;
+ if (*f == '.') {
+ f++;
+ while (isdigit((unsigned)*f))
+ precision = (precision*10) + *f++ - '0';
+ }
/* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
* they don't affect the amount of space we reserve.
@@ -800,6 +799,8 @@ PyUnicode_FromFormatV(const char *format
break;
case 'd': case 'u': case 'i': case 'x':
(void) va_arg(count, int);
+ if (width < precision)
+ width = precision;
/* 20 bytes is enough to hold a 64-bit
integer. Decimal takes the most space.
This isn't enough for octal.
-------------- next part --------------
diff -r a0368f81af9a Lib/test/test_unicode.py
--- a/Lib/test/test_unicode.py Fri Dec 05 20:15:15 2014 -0500
+++ b/Lib/test/test_unicode.py Tue Dec 16 13:51:07 2014 +0200
@@ -1661,7 +1661,10 @@ class UnicodeTest(string_tests.CommonTes
# Test PyUnicode_FromFormat()
def test_from_format(self):
support.import_module('ctypes')
- from ctypes import pythonapi, py_object, c_int
+ from ctypes import (
+ pythonapi, py_object, sizeof,
+ c_int, c_long, c_longlong, c_ssize_t,
+ c_uint, c_ulong, c_ulonglong, c_size_t, c_void_p)
if sys.maxunicode == 65535:
name = "PyUnicodeUCS2_FromFormat"
else:
@@ -1675,9 +1678,13 @@ class UnicodeTest(string_tests.CommonTes
for arg in args)
return _PyUnicode_FromFormat(format, *cargs)
+ def check_format(expected, format, *args):
+ text = PyUnicode_FromFormat(format, *args)
+ self.assertEqual(expected, text)
+
# ascii format, non-ascii argument
- text = PyUnicode_FromFormat(b'ascii\x7f=%U', 'unicode\xe9')
- self.assertEqual(text, 'ascii\x7f=unicode\xe9')
+ check_format('ascii\x7f=unicode\xe9',
+ b'ascii\x7f=%U', 'unicode\xe9')
# non-ascii format, ascii argument: ensure that PyUnicode_FromFormatV()
# raises an error
@@ -1686,25 +1693,131 @@ class UnicodeTest(string_tests.CommonTes
'string, got a non-ASCII byte: 0xe9$',
PyUnicode_FromFormat, b'unicode\xe9=%s', 'ascii')
- self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0xabcd)), '\uabcd')
- self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0x10ffff)), '\U0010ffff')
+ # test "%c"
+ check_format('\uabcd',
+ b'%c', c_int(0xabcd))
+ check_format('\U0010ffff',
+ b'%c', c_int(0x10ffff))
+ with self.assertRaises(OverflowError):
+ PyUnicode_FromFormat(b'%c', c_int(0x110000))
+ # Issue #18183
+ check_format('\U00010000\U00100000',
+ b'%c%c', c_int(0x10000), c_int(0x100000))
- # other tests
- text = PyUnicode_FromFormat(b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')
- self.assertEqual(text, r"%A:'abc\xe9\uabcd\U0010ffff'")
+ # test "%"
+ check_format('%',
+ b'%')
+ check_format('%',
+ b'%%')
+ check_format('%s',
+ b'%%s')
+ check_format('[%]',
+ b'[%%]')
+ check_format('%abc',
+ b'%%%s', b'abc')
- text = PyUnicode_FromFormat(b'repr=%V', 'abc', b'xyz')
- self.assertEqual(text, 'repr=abc')
+ # test %S
+ check_format("repr=\u20acABC",
+ b'repr=%S', '\u20acABC')
+
+ # test %R
+ check_format("repr='\u20acABC'",
+ b'repr=%R', '\u20acABC')
+
+ # test integer formats (%i, %d, %u)
+ check_format('010',
+ b'%03i', c_int(10))
+ check_format('0010',
+ b'%0.4i', c_int(10))
+ check_format('-123',
+ b'%i', c_int(-123))
+
+ check_format('-123',
+ b'%d', c_int(-123))
+ check_format('-123',
+ b'%ld', c_long(-123))
+ check_format('-123',
+ b'%lld', c_longlong(-123))
+ check_format('-123',
+ b'%zd', c_ssize_t(-123))
+
+ check_format('123',
+ b'%u', c_uint(123))
+ check_format('123',
+ b'%lu', c_ulong(123))
+ check_format('123',
+ b'%llu', c_ulonglong(123))
+ check_format('123',
+ b'%zu', c_size_t(123))
+
+ # test long output
+ min_longlong = -(2 ** (8 * sizeof(c_longlong) - 1))
+ max_longlong = -min_longlong - 1
+ check_format(str(min_longlong),
+ b'%lld', c_longlong(min_longlong))
+ check_format(str(max_longlong),
+ b'%lld', c_longlong(max_longlong))
+ max_ulonglong = 2 ** (8 * sizeof(c_ulonglong)) - 1
+ check_format(str(max_ulonglong),
+ b'%llu', c_ulonglong(max_ulonglong))
+ PyUnicode_FromFormat(b'%p', c_void_p(-1))
+
+ # test padding (width and/or precision)
+ check_format('123'.rjust(10, '0'),
+ b'%010i', c_int(123))
+ check_format('123'.rjust(100),
+ b'%100i', c_int(123))
+ check_format('123'.rjust(100, '0'),
+ b'%.100i', c_int(123))
+ check_format('123'.rjust(80, '0').rjust(100),
+ b'%100.80i', c_int(123))
+
+ check_format('123'.rjust(10, '0'),
+ b'%010u', c_uint(123))
+ check_format('123'.rjust(100),
+ b'%100u', c_uint(123))
+ check_format('123'.rjust(100, '0'),
+ b'%.100u', c_uint(123))
+ check_format('123'.rjust(80, '0').rjust(100),
+ b'%100.80u', c_uint(123))
+
+ check_format('123'.rjust(10, '0'),
+ b'%010x', c_int(0x123))
+ check_format('123'.rjust(100),
+ b'%100x', c_int(0x123))
+ check_format('123'.rjust(100, '0'),
+ b'%.100x', c_int(0x123))
+ check_format('123'.rjust(80, '0').rjust(100),
+ b'%100.80x', c_int(0x123))
+
+ # test %A
+ check_format(r"%A:'abc\xe9\uabcd\U0010ffff'",
+ b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')
+
+ # test %V
+ check_format('repr=abc',
+ b'repr=%V', 'abc', b'xyz')
# Test string decode from parameter of %s using utf-8.
# b'\xe4\xba\xba\xe6\xb0\x91' is utf-8 encoded byte sequence of
# '\u4eba\u6c11'
- text = PyUnicode_FromFormat(b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91')
- self.assertEqual(text, 'repr=\u4eba\u6c11')
+ check_format('repr=\u4eba\u6c11',
+ b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91')
#Test replace error handler.
- text = PyUnicode_FromFormat(b'repr=%V', None, b'abc\xff')
- self.assertEqual(text, 'repr=abc\ufffd')
+ check_format('repr=abc\ufffd',
+ b'repr=%V', None, b'abc\xff')
+
+ # not supported: copy the raw format string. these tests are just here
+ # to check for crashs and should not be considered as specifications
+ check_format('%s',
+ b'%1%s', b'abc')
+ check_format('%1abc',
+ b'%1abc')
+ check_format('%+i',
+ b'%+i', c_int(10))
+ check_format('%s',
+ b'%.%s', b'abc')
# Test PyUnicode_AsWideChar()
def test_aswidechar(self):
diff -r a0368f81af9a Misc/NEWS
--- a/Misc/NEWS Fri Dec 05 20:15:15 2014 -0500
+++ b/Misc/NEWS Tue Dec 16 13:51:07 2014 +0200
@@ -2,6 +2,18 @@
Python News
+++++++++++
+What's New in Python 3.2.7?
+============================
+
+*Release date: XXXX-XX-XX*
+
+Core and Builtins
+-----------------
+
+- Issue #23055: Fixed a buffer overflow in PyUnicode_FromFormatV. Analysis
+ and fix by Guido Vranken.
+
+
What's New in Python 3.2.6?
===========================
diff -r a0368f81af9a Objects/unicodeobject.c
--- a/Objects/unicodeobject.c Fri Dec 05 20:15:15 2014 -0500
+++ b/Objects/unicodeobject.c Tue Dec 16 13:51:07 2014 +0200
@@ -759,15 +759,10 @@ PyUnicode_FromFormatV(const char *format
* result in an array) */
for (f = format; *f; f++) {
if (*f == '%') {
- if (*(f+1)=='%')
- continue;
- if (*(f+1)=='S' || *(f+1)=='R' || *(f+1)=='A' || *(f+1) == 'V')
- ++callcount;
- while (Py_ISDIGIT((unsigned)*f))
- width = (width*10) + *f++ - '0';
- while (*++f && *f != '%' && !Py_ISALPHA((unsigned)*f))
- ;
- if (*f == 's')
+ f++;
+ while (*f && *f != '%' && !Py_ISALPHA((unsigned)*f))
+ f++;
+ if (*f == 's' || *f=='S' || *f=='R' || *f=='A' || *f=='V')
++callcount;
}
else if (128 <= (unsigned char)*f) {
@@ -794,12 +789,16 @@ PyUnicode_FromFormatV(const char *format
#ifdef HAVE_LONG_LONG
int longlongflag = 0;
#endif
- const char* p = f;
+ const char* p = f++;
width = 0;
while (Py_ISDIGIT((unsigned)*f))
width = (width*10) + *f++ - '0';
- while (*++f && *f != '%' && !Py_ISALPHA((unsigned)*f))
- ;
+ precision = 0;
+ if (*f == '.') {
+ f++;
+ while (Py_ISDIGIT((unsigned)*f))
+ precision = (precision*10) + *f++ - '0';
+ }
/* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
* they don't affect the amount of space we reserve.
@@ -823,16 +822,18 @@ PyUnicode_FromFormatV(const char *format
switch (*f) {
case 'c':
{
+ int ordinal = va_arg(count, int);
+ if (ordinal < 0 || ordinal > 0x10ffff) {
+ PyErr_SetString(PyExc_OverflowError,
+ "%c arg not in range(0x110000)");
+ goto fail;
+ }
#ifndef Py_UNICODE_WIDE
- int ordinal = va_arg(count, int);
if (ordinal > 0xffff)
n += 2;
else
- n++;
-#else
- (void)va_arg(count, int);
+#endif
n++;
-#endif
break;
}
case '%':
@@ -840,6 +841,8 @@ PyUnicode_FromFormatV(const char *format
break;
case 'd': case 'u': case 'i': case 'x':
(void) va_arg(count, int);
+ if (width < precision)
+ width = precision;
#ifdef HAVE_LONG_LONG
if (longlongflag) {
if (width < MAX_LONG_LONG_CHARS)
-------------- next part --------------
diff -r 4a9418c6f8ae Lib/test/test_unicode.py
--- a/Lib/test/test_unicode.py Fri Dec 05 20:30:54 2014 -0500
+++ b/Lib/test/test_unicode.py Tue Dec 16 13:51:18 2014 +0200
@@ -2016,9 +2016,10 @@ class UnicodeTest(string_tests.CommonTes
# Test PyUnicode_FromFormat()
def test_from_format(self):
support.import_module('ctypes')
- from ctypes import (pythonapi, py_object,
+ from ctypes import (
+ pythonapi, py_object, sizeof,
c_int, c_long, c_longlong, c_ssize_t,
- c_uint, c_ulong, c_ulonglong, c_size_t)
+ c_uint, c_ulong, c_ulonglong, c_size_t, c_void_p)
name = "PyUnicode_FromFormat"
_PyUnicode_FromFormat = getattr(pythonapi, name)
_PyUnicode_FromFormat.restype = py_object
@@ -2029,9 +2030,13 @@ class UnicodeTest(string_tests.CommonTes
for arg in args)
return _PyUnicode_FromFormat(format, *cargs)
+ def check_format(expected, format, *args):
+ text = PyUnicode_FromFormat(format, *args)
+ self.assertEqual(expected, text)
+
# ascii format, non-ascii argument
- text = PyUnicode_FromFormat(b'ascii\x7f=%U', 'unicode\xe9')
- self.assertEqual(text, 'ascii\x7f=unicode\xe9')
+ check_format('ascii\x7f=unicode\xe9',
+ b'ascii\x7f=%U', 'unicode\xe9')
# non-ascii format, ascii argument: ensure that PyUnicode_FromFormatV()
# raises an error
@@ -2041,64 +2046,136 @@ class UnicodeTest(string_tests.CommonTes
PyUnicode_FromFormat, b'unicode\xe9=%s', 'ascii')
# test "%c"
- self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0xabcd)), '\uabcd')
- self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0x10ffff)), '\U0010ffff')
+ check_format('\uabcd',
+ b'%c', c_int(0xabcd))
+ check_format('\U0010ffff',
+ b'%c', c_int(0x10ffff))
with self.assertRaises(OverflowError):
PyUnicode_FromFormat(b'%c', c_int(0x110000))
# Issue #18183
- self.assertEqual(
- PyUnicode_FromFormat(b'%c%c', c_int(0x10000), c_int(0x100000)),
- '\U00010000\U00100000')
+ check_format('\U00010000\U00100000',
+ b'%c%c', c_int(0x10000), c_int(0x100000))
# test "%"
- self.assertEqual(PyUnicode_FromFormat(b'%'), '%')
- self.assertEqual(PyUnicode_FromFormat(b'%%'), '%')
- self.assertEqual(PyUnicode_FromFormat(b'%%s'), '%s')
- self.assertEqual(PyUnicode_FromFormat(b'[%%]'), '[%]')
- self.assertEqual(PyUnicode_FromFormat(b'%%%s', b'abc'), '%abc')
+ check_format('%',
+ b'%')
+ check_format('%',
+ b'%%')
+ check_format('%s',
+ b'%%s')
+ check_format('[%]',
+ b'[%%]')
+ check_format('%abc',
+ b'%%%s', b'abc')
+
+ # test %S
+ check_format("repr=\u20acABC",
+ b'repr=%S', '\u20acABC')
+
+ # test %R
+ check_format("repr='\u20acABC'",
+ b'repr=%R', '\u20acABC')
# test integer formats (%i, %d, %u)
- self.assertEqual(PyUnicode_FromFormat(b'%03i', c_int(10)), '010')
- self.assertEqual(PyUnicode_FromFormat(b'%0.4i', c_int(10)), '0010')
- self.assertEqual(PyUnicode_FromFormat(b'%i', c_int(-123)), '-123')
- self.assertEqual(PyUnicode_FromFormat(b'%li', c_long(-123)), '-123')
- self.assertEqual(PyUnicode_FromFormat(b'%lli', c_longlong(-123)), '-123')
- self.assertEqual(PyUnicode_FromFormat(b'%zi', c_ssize_t(-123)), '-123')
+ check_format('010',
+ b'%03i', c_int(10))
+ check_format('0010',
+ b'%0.4i', c_int(10))
+ check_format('-123',
+ b'%i', c_int(-123))
+ check_format('-123',
+ b'%li', c_long(-123))
+ check_format('-123',
+ b'%lli', c_longlong(-123))
+ check_format('-123',
+ b'%zi', c_ssize_t(-123))
- self.assertEqual(PyUnicode_FromFormat(b'%d', c_int(-123)), '-123')
- self.assertEqual(PyUnicode_FromFormat(b'%ld', c_long(-123)), '-123')
- self.assertEqual(PyUnicode_FromFormat(b'%lld', c_longlong(-123)), '-123')
- self.assertEqual(PyUnicode_FromFormat(b'%zd', c_ssize_t(-123)), '-123')
+ check_format('-123',
+ b'%d', c_int(-123))
+ check_format('-123',
+ b'%ld', c_long(-123))
+ check_format('-123',
+ b'%lld', c_longlong(-123))
+ check_format('-123',
+ b'%zd', c_ssize_t(-123))
- self.assertEqual(PyUnicode_FromFormat(b'%u', c_uint(123)), '123')
- self.assertEqual(PyUnicode_FromFormat(b'%lu', c_ulong(123)), '123')
- self.assertEqual(PyUnicode_FromFormat(b'%llu', c_ulonglong(123)), '123')
- self.assertEqual(PyUnicode_FromFormat(b'%zu', c_size_t(123)), '123')
+ check_format('123',
+ b'%u', c_uint(123))
+ check_format('123',
+ b'%lu', c_ulong(123))
+ check_format('123',
+ b'%llu', c_ulonglong(123))
+ check_format('123',
+ b'%zu', c_size_t(123))
+
+ # test long output
+ min_longlong = -(2 ** (8 * sizeof(c_longlong) - 1))
+ max_longlong = -min_longlong - 1
+ check_format(str(min_longlong),
+ b'%lld', c_longlong(min_longlong))
+ check_format(str(max_longlong),
+ b'%lld', c_longlong(max_longlong))
+ max_ulonglong = 2 ** (8 * sizeof(c_ulonglong)) - 1
+ check_format(str(max_ulonglong),
+ b'%llu', c_ulonglong(max_ulonglong))
+ PyUnicode_FromFormat(b'%p', c_void_p(-1))
+
+ # test padding (width and/or precision)
+ check_format('123'.rjust(10, '0'),
+ b'%010i', c_int(123))
+ check_format('123'.rjust(100),
+ b'%100i', c_int(123))
+ check_format('123'.rjust(300, '0'),
+ b'%.300i', c_int(123))
+ check_format('123'.rjust(80, '0').rjust(100),
+ b'%100.80i', c_int(123))
+
+ check_format('123'.rjust(10, '0'),
+ b'%010u', c_uint(123))
+ check_format('123'.rjust(100),
+ b'%100u', c_uint(123))
+ check_format('123'.rjust(300, '0'),
+ b'%.300u', c_uint(123))
+ check_format('123'.rjust(80, '0').rjust(100),
+ b'%100.80u', c_uint(123))
+
+ check_format('123'.rjust(10, '0'),
+ b'%010x', c_int(0x123))
+ check_format('123'.rjust(100),
+ b'%100x', c_int(0x123))
+ check_format('123'.rjust(300, '0'),
+ b'%.300x', c_int(0x123))
+ check_format('123'.rjust(80, '0').rjust(100),
+ b'%100.80x', c_int(0x123))
# test %A
- text = PyUnicode_FromFormat(b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')
- self.assertEqual(text, r"%A:'abc\xe9\uabcd\U0010ffff'")
+ check_format(r"%A:'abc\xe9\uabcd\U0010ffff'",
+ b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')
# test %V
- text = PyUnicode_FromFormat(b'repr=%V', 'abc', b'xyz')
- self.assertEqual(text, 'repr=abc')
+ check_format('repr=abc',
+ b'repr=%V', 'abc', b'xyz')
# Test string decode from parameter of %s using utf-8.
# b'\xe4\xba\xba\xe6\xb0\x91' is utf-8 encoded byte sequence of
# '\u4eba\u6c11'
- text = PyUnicode_FromFormat(b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91')
- self.assertEqual(text, 'repr=\u4eba\u6c11')
+ check_format('repr=\u4eba\u6c11',
+ b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91')
#Test replace error handler.
- text = PyUnicode_FromFormat(b'repr=%V', None, b'abc\xff')
- self.assertEqual(text, 'repr=abc\ufffd')
+ check_format('repr=abc\ufffd',
+ b'repr=%V', None, b'abc\xff')
# not supported: copy the raw format string. these tests are just here
# to check for crashs and should not be considered as specifications
- self.assertEqual(PyUnicode_FromFormat(b'%1%s', b'abc'), '%s')
- self.assertEqual(PyUnicode_FromFormat(b'%1abc'), '%1abc')
- self.assertEqual(PyUnicode_FromFormat(b'%+i', c_int(10)), '%+i')
- self.assertEqual(PyUnicode_FromFormat(b'%.%s', b'abc'), '%.%s')
+ check_format('%s',
+ b'%1%s', b'abc')
+ check_format('%1abc',
+ b'%1abc')
+ check_format('%+i',
+ b'%+i', c_int(10))
+ check_format('%.%s',
+ b'%.%s', b'abc')
# Test PyUnicode_AsWideChar()
@support.cpython_only
diff -r 4a9418c6f8ae Misc/NEWS
--- a/Misc/NEWS Fri Dec 05 20:30:54 2014 -0500
+++ b/Misc/NEWS Tue Dec 16 13:51:18 2014 +0200
@@ -2,6 +2,18 @@
Python News
+++++++++++
+What's New in Python 3.3.7?
+============================
+
+*Release date: XXXX-XX-XX*
+
+Core and Builtins
+-----------------
+
+- Issue #23055: Fixed a buffer overflow in PyUnicode_FromFormatV. Analysis
+ and fix by Guido Vranken.
+
+
What's New in Python 3.3.6?
===========================
diff -r 4a9418c6f8ae Objects/unicodeobject.c
--- a/Objects/unicodeobject.c Fri Dec 05 20:30:54 2014 -0500
+++ b/Objects/unicodeobject.c Tue Dec 16 13:51:18 2014 +0200
@@ -2335,6 +2335,8 @@ parse_format_flags(const char *f,
f--;
}
}
+ if (width < precision)
+ width = precision;
if (*f == '\0') {
/* bogus format "%.1" => go backward, f points to "1" */
f--;
More information about the Python-bugs-list
mailing list