[Python-checkins] gh-95781: More strict format string checking in PyUnicode_FromFormatV() (GH-95784)
serhiy-storchaka
webhook-mailer at python.org
Mon Aug 8 12:21:37 EDT 2022
https://github.com/python/cpython/commit/62f06508e76e023a81861caee6a45e1d639bf530
commit: 62f06508e76e023a81861caee6a45e1d639bf530
branch: main
author: Serhiy Storchaka <storchaka at gmail.com>
committer: serhiy-storchaka <storchaka at gmail.com>
date: 2022-08-08T19:21:07+03:00
summary:
gh-95781: More strict format string checking in PyUnicode_FromFormatV() (GH-95784)
An unrecognized format character in PyUnicode_FromFormat() and
PyUnicode_FromFormatV() now sets a SystemError.
In previous versions, it caused the rest of the format string to be
copied as-is to the result string and any extra arguments to be discarded.
files:
A Misc/NEWS.d/next/C API/2022-08-08-14-36-31.gh-issue-95781.W_G8YW.rst
M Doc/c-api/unicode.rst
M Doc/whatsnew/3.12.rst
M Lib/test/test_unicode.py
M Objects/unicodeobject.c
diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index 339ee35c7aa4..99afebd762a4 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -477,9 +477,6 @@ APIs:
| | | :c:func:`PyObject_Repr`. |
+-------------------+---------------------+----------------------------------+
- An unrecognized format character causes all the rest of the format string to be
- copied as-is to the result string, and any extra arguments discarded.
-
.. note::
The width formatter unit is number of characters rather than bytes.
The precision formatter unit is number of bytes for ``"%s"`` and
@@ -500,6 +497,11 @@ APIs:
Support width and precision formatter for ``"%s"``, ``"%A"``, ``"%U"``,
``"%V"``, ``"%S"``, ``"%R"`` added.
+ .. versionchanged:: 3.12
+ An unrecognized format character now sets a :exc:`SystemError`.
+ In previous versions it caused all the rest of the format string to be
+ copied as-is to the result string, and any extra arguments discarded.
+
.. c:function:: PyObject* PyUnicode_FromFormatV(const char *format, va_list vargs)
diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst
index f1696cc4584c..6df122acba71 100644
--- a/Doc/whatsnew/3.12.rst
+++ b/Doc/whatsnew/3.12.rst
@@ -469,6 +469,12 @@ Porting to Python 3.12
:py:meth:`~class.__subclasses__` (using :c:func:`PyObject_CallMethod`,
for example).
+* An unrecognized format character in :c:func:`PyUnicode_FromFormat` and
+ :c:func:`PyUnicode_FromFormatV` now sets a :exc:`SystemError`.
+ In previous versions it caused all the rest of the format string to be
+ copied as-is to the result string, and any extra arguments discarded.
+ (Contributed by Serhiy Storchaka in :gh:`95781`.)
+
Deprecated
----------
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 9765ed97a60a..63bccb72e046 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -2641,8 +2641,6 @@ def check_format(expected, format, *args):
b'%c%c', c_int(0x10000), c_int(0x100000))
# test "%"
- check_format('%',
- b'%')
check_format('%',
b'%%')
check_format('%s',
@@ -2819,23 +2817,22 @@ def check_format(expected, format, *args):
check_format('repr=abc\ufffd',
b'repr=%V', None, b'abc\xff')
- # not supported: copy the raw format string. these tests are just here
- # to check for crashes and should not be considered as specifications
- check_format('%s',
- b'%1%s', b'abc')
- check_format('%1abc',
- b'%1abc')
- check_format('%+i',
- b'%+i', c_int(10))
- check_format('%.%s',
- b'%.%s', b'abc')
-
# Issue #33817: empty strings
check_format('',
b'')
check_format('',
b'%s', b'')
+ # check for crashes
+ for fmt in (b'%', b'%0', b'%01', b'%.', b'%.1',
+ b'%0%s', b'%1%s', b'%.%s', b'%.1%s', b'%1abc',
+ b'%l', b'%ll', b'%z', b'%ls', b'%lls', b'%zs'):
+ with self.subTest(fmt=fmt):
+ self.assertRaisesRegex(SystemError, 'invalid format string',
+ PyUnicode_FromFormat, fmt, b'abc')
+ self.assertRaisesRegex(SystemError, 'invalid format string',
+ PyUnicode_FromFormat, b'%+i', c_int(10))
+
# Test PyUnicode_AsWideChar()
@support.cpython_only
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
diff --git a/Misc/NEWS.d/next/C API/2022-08-08-14-36-31.gh-issue-95781.W_G8YW.rst b/Misc/NEWS.d/next/C API/2022-08-08-14-36-31.gh-issue-95781.W_G8YW.rst
new file mode 100644
index 000000000000..eb2fd7e9da3d
--- /dev/null
+++ b/Misc/NEWS.d/next/C API/2022-08-08-14-36-31.gh-issue-95781.W_G8YW.rst
@@ -0,0 +1,4 @@
+An unrecognized format character in :c:func:`PyUnicode_FromFormat` and
+:c:func:`PyUnicode_FromFormatV` now sets a :exc:`SystemError`.
+In previous versions it caused all the rest of the format string to be
+copied as-is to the result string, and any extra arguments discarded.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 7ff79953257e..184a2bfd5dd8 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2355,6 +2355,13 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
p = f;
f++;
+ if (*f == '%') {
+ if (_PyUnicodeWriter_WriteCharInline(writer, '%') < 0)
+ return NULL;
+ f++;
+ return f;
+ }
+
zeropad = 0;
if (*f == '0') {
zeropad = 1;
@@ -2392,14 +2399,6 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
f++;
}
}
- if (*f == '%') {
- /* "%.3%s" => f points to "3" */
- f--;
- }
- }
- if (*f == '\0') {
- /* bogus format "%.123" => go backward, f points to "3" */
- f--;
}
/* Handle %ld, %lu, %lld and %llu. */
@@ -2423,7 +2422,7 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
++f;
}
- if (f[1] == '\0')
+ if (f[0] != '\0' && f[1] == '\0')
writer->overallocate = 0;
switch (*f) {
@@ -2616,21 +2615,9 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
break;
}
- case '%':
- if (_PyUnicodeWriter_WriteCharInline(writer, '%') < 0)
- return NULL;
- break;
-
default:
- /* if we stumble upon an unknown formatting code, copy the rest
- of the format string to the output string. (we cannot just
- skip the code, since there's no way to know what's in the
- argument list) */
- len = strlen(p);
- if (_PyUnicodeWriter_WriteLatin1String(writer, p, len) == -1)
- return NULL;
- f = p+len;
- return f;
+ PyErr_Format(PyExc_SystemError, "invalid format string: %s", p);
+ return NULL;
}
f++;
More information about the Python-checkins
mailing list