diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 339ee35c7aa474..99afebd762a456 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -477,9 +477,6 @@ APIs: | | | :c:func:`PyObject_Repr`. | +-------------------+---------------------+----------------------------------+ - An unrecognized format character causes all the rest of the format string to be - copied as-is to the result string, and any extra arguments discarded. - .. note:: The width formatter unit is number of characters rather than bytes. The precision formatter unit is number of bytes for ``"%s"`` and @@ -500,6 +497,11 @@ APIs: Support width and precision formatter for ``"%s"``, ``"%A"``, ``"%U"``, ``"%V"``, ``"%S"``, ``"%R"`` added. + .. versionchanged:: 3.12 + An unrecognized format character now sets a :exc:`SystemError`. + In previous versions it caused all the rest of the format string to be + copied as-is to the result string, and any extra arguments discarded. + .. c:function:: PyObject* PyUnicode_FromFormatV(const char *format, va_list vargs) diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index ddf9e1f6a59b47..02215524cfd671 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -461,6 +461,12 @@ Porting to Python 3.12 :py:meth:`~class.__subclasses__` (using :c:func:`PyObject_CallMethod`, for example). +* An unrecognized format character in :c:func:`PyUnicode_FromFormat` and + :c:func:`PyUnicode_FromFormatV` now sets a :exc:`SystemError`. + In previous versions it caused all the rest of the format string to be + copied as-is to the result string, and any extra arguments discarded. + (Contributed by Serhiy Storchaka in :gh:`95781`.) + Deprecated ---------- diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 9765ed97a60a44..63bccb72e04646 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -2641,8 +2641,6 @@ def check_format(expected, format, *args): b'%c%c', c_int(0x10000), c_int(0x100000)) # test "%" - check_format('%', - b'%') check_format('%', b'%%') check_format('%s', @@ -2819,23 +2817,22 @@ def check_format(expected, format, *args): check_format('repr=abc\ufffd', b'repr=%V', None, b'abc\xff') - # not supported: copy the raw format string. these tests are just here - # to check for crashes and should not be considered as specifications - check_format('%s', - b'%1%s', b'abc') - check_format('%1abc', - b'%1abc') - check_format('%+i', - b'%+i', c_int(10)) - check_format('%.%s', - b'%.%s', b'abc') - # Issue #33817: empty strings check_format('', b'') check_format('', b'%s', b'') + # check for crashes + for fmt in (b'%', b'%0', b'%01', b'%.', b'%.1', + b'%0%s', b'%1%s', b'%.%s', b'%.1%s', b'%1abc', + b'%l', b'%ll', b'%z', b'%ls', b'%lls', b'%zs'): + with self.subTest(fmt=fmt): + self.assertRaisesRegex(SystemError, 'invalid format string', + PyUnicode_FromFormat, fmt, b'abc') + self.assertRaisesRegex(SystemError, 'invalid format string', + PyUnicode_FromFormat, b'%+i', c_int(10)) + # Test PyUnicode_AsWideChar() @support.cpython_only @unittest.skipIf(_testcapi is None, 'need _testcapi module') diff --git a/Misc/NEWS.d/next/C API/2022-08-08-14-36-31.gh-issue-95781.W_G8YW.rst b/Misc/NEWS.d/next/C API/2022-08-08-14-36-31.gh-issue-95781.W_G8YW.rst new file mode 100644 index 00000000000000..eb2fd7e9da3d81 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2022-08-08-14-36-31.gh-issue-95781.W_G8YW.rst @@ -0,0 +1,4 @@ +An unrecognized format character in :c:func:`PyUnicode_FromFormat` and +:c:func:`PyUnicode_FromFormatV` now sets a :exc:`SystemError`. +In previous versions it caused all the rest of the format string to be +copied as-is to the result string, and any extra arguments discarded. diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 7ff79953257ee6..184a2bfd5dd869 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2355,6 +2355,13 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, p = f; f++; + if (*f == '%') { + if (_PyUnicodeWriter_WriteCharInline(writer, '%') < 0) + return NULL; + f++; + return f; + } + zeropad = 0; if (*f == '0') { zeropad = 1; @@ -2392,14 +2399,6 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, f++; } } - if (*f == '%') { - /* "%.3%s" => f points to "3" */ - f--; - } - } - if (*f == '\0') { - /* bogus format "%.123" => go backward, f points to "3" */ - f--; } /* Handle %ld, %lu, %lld and %llu. */ @@ -2423,7 +2422,7 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, ++f; } - if (f[1] == '\0') + if (f[0] != '\0' && f[1] == '\0') writer->overallocate = 0; switch (*f) { @@ -2616,21 +2615,9 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, break; } - case '%': - if (_PyUnicodeWriter_WriteCharInline(writer, '%') < 0) - return NULL; - break; - default: - /* if we stumble upon an unknown formatting code, copy the rest - of the format string to the output string. (we cannot just - skip the code, since there's no way to know what's in the - argument list) */ - len = strlen(p); - if (_PyUnicodeWriter_WriteLatin1String(writer, p, len) == -1) - return NULL; - f = p+len; - return f; + PyErr_Format(PyExc_SystemError, "invalid format string: %s", p); + return NULL; } f++;