From 0a85bf3081010ba5dd2d57c261a2c0dae2ba412d Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 14 Nov 2023 18:38:10 +0200 Subject: [PATCH 1/3] gh-111942: Fix SystemError in the TextIOWrapper constructor In non-debug mode the check for the "errors" argument is skipped, and then PyUnicode_AsUTF8() can fail, but its result was not checked. --- .../2023-11-14-18-43-55.gh-issue-111942.x1pnrj.rst | 2 ++ Modules/_io/textio.c | 8 ++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-11-14-18-43-55.gh-issue-111942.x1pnrj.rst diff --git a/Misc/NEWS.d/next/Library/2023-11-14-18-43-55.gh-issue-111942.x1pnrj.rst b/Misc/NEWS.d/next/Library/2023-11-14-18-43-55.gh-issue-111942.x1pnrj.rst new file mode 100644 index 00000000000000..ca58a6fa5d6ae1 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-11-14-18-43-55.gh-issue-111942.x1pnrj.rst @@ -0,0 +1,2 @@ +Fix SystemError in the TextIOWrapper constructor with non-encodable "errors" +argument in non-debug mode. 
diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 5c37e99067f886..cdfc78011460fb 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1112,6 +1112,10 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer, else if (io_check_errors(errors)) { return -1; } + const char *errors_str = PyUnicode_AsUTF8(errors); + if (errors_str == NULL) { + return -1; + } if (validate_newline(newline) < 0) { return -1; @@ -1184,11 +1188,11 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer, /* Build the decoder object */ _PyIO_State *state = find_io_state_by_def(Py_TYPE(self)); self->state = state; - if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0) + if (_textiowrapper_set_decoder(self, codec_info, errors_str) != 0) goto error; /* Build the encoder object */ - if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0) + if (_textiowrapper_set_encoder(self, codec_info, errors_str) != 0) goto error; /* Finished sorting out the codec details */ From f29052053839b1d2958679ceacc09abdb5a86429 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 14 Nov 2023 19:39:07 +0100 Subject: [PATCH 2/3] Update Modules/_io/textio.c --- Modules/_io/textio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index cdfc78011460fb..8d19198502fe12 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1112,7 +1112,7 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer, else if (io_check_errors(errors)) { return -1; } - const char *errors_str = PyUnicode_AsUTF8(errors); + const char *errors_str = _PyUnicode_AsUTF8NoNUL(errors); if (errors_str == NULL) { return -1; } From 43b222c9757f8776c0c2f1906ade4090ec353381 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 14 Nov 2023 19:43:16 +0100 Subject: [PATCH 3/3] Enable test on embedded null in errors on _io test --- Lib/test/test_io.py | 4 +--- 1 file changed, 1 
insertion(+), 3 deletions(-) diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index fe622e836ddac2..ab3389215e93d0 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -2725,9 +2725,7 @@ def test_constructor(self): if support.Py_DEBUG or sys.flags.dev_mode or self.is_C: with self.assertRaises(UnicodeEncodeError): t.__init__(b, encoding="utf-8", errors='\udcfe') - if support.Py_DEBUG or sys.flags.dev_mode: - # TODO: If encoded to UTF-8, should also be checked for - # embedded null characters. + if support.Py_DEBUG or sys.flags.dev_mode or self.is_C: with self.assertRaises(ValueError): t.__init__(b, encoding="utf-8", errors='replace\0') with self.assertRaises(TypeError):