From 479c7fbf63c079fb488327cbee2cc23c564660d7 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 9 Jan 2025 13:03:56 +0100 Subject: [PATCH 1/7] gh-59705: Implement _thread.set_name() on Windows Implement set_name() with SetThreadDescription() and _get_name() with GetThreadDescription(). If SetThreadDescription() or GetThreadDescription() is not available in kernelbase.dll, delete the method when the _thread module is imported. Truncate the thread name to an arbitrary limit of 64 characters. set_name() raises ValueError if the name contains an embedded null character. Co-authored-by: Eryk Sun --- Lib/test/test_threading.py | 30 ++++++++----- Lib/threading.py | 4 +- Modules/_threadmodule.c | 77 +++++++++++++++++++++++++++++++- Modules/clinic/_threadmodule.c.h | 10 ++--- PC/pyconfig.h.in | 4 ++ 5 files changed, 105 insertions(+), 20 deletions(-) diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index 3e164a12581dd1..5485b5d645eb43 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -2118,10 +2118,6 @@ def test_set_name(self): # test short non-ASCII name "namé€", - # embedded null character: name is truncated - # at the first null character - "embed\0null", - # Test long ASCII names (not truncated) "x" * limit, @@ -2131,6 +2127,13 @@ def test_set_name(self): # Test long non-ASCII name (truncated) "x" * (limit - 1) + "é€", ] + # set_name() raises ValueError on Windows if the name contains an + # embedded null character, error ignored silently by the threading + # module. + if not support.MS_WINDOWS: + # embedded null character: name is truncated + # at the first null character + tests.append("embed\0null") if os_helper.FS_NONASCII: tests.append(f"nonascii:{os_helper.FS_NONASCII}") if os_helper.TESTFN_UNENCODABLE: @@ -2146,15 +2149,18 @@ def work(): work_name = _thread._get_name() for name in tests: - encoded = name.encode(encoding, "replace") - if b'\0' in encoded: - encoded = encoded.split(b'\0', 1)[0] - if truncate is not None: - encoded = encoded[:truncate] - if sys.platform.startswith("solaris"): - expected = encoded.decode("utf-8", "surrogateescape") + if not support.MS_WINDOWS: + encoded = name.encode(encoding, "replace") + if b'\0' in encoded: + encoded = encoded.split(b'\0', 1)[0] + if truncate is not None: + encoded = encoded[:truncate] + if sys.platform.startswith("solaris"): + expected = encoded.decode("utf-8", "surrogateescape") + else: + expected = os.fsdecode(encoded) else: - expected = os.fsdecode(encoded) + expected = name[:truncate] with self.subTest(name=name, expected=expected): work_name = None diff --git a/Lib/threading.py b/Lib/threading.py index 78e591124278fc..b9c980d954c300 100644 --- a/Lib/threading.py +++ b/Lib/threading.py @@ -1031,7 +1031,9 @@ def _set_os_name(self): return try: _set_name(self._name) - except OSError: + except (OSError, ValueError): + # On Windows, ValueError is raised if name contains an embedded + # null character. pass def _bootstrap_inner(self): diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index 2cbdfeb09b95ae..7fdf62a8be5fe6 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -47,6 +47,14 @@ get_thread_state(PyObject *module) } +#ifdef MS_WINDOWS +typedef HRESULT (WINAPI *PF_GET_THREAD_DESCRIPTION)(HANDLE, PCWSTR*); +typedef HRESULT (WINAPI *PF_SET_THREAD_DESCRIPTION)(HANDLE, PCWSTR); +static PF_GET_THREAD_DESCRIPTION pGetThreadDescription = NULL; +static PF_SET_THREAD_DESCRIPTION pSetThreadDescription = NULL; +#endif + + /*[clinic input] module _thread [clinic start generated code]*/ @@ -2364,7 +2372,7 @@ Internal only. Return a non-zero integer that uniquely identifies the main threa of the main interpreter."); -#ifdef HAVE_PTHREAD_GETNAME_NP +#if defined(HAVE_PTHREAD_GETNAME_NP) || defined(MS_WINDOWS) /*[clinic input] _thread._get_name @@ -2375,6 +2383,7 @@ static PyObject * _thread__get_name_impl(PyObject *module) /*[clinic end generated code: output=20026e7ee3da3dd7 input=35cec676833d04c8]*/ { +#ifndef MS_WINDOWS // Linux and macOS are limited to respectively 16 and 64 bytes char name[100]; pthread_t thread = pthread_self(); @@ -2389,11 +2398,26 @@ _thread__get_name_impl(PyObject *module) #else return PyUnicode_DecodeFSDefault(name); #endif +#else + // Windows implementation + assert(pGetThreadDescription != NULL); + + wchar_t *name; + HRESULT hr = pGetThreadDescription(GetCurrentThread(), &name); + if (FAILED(hr)) { + PyErr_SetFromWindowsErr(0); + return NULL; + } + + PyObject *name_obj = PyUnicode_FromWideChar(name, -1); + LocalFree(name); + return name_obj; +#endif } #endif // HAVE_PTHREAD_GETNAME_NP -#ifdef HAVE_PTHREAD_SETNAME_NP +#if defined(HAVE_PTHREAD_SETNAME_NP) || defined(MS_WINDOWS) /*[clinic input] _thread.set_name @@ -2406,6 +2430,7 @@ static PyObject * _thread_set_name_impl(PyObject *module, PyObject *name_obj) /*[clinic end generated code: output=402b0c68e0c0daed input=7e7acd98261be82f]*/ { +#ifndef MS_WINDOWS #ifdef __sun // Solaris always uses UTF-8 const char *encoding = "utf-8"; @@ -2451,6 +2476,29 @@ _thread_set_name_impl(PyObject *module, PyObject *name_obj) return PyErr_SetFromErrno(PyExc_OSError); } Py_RETURN_NONE; +#else + // Windows implementation + assert(pSetThreadDescription != NULL); + + Py_ssize_t len; + wchar_t *name = PyUnicode_AsWideCharString(name_obj, &len); + if (name == NULL) { + return NULL; + } + + if (len > PYTHREAD_NAME_MAXLEN) { + // Truncate the name + name[PYTHREAD_NAME_MAXLEN] = 0; + } + + HRESULT hr = pSetThreadDescription(GetCurrentThread(), name); + PyMem_Free(name); + if (FAILED(hr)) { + PyErr_SetFromWindowsErr(0); + return NULL; + } + Py_RETURN_NONE; +#endif } #endif // HAVE_PTHREAD_SETNAME_NP @@ -2594,6 +2642,31 @@ thread_module_exec(PyObject *module) } #endif +#ifdef MS_WINDOWS + HMODULE kernelbase = GetModuleHandleW(L"kernelbase.dll"); + if (kernelbase != NULL) { + if (pGetThreadDescription == NULL) { + pGetThreadDescription = (PF_GET_THREAD_DESCRIPTION)GetProcAddress( + kernelbase, "GetThreadDescription"); + } + if (pSetThreadDescription == NULL) { + pSetThreadDescription = (PF_SET_THREAD_DESCRIPTION)GetProcAddress( + kernelbase, "SetThreadDescription"); + } + + if (pGetThreadDescription == NULL) { + if (PyObject_DelAttrString(module, "_get_name") < 0) { + return -1; + } + } + if (pSetThreadDescription == NULL) { + if (PyObject_DelAttrString(module, "set_name") < 0) { + return -1; + } + } + } +#endif + return 0; } diff --git a/Modules/clinic/_threadmodule.c.h b/Modules/clinic/_threadmodule.c.h index 8f0507d40285b3..09b7afebd6d8d9 100644 --- a/Modules/clinic/_threadmodule.c.h +++ b/Modules/clinic/_threadmodule.c.h @@ -8,7 +8,7 @@ preserve #endif #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() -#if defined(HAVE_PTHREAD_GETNAME_NP) +#if (defined(HAVE_PTHREAD_GETNAME_NP) || defined(MS_WINDOWS)) PyDoc_STRVAR(_thread__get_name__doc__, "_get_name($module, /)\n" @@ -28,9 +28,9 @@ _thread__get_name(PyObject *module, PyObject *Py_UNUSED(ignored)) return _thread__get_name_impl(module); } -#endif /* defined(HAVE_PTHREAD_GETNAME_NP) */ +#endif /* (defined(HAVE_PTHREAD_GETNAME_NP) || defined(MS_WINDOWS)) */ -#if defined(HAVE_PTHREAD_SETNAME_NP) +#if (defined(HAVE_PTHREAD_SETNAME_NP) || defined(MS_WINDOWS)) PyDoc_STRVAR(_thread_set_name__doc__, "set_name($module, /, name)\n" @@ -92,7 +92,7 @@ _thread_set_name(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyOb return return_value; } -#endif /* defined(HAVE_PTHREAD_SETNAME_NP) */ +#endif /* (defined(HAVE_PTHREAD_SETNAME_NP) || defined(MS_WINDOWS)) */ #ifndef _THREAD__GET_NAME_METHODDEF #define _THREAD__GET_NAME_METHODDEF @@ -101,4 +101,4 @@ _thread_set_name(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyOb #ifndef _THREAD_SET_NAME_METHODDEF #define _THREAD_SET_NAME_METHODDEF #endif /* !defined(_THREAD_SET_NAME_METHODDEF) */ -/*[clinic end generated code: output=b5cb85aaccc45bf6 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=6e88ef6b126cece8 input=a9049054013a1b77]*/ diff --git a/PC/pyconfig.h.in b/PC/pyconfig.h.in index 010f5fe5646630..9bb0da0f99e116 100644 --- a/PC/pyconfig.h.in +++ b/PC/pyconfig.h.in @@ -753,4 +753,8 @@ Py_NO_ENABLE_SHARED to find out. Also support MS_NO_COREDLL for b/w compat */ /* Define if libssl has X509_VERIFY_PARAM_set1_host and related function */ #define HAVE_X509_VERIFY_PARAM_SET1_HOST 1 +// Truncate the thread name to 64 characters. The OS limit is 32766 wide +// characters, but long names aren't of practical use. +#define PYTHREAD_NAME_MAXLEN 64 + #endif /* !Py_CONFIG_H */ From d5881039d94fffdbecdcda4c1851563d75309320 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 9 Jan 2025 13:47:04 +0100 Subject: [PATCH 2/7] Embedded null characters are accepted --- Lib/test/test_threading.py | 13 ++++++------- Lib/threading.py | 4 +--- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index 5485b5d645eb43..3030580aacd655 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -2118,6 +2118,10 @@ def test_set_name(self): # test short non-ASCII name "namé€", + # embedded null character: name is truncated + # at the first null character + "embed\0null", + # Test long ASCII names (not truncated) "x" * limit, @@ -2127,13 +2131,6 @@ def test_set_name(self): # Test long non-ASCII name (truncated) "x" * (limit - 1) + "é€", ] - # set_name() raises ValueError on Windows if the name contains an - # embedded null character, error ignored silently by the threading - # module. - if not support.MS_WINDOWS: - # embedded null character: name is truncated - # at the first null character - tests.append("embed\0null") if os_helper.FS_NONASCII: tests.append(f"nonascii:{os_helper.FS_NONASCII}") if os_helper.TESTFN_UNENCODABLE: @@ -2161,6 +2158,8 @@ def work(): expected = os.fsdecode(encoded) else: expected = name[:truncate] + if '\0' in expected: + expected = expected.split('\0', 1)[0] with self.subTest(name=name, expected=expected): work_name = None diff --git a/Lib/threading.py b/Lib/threading.py index b9c980d954c300..78e591124278fc 100644 --- a/Lib/threading.py +++ b/Lib/threading.py @@ -1031,9 +1031,7 @@ def _set_os_name(self): return try: _set_name(self._name) - except (OSError, ValueError): - # On Windows, ValueError is raised if name contains an embedded - # null character. + except OSError: pass def _bootstrap_inner(self): From 8f2ead733d5dc9bbf7a927e0bb55f97331edfb05 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 16 Jan 2025 11:31:52 +0100 Subject: [PATCH 3/7] Fix trailing surrogate characters --- Lib/test/test_threading.py | 9 +++++++++ Modules/_threadmodule.c | 8 +++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index 3030580aacd655..cf4ff97804b5ae 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -2130,6 +2130,11 @@ def test_set_name(self): # Test long non-ASCII name (truncated) "x" * (limit - 1) + "é€", + + # Test long non-BMP names (truncated) creating surrogate pairs + # on Windows + "x" * (limit - 1) + "\U0010FFFF", + "x" * (limit - 2) + "\U0010FFFF" * 2, ] if os_helper.FS_NONASCII: tests.append(f"nonascii:{os_helper.FS_NONASCII}") @@ -2158,6 +2163,10 @@ def work(): expected = os.fsdecode(encoded) else: expected = name[:truncate] + if ord(expected[-1]) > 0xFFFF: + # truncate the last non-BMP character to omit a lone + # surrogate character + expected = expected[:-1] if '\0' in expected: expected = expected.split('\0', 1)[0] diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index 7fdf62a8be5fe6..dd91bd6679574b 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -2488,7 +2488,13 @@ _thread_set_name_impl(PyObject *module, PyObject *name_obj) if (len > PYTHREAD_NAME_MAXLEN) { // Truncate the name - name[PYTHREAD_NAME_MAXLEN] = 0; + Py_UCS4 ch = name[PYTHREAD_NAME_MAXLEN-1]; + if (Py_UNICODE_IS_HIGH_SURROGATE(ch)) { + name[PYTHREAD_NAME_MAXLEN-1] = 0; + } + else { + name[PYTHREAD_NAME_MAXLEN] = 0; + } } HRESULT hr = pSetThreadDescription(GetCurrentThread(), name); From f1c523207b24198548abb0ec349829403b4188a4 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 17 Jan 2025 13:03:13 +0100 Subject: [PATCH 4/7] Add more non-BMP tests; fix test on Windows --- Lib/test/test_threading.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index cf4ff97804b5ae..214e1ba0b53dd2 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -2135,6 +2135,10 @@ def test_set_name(self): # on Windows "x" * (limit - 1) + "\U0010FFFF", "x" * (limit - 2) + "\U0010FFFF" * 2, + "x" + "\U0001f40d" * limit, + "xx" + "\U0001f40d" * limit, + "xxx" + "\U0001f40d" * limit, + "xxxx" + "\U0001f40d" * limit, ] if os_helper.FS_NONASCII: tests.append(f"nonascii:{os_helper.FS_NONASCII}") @@ -2162,11 +2166,18 @@ def work(): else: expected = os.fsdecode(encoded) else: - expected = name[:truncate] - if ord(expected[-1]) > 0xFFFF: - # truncate the last non-BMP character to omit a lone - # surrogate character - expected = expected[:-1] + size = 0 + chars = [] + for ch in name: + if ord(ch) > 0xFFFF: + size += 2 + else: + size += 1 + if size > truncate: + break + chars.append(ch) + expected = ''.join(chars) + if '\0' in expected: expected = expected.split('\0', 1)[0] From ba09ba1e20d8a2eb216e4b0394dd85cab9f69146 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 17 Jan 2025 13:17:44 +0100 Subject: [PATCH 5/7] Set limit to 32766 --- PC/pyconfig.h.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PC/pyconfig.h.in b/PC/pyconfig.h.in index 9bb0da0f99e116..837461d0e884bc 100644 --- a/PC/pyconfig.h.in +++ b/PC/pyconfig.h.in @@ -755,6 +755,6 @@ Py_NO_ENABLE_SHARED to find out. Also support MS_NO_COREDLL for b/w compat */ // Truncate the thread name to 64 characters. The OS limit is 32766 wide // characters, but long names aren't of practical use. -#define PYTHREAD_NAME_MAXLEN 64 +#define PYTHREAD_NAME_MAXLEN 32766 #endif /* !Py_CONFIG_H */ From 8148a96580c5593e670434c2236041c016ca4f6c Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 17 Jan 2025 13:17:53 +0100 Subject: [PATCH 6/7] Fix set_name() error handling --- Modules/_threadmodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index dd91bd6679574b..48f3744377e46c 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -2500,7 +2500,7 @@ _thread_set_name_impl(PyObject *module, PyObject *name_obj) HRESULT hr = pSetThreadDescription(GetCurrentThread(), name); PyMem_Free(name); if (FAILED(hr)) { - PyErr_SetFromWindowsErr(0); + PyErr_SetFromWindowsErr((int)hr); return NULL; } Py_RETURN_NONE; From 0ff056727bea15988be5455cb0a09586f89ea055 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 17 Jan 2025 13:38:22 +0100 Subject: [PATCH 7/7] Fix thread_module_exec() Remove the module methods if GetModuleHandleW() fails. --- Modules/_threadmodule.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index 48f3744377e46c..1abb847bd80c60 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -2659,16 +2659,16 @@ thread_module_exec(PyObject *module) pSetThreadDescription = (PF_SET_THREAD_DESCRIPTION)GetProcAddress( kernelbase, "SetThreadDescription"); } + } - if (pGetThreadDescription == NULL) { - if (PyObject_DelAttrString(module, "_get_name") < 0) { - return -1; - } + if (pGetThreadDescription == NULL) { + if (PyObject_DelAttrString(module, "_get_name") < 0) { + return -1; } - if (pSetThreadDescription == NULL) { - if (PyObject_DelAttrString(module, "set_name") < 0) { - return -1; - } + } + if (pSetThreadDescription == NULL) { + if (PyObject_DelAttrString(module, "set_name") < 0) { + return -1; } } #endif