From d968baa9af4998e0262a5eb8bed9c9b90391803e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 20 Jun 2025 16:27:56 +0200 Subject: [PATCH 1/4] reject negative sizes in SHAKE digests --- Lib/test/test_hashlib.py | 64 ++++++++++++++----- ...-06-20-16-28-47.gh-issue-135759.jne0Zi.rst | 2 + Modules/_hashopenssl.c | 15 ++++- Modules/clinic/sha3module.c.h | 38 ++++++++--- Modules/sha3module.c | 33 ++++++---- 5 files changed, 113 insertions(+), 39 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-20-16-28-47.gh-issue-135759.jne0Zi.rst diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index b83ae181718b7a..e77343f827811a 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -207,6 +207,11 @@ def hash_constructors(self): constructors = self.constructors_to_test.values() return itertools.chain.from_iterable(constructors) + @property + def shake_constructors(self): + for shake_name in self.shakes: + yield from self.constructors_to_test.get(shake_name, ()) + @property def is_fips_mode(self): return get_fips_mode() @@ -376,21 +381,50 @@ def test_hexdigest(self): self.assertIsInstance(h.digest(), bytes) self.assertEqual(hexstr(h.digest()), h.hexdigest()) - def test_digest_length_overflow(self): - # See issue #34922 - large_sizes = (2**29, 2**32-10, 2**32+10, 2**61, 2**64-10, 2**64+10) - for cons in self.hash_constructors: - h = cons(usedforsecurity=False) - if h.name not in self.shakes: - continue - if HASH is not None and isinstance(h, HASH): - # _hashopenssl's take a size_t - continue - for digest in h.digest, h.hexdigest: - self.assertRaises(ValueError, digest, -10) - for length in large_sizes: - with self.assertRaises((ValueError, OverflowError)): - digest(length) + def test_shakes_zero_digest_length(self): + for constructor in self.shake_constructors: + with self.subTest(constructor=constructor): + h = constructor(b'abcdef', usedforsecurity=False) + self.assertEqual(h.digest(0), b'') + self.assertEqual(h.hexdigest(0), '') + + def test_shakes_invalid_digest_length(self): + # See https://github.com/python/cpython/issues/79103. + for constructor in self.shake_constructors: + with self.subTest(constructor=constructor): + h = constructor(usedforsecurity=False) + # Note: digest() and hexdigest() take a signed input and + # raise if it is negative; the rationale is that we use + # internally PyBytes_FromStringAndSize() and _Py_strhex() + # which both take a Py_ssize_t. + for negative_size in (-1, -10, -(1 << 31), -sys.maxsize): + self.assertRaises(ValueError, h.digest, negative_size) + self.assertRaises(ValueError, h.hexdigest, negative_size) + + def test_shakes_overflow_digest_length(self): + # See https://github.com/python/cpython/issues/135759. + + exc_types = (OverflowError, ValueError) + # HACL* accepts an 'uint32_t' while OpenSSL accepts a 'size_t'. + openssl_overflown_sizes = (sys.maxsize + 1, 2 * sys.maxsize) + # https://github.com/python/cpython/issues/79103 restricts + # the accepted built-in lengths to 2 ** 29, even if OpenSSL + # accepts such lengths. + builtin_overflown_sizes = openssl_overflown_sizes + ( + 2 ** 29, 2 ** 32 - 10, 2 ** 32, 2 ** 32 + 10, + 2 ** 61, 2 ** 64 - 10, 2 ** 64, 2 ** 64 + 10, + ) + + for constructor in self.shake_constructors: + with self.subTest(constructor=constructor): + h = constructor(usedforsecurity=False) + if HASH is not None and isinstance(h, HASH): + overflown_sizes = openssl_overflown_sizes + else: + overflown_sizes = builtin_overflown_sizes + for invalid_size in overflown_sizes: + self.assertRaises(exc_types, h.digest, invalid_size) + self.assertRaises(exc_types, h.hexdigest, invalid_size) def test_name_attribute(self): for cons in self.hash_constructors: diff --git a/Misc/NEWS.d/next/Library/2025-06-20-16-28-47.gh-issue-135759.jne0Zi.rst b/Misc/NEWS.d/next/Library/2025-06-20-16-28-47.gh-issue-135759.jne0Zi.rst new file mode 100644 index 00000000000000..11c389deee4149 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-20-16-28-47.gh-issue-135759.jne0Zi.rst @@ -0,0 +1,2 @@ +:mod:`hashlib`: correctly reject negative digest lengths in SHAKE objects. +Patch by Bénédikt Tran. diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index ce9603d5db841f..75ddd7c24823f5 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -938,8 +938,14 @@ _hashlib_HASHXOF_digest_impl(HASHobject *self, Py_ssize_t length) /*[clinic end generated code: output=dcb09335dd2fe908 input=3eb034ce03c55b21]*/ { EVP_MD_CTX *temp_ctx; - PyObject *retval = PyBytes_FromStringAndSize(NULL, length); + PyObject *retval; + + if (length < 0) { + PyErr_SetString(PyExc_ValueError, "negative digest length"); + return NULL; + } + retval = PyBytes_FromStringAndSize(NULL, length); if (retval == NULL) { return NULL; } @@ -986,9 +992,14 @@ _hashlib_HASHXOF_hexdigest_impl(HASHobject *self, Py_ssize_t length) EVP_MD_CTX *temp_ctx; PyObject *retval; + if (length < 0) { + PyErr_SetString(PyExc_ValueError, "negative digest length"); + return NULL; + } + digest = (unsigned char*)PyMem_Malloc(length); if (digest == NULL) { - PyErr_NoMemory(); + (void)PyErr_NoMemory(); return NULL; } diff --git a/Modules/clinic/sha3module.c.h b/Modules/clinic/sha3module.c.h index 121be2c0758695..d6d44bf4c514c0 100644 --- a/Modules/clinic/sha3module.c.h +++ b/Modules/clinic/sha3module.c.h @@ -6,7 +6,7 @@ preserve # include "pycore_gc.h" // PyGC_Head # include "pycore_runtime.h" // _Py_ID() #endif -#include "pycore_long.h" // _PyLong_UnsignedLong_Converter() +#include "pycore_abstract.h" // _PyNumber_Index() #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() PyDoc_STRVAR(py_sha3_new__doc__, @@ -179,7 +179,7 @@ PyDoc_STRVAR(_sha3_shake_128_digest__doc__, {"digest", _PyCFunction_CAST(_sha3_shake_128_digest), METH_FASTCALL|METH_KEYWORDS, _sha3_shake_128_digest__doc__}, static PyObject * -_sha3_shake_128_digest_impl(SHA3object *self, unsigned long length); +_sha3_shake_128_digest_impl(SHA3object *self, Py_ssize_t length); static PyObject * _sha3_shake_128_digest(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -213,15 +213,24 @@ _sha3_shake_128_digest(PyObject *self, PyObject *const *args, Py_ssize_t nargs, }; #undef KWTUPLE PyObject *argsbuf[1]; - unsigned long length; + Py_ssize_t length; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); if (!args) { goto exit; } - if (!_PyLong_UnsignedLong_Converter(args[0], &length)) { - goto exit; + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[0]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + length = ival; } return_value = _sha3_shake_128_digest_impl((SHA3object *)self, length); @@ -239,7 +248,7 @@ PyDoc_STRVAR(_sha3_shake_128_hexdigest__doc__, {"hexdigest", _PyCFunction_CAST(_sha3_shake_128_hexdigest), METH_FASTCALL|METH_KEYWORDS, _sha3_shake_128_hexdigest__doc__}, static PyObject * -_sha3_shake_128_hexdigest_impl(SHA3object *self, unsigned long length); +_sha3_shake_128_hexdigest_impl(SHA3object *self, Py_ssize_t length); static PyObject * _sha3_shake_128_hexdigest(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -273,19 +282,28 @@ _sha3_shake_128_hexdigest(PyObject *self, PyObject *const *args, Py_ssize_t narg }; #undef KWTUPLE PyObject *argsbuf[1]; - unsigned long length; + Py_ssize_t length; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); if (!args) { goto exit; } - if (!_PyLong_UnsignedLong_Converter(args[0], &length)) { - goto exit; + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[0]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + length = ival; } return_value = _sha3_shake_128_hexdigest_impl((SHA3object *)self, length); exit: return return_value; } -/*[clinic end generated code: output=65e437799472b89f input=a9049054013a1b77]*/ +/*[clinic end generated code: output=c17e3ec670afe253 input=a9049054013a1b77]*/ diff --git a/Modules/sha3module.c b/Modules/sha3module.c index cfbf0cbcc042c5..55d3cbe2ba7817 100644 --- a/Modules/sha3module.c +++ b/Modules/sha3module.c @@ -473,16 +473,25 @@ SHA3_TYPE_SLOTS(sha3_512_slots, sha3_512__doc__, SHA3_methods, SHA3_getseters); SHA3_TYPE_SPEC(sha3_512_spec, "sha3_512", sha3_512_slots); static PyObject * -_SHAKE_digest(PyObject *op, unsigned long digestlen, int hex) +_SHAKE_digest(SHA3object *self, Py_ssize_t digestlen, int hex) { unsigned char *digest = NULL; PyObject *result = NULL; - SHA3object *self = _SHA3object_CAST(op); - if (digestlen >= (1 << 29)) { - PyErr_SetString(PyExc_ValueError, "length is too large"); + if (digestlen < 0) { + PyErr_SetString(PyExc_ValueError, "negative digest length"); + return NULL; + } + if ((size_t)digestlen >= (1 << 29)) { + /* + * Raise OverflowError to match the semantics of OpenSSL SHAKE + * when the digest length exceeds the range of a 'Py_ssize_t'; + * the exception message will however be different in this case. + */ + PyErr_SetString(PyExc_OverflowError, "digest length is too large"); return NULL; } + digest = (unsigned char*)PyMem_Malloc(digestlen); if (digest == NULL) { return PyErr_NoMemory(); @@ -509,32 +518,32 @@ _SHAKE_digest(PyObject *op, unsigned long digestlen, int hex) /*[clinic input] _sha3.shake_128.digest - length: unsigned_long + length: Py_ssize_t Return the digest value as a bytes object. [clinic start generated code]*/ static PyObject * -_sha3_shake_128_digest_impl(SHA3object *self, unsigned long length) -/*[clinic end generated code: output=2313605e2f87bb8f input=93d6d6ff32904f18]*/ +_sha3_shake_128_digest_impl(SHA3object *self, Py_ssize_t length) +/*[clinic end generated code: output=6c53fb71a6cff0a0 input=be03ade4b31dd54c]*/ { - return _SHAKE_digest((PyObject *)self, length, 0); + return _SHAKE_digest(self, length, 0); } /*[clinic input] _sha3.shake_128.hexdigest - length: unsigned_long + length: Py_ssize_t Return the digest value as a string of hexadecimal digits. [clinic start generated code]*/ static PyObject * -_sha3_shake_128_hexdigest_impl(SHA3object *self, unsigned long length) -/*[clinic end generated code: output=bf8e2f1e490944a8 input=562d74e7060b56ab]*/ +_sha3_shake_128_hexdigest_impl(SHA3object *self, Py_ssize_t length) +/*[clinic end generated code: output=a27412d404f64512 input=0d84d05d7a8ccd37]*/ { - return _SHAKE_digest((PyObject *)self, length, 1); + return _SHAKE_digest(self, length, 1); } static PyObject * From 8abac269189171aceb37f1c57b70609723b360b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 21 Jun 2025 10:45:12 +0200 Subject: [PATCH 2/4] fix warnings --- Modules/sha3module.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Modules/sha3module.c b/Modules/sha3module.c index 55d3cbe2ba7817..c9235e6a69ecb8 100644 --- a/Modules/sha3module.c +++ b/Modules/sha3module.c @@ -502,7 +502,8 @@ _SHAKE_digest(SHA3object *self, Py_ssize_t digestlen, int hex) * - the output length is zero -- we follow the existing behavior and return * an empty digest, without raising an error */ if (digestlen > 0) { - (void)Hacl_Hash_SHA3_squeeze(self->hash_state, digest, digestlen); + (void)Hacl_Hash_SHA3_squeeze(self->hash_state, digest, + (uint32_t)digestlen); } if (hex) { result = _Py_strhex((const char *)digest, digestlen); From 82829d8c6724420ace5cc4acd77e3d9fd2262ca3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 21 Jun 2025 10:46:21 +0200 Subject: [PATCH 3/4] strenghten assertions --- Modules/sha3module.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Modules/sha3module.c b/Modules/sha3module.c index c9235e6a69ecb8..3d047e8e0fe42c 100644 --- a/Modules/sha3module.c +++ b/Modules/sha3module.c @@ -502,6 +502,9 @@ _SHAKE_digest(SHA3object *self, Py_ssize_t digestlen, int hex) * - the output length is zero -- we follow the existing behavior and return * an empty digest, without raising an error */ if (digestlen > 0) { +#if PY_SSIZE_T_MAX > UINT32_MAX + assert(digestlen <= (Py_ssize_t)UINT32_MAX); +#endif (void)Hacl_Hash_SHA3_squeeze(self->hash_state, digest, (uint32_t)digestlen); } From 415f657cb42b2b2948e48e1ee87cf303f39b82ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 21 Jun 2025 11:18:07 +0200 Subject: [PATCH 4/4] amend NEWS --- .../Library/2025-06-20-16-28-47.gh-issue-135759.jne0Zi.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2025-06-20-16-28-47.gh-issue-135759.jne0Zi.rst b/Misc/NEWS.d/next/Library/2025-06-20-16-28-47.gh-issue-135759.jne0Zi.rst index 11c389deee4149..268d7eccdabc7f 100644 --- a/Misc/NEWS.d/next/Library/2025-06-20-16-28-47.gh-issue-135759.jne0Zi.rst +++ b/Misc/NEWS.d/next/Library/2025-06-20-16-28-47.gh-issue-135759.jne0Zi.rst @@ -1,2 +1,4 @@ -:mod:`hashlib`: correctly reject negative digest lengths in SHAKE objects. +:mod:`hashlib`: reject negative digest lengths in OpenSSL-based SHAKE objects +by raising a :exc:`ValueError`. Previously, negative lengths were implicitly +rejected by raising a :exc:`MemoryError` or a :exc:`SystemError`. Patch by Bénédikt Tran.