gh-99108: Release the GIL around hashlib built-in computation (#104675)

This matches the GIL releasing behavior of our existing `_hashopenssl`
module, extending it to the HACL* built-ins.

Also adds comments that better describe the purpose of the ENTER/LEAVE
macros and explain the lock strategy in both existing and new code.
This commit is contained in:
Gregory P. Smith 2023-05-22 17:06:41 -07:00 committed by GitHub
parent 988c1f68ce
commit 2e5d8a90aa
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 207 additions and 15 deletions

View file

@ -0,0 +1,3 @@
We now release the GIL around built-in :mod:`hashlib` computations of
reasonable size for the SHA families and MD5 hash functions, matching
what our OpenSSL-backed hash computations already do.

View file

@ -227,12 +227,16 @@ get_hashlib_state(PyObject *module)
/* Instance layout of a hash object backed by an OpenSSL message digest
 * context. */
typedef struct {
PyObject_HEAD
EVP_MD_CTX *ctx; /* OpenSSL message digest context */
// Prevents undefined behavior via multiple threads entering the C API.
// The lock will be NULL before threaded access has been enabled.
PyThread_type_lock lock; /* OpenSSL context lock */
} EVPobject;
/* Instance layout of an HMAC object backed by an OpenSSL HMAC context. */
typedef struct {
PyObject_HEAD
HMAC_CTX *ctx; /* OpenSSL hmac context */
// Prevents undefined behavior via multiple threads entering the C API.
// The lock will be NULL before threaded access has been enabled.
PyThread_type_lock lock; /* HMAC context lock */
} HMACobject;
@ -896,6 +900,8 @@ py_evp_fromname(PyObject *module, const char *digestname, PyObject *data_obj,
if (view.buf && view.len) {
if (view.len >= HASHLIB_GIL_MINSIZE) {
/* We do not initialize self->lock here as this is the constructor
* where it is not yet possible to have concurrent access. */
Py_BEGIN_ALLOW_THREADS
result = EVP_hash(self, view.buf, view.len);
Py_END_ALLOW_THREADS

View file

@ -37,6 +37,13 @@
* LEAVE_HASHLIB block or explicitly acquire and release the lock inside
* a PY_BEGIN / END_ALLOW_THREADS block if they wish to release the GIL for
* an operation.
*
* These only drop the GIL if the lock acquisition itself is likely to
* block. Hence a non-blocking acquire gates the GIL release that wraps
* the blocking lock acquisition. The intent of these macros is to
* surround the assumed always "fast" operations that you aren't
* releasing the GIL around. Otherwise use code similar to what you see
* in hash function update() methods.
*/
#include "pythread.h"
@ -53,7 +60,7 @@
PyThread_release_lock((obj)->lock); \
}
/* TODO(gps): We should probably make this a module or EVPobject attribute
/* TODO(gpshead): We should make this a module or class attribute
* to allow the user to optimize based on the platform they're using. */
#define HASHLIB_GIL_MINSIZE 2048

View file

@ -49,7 +49,9 @@ typedef long long MD5_INT64; /* 64-bit integer */
/* Instance layout of an MD5 hash object. */
typedef struct {
PyObject_HEAD
// Prevents undefined behavior via multiple threads entering the C API.
// The lock will be NULL before threaded access has been enabled.
// It is allocated by update() the first time a buffer of at least
// HASHLIB_GIL_MINSIZE bytes is hashed, and freed by MD5_dealloc when
// it is non-NULL.
PyThread_type_lock lock;
// HACL* streaming MD5 state; released by MD5_dealloc.
Hacl_Streaming_MD5_state *hash_state;
} MD5object;
@ -72,6 +74,7 @@ static MD5object *
newMD5object(MD5State * st)
{
    /* Allocate a new, GC-tracked MD5 object of the type stored in the
     * module state `st`.
     *
     * Returns the new object with `lock` set to NULL (threaded access not
     * yet enabled), or NULL if the allocation failed. `hash_state` is left
     * for the caller to fill in. */
    MD5object *md5 = (MD5object *)PyObject_GC_New(MD5object, st->md5_type);
    if (md5 == NULL) {
        /* Do not touch the object or hand it to the GC: callers already
         * treat a NULL return as failure. */
        return NULL;
    }
    md5->lock = NULL;
    PyObject_GC_Track(md5);
    return md5;
}
@ -88,6 +91,9 @@ static void
MD5_dealloc(MD5object *ptr)
{
Hacl_Streaming_MD5_legacy_free(ptr->hash_state);
if (ptr->lock != NULL) {
PyThread_free_lock(ptr->lock);
}
PyTypeObject *tp = Py_TYPE(ptr);
PyObject_GC_UnTrack(ptr);
PyObject_GC_Del(ptr);
@ -115,7 +121,9 @@ MD5Type_copy_impl(MD5object *self, PyTypeObject *cls)
if ((newobj = newMD5object(st))==NULL)
return NULL;
ENTER_HASHLIB(self);
newobj->hash_state = Hacl_Streaming_MD5_legacy_copy(self->hash_state);
LEAVE_HASHLIB(self);
return (PyObject *)newobj;
}
@ -130,7 +138,9 @@ MD5Type_digest_impl(MD5object *self)
/*[clinic end generated code: output=eb691dc4190a07ec input=bc0c4397c2994be6]*/
{
unsigned char digest[MD5_DIGESTSIZE];
ENTER_HASHLIB(self);
Hacl_Streaming_MD5_legacy_finish(self->hash_state, digest);
LEAVE_HASHLIB(self);
return PyBytes_FromStringAndSize((const char *)digest, MD5_DIGESTSIZE);
}
@ -145,7 +155,9 @@ MD5Type_hexdigest_impl(MD5object *self)
/*[clinic end generated code: output=17badced1f3ac932 input=b60b19de644798dd]*/
{
unsigned char digest[MD5_DIGESTSIZE];
ENTER_HASHLIB(self);
Hacl_Streaming_MD5_legacy_finish(self->hash_state, digest);
LEAVE_HASHLIB(self);
return _Py_strhex((const char*)digest, MD5_DIGESTSIZE);
}
@ -177,7 +189,18 @@ MD5Type_update(MD5object *self, PyObject *obj)
GET_BUFFER_VIEW_OR_ERROUT(obj, &buf);
update(self->hash_state, buf.buf, buf.len);
if (self->lock == NULL && buf.len >= HASHLIB_GIL_MINSIZE) {
self->lock = PyThread_allocate_lock();
}
if (self->lock != NULL) {
Py_BEGIN_ALLOW_THREADS
PyThread_acquire_lock(self->lock, 1);
update(self->hash_state, buf.buf, buf.len);
PyThread_release_lock(self->lock);
Py_END_ALLOW_THREADS
} else {
update(self->hash_state, buf.buf, buf.len);
}
PyBuffer_Release(&buf);
Py_RETURN_NONE;
@ -279,7 +302,15 @@ _md5_md5_impl(PyObject *module, PyObject *string, int usedforsecurity)
return NULL;
}
if (string) {
update(new->hash_state, buf.buf, buf.len);
if (buf.len >= HASHLIB_GIL_MINSIZE) {
/* We do not initialize self->lock here as this is the constructor
* where it is not yet possible to have concurrent access. */
Py_BEGIN_ALLOW_THREADS
update(new->hash_state, buf.buf, buf.len);
Py_END_ALLOW_THREADS
} else {
update(new->hash_state, buf.buf, buf.len);
}
PyBuffer_Release(&buf);
}

View file

@ -48,7 +48,9 @@ typedef long long SHA1_INT64; /* 64-bit integer */
/* Instance layout of a SHA-1 hash object. */
typedef struct {
PyObject_HEAD
// Prevents undefined behavior via multiple threads entering the C API.
// The lock will be NULL before threaded access has been enabled.
// It is allocated by update() the first time a buffer of at least
// HASHLIB_GIL_MINSIZE bytes is hashed, and freed by SHA1_dealloc when
// it is non-NULL.
PyThread_type_lock lock;
// HACL* streaming SHA-1 state; released by SHA1_dealloc.
Hacl_Streaming_SHA1_state *hash_state;
} SHA1object;
@ -71,6 +73,7 @@ static SHA1object *
newSHA1object(SHA1State *st)
{
    /* Allocate a new, GC-tracked SHA-1 object of the type stored in the
     * module state `st`.
     *
     * Returns the new object with `lock` set to NULL (threaded access not
     * yet enabled), or NULL if the allocation failed. `hash_state` is left
     * for the caller to fill in. */
    SHA1object *sha = (SHA1object *)PyObject_GC_New(SHA1object, st->sha1_type);
    if (sha == NULL) {
        /* Do not touch the object or hand it to the GC: callers already
         * treat a NULL return as failure. */
        return NULL;
    }
    sha->lock = NULL;
    PyObject_GC_Track(sha);
    return sha;
}
@ -88,6 +91,9 @@ static void
SHA1_dealloc(SHA1object *ptr)
{
Hacl_Streaming_SHA1_legacy_free(ptr->hash_state);
if (ptr->lock != NULL) {
PyThread_free_lock(ptr->lock);
}
PyTypeObject *tp = Py_TYPE(ptr);
PyObject_GC_UnTrack(ptr);
PyObject_GC_Del(ptr);
@ -115,7 +121,9 @@ SHA1Type_copy_impl(SHA1object *self, PyTypeObject *cls)
if ((newobj = newSHA1object(st)) == NULL)
return NULL;
ENTER_HASHLIB(self);
newobj->hash_state = Hacl_Streaming_SHA1_legacy_copy(self->hash_state);
LEAVE_HASHLIB(self);
return (PyObject *)newobj;
}
@ -130,7 +138,9 @@ SHA1Type_digest_impl(SHA1object *self)
/*[clinic end generated code: output=2f05302a7aa2b5cb input=13824b35407444bd]*/
{
unsigned char digest[SHA1_DIGESTSIZE];
ENTER_HASHLIB(self);
Hacl_Streaming_SHA1_legacy_finish(self->hash_state, digest);
LEAVE_HASHLIB(self);
return PyBytes_FromStringAndSize((const char *)digest, SHA1_DIGESTSIZE);
}
@ -145,7 +155,9 @@ SHA1Type_hexdigest_impl(SHA1object *self)
/*[clinic end generated code: output=4161fd71e68c6659 input=97691055c0c74ab0]*/
{
unsigned char digest[SHA1_DIGESTSIZE];
ENTER_HASHLIB(self);
Hacl_Streaming_SHA1_legacy_finish(self->hash_state, digest);
LEAVE_HASHLIB(self);
return _Py_strhex((const char *)digest, SHA1_DIGESTSIZE);
}
@ -177,7 +189,18 @@ SHA1Type_update(SHA1object *self, PyObject *obj)
GET_BUFFER_VIEW_OR_ERROUT(obj, &buf);
update(self->hash_state, buf.buf, buf.len);
if (self->lock == NULL && buf.len >= HASHLIB_GIL_MINSIZE) {
self->lock = PyThread_allocate_lock();
}
if (self->lock != NULL) {
Py_BEGIN_ALLOW_THREADS
PyThread_acquire_lock(self->lock, 1);
update(self->hash_state, buf.buf, buf.len);
PyThread_release_lock(self->lock);
Py_END_ALLOW_THREADS
} else {
update(self->hash_state, buf.buf, buf.len);
}
PyBuffer_Release(&buf);
Py_RETURN_NONE;
@ -279,7 +302,15 @@ _sha1_sha1_impl(PyObject *module, PyObject *string, int usedforsecurity)
return NULL;
}
if (string) {
update(new->hash_state, buf.buf, buf.len);
if (buf.len >= HASHLIB_GIL_MINSIZE) {
/* We do not initialize self->lock here as this is the constructor
* where it is not yet possible to have concurrent access. */
Py_BEGIN_ALLOW_THREADS
update(new->hash_state, buf.buf, buf.len);
Py_END_ALLOW_THREADS
} else {
update(new->hash_state, buf.buf, buf.len);
}
PyBuffer_Release(&buf);
}

View file

@ -52,12 +52,18 @@ class SHA512Type "SHA512object *" "&PyType_Type"
/* Instance layout of a hash object that uses the HACL* SHA2-256 streaming
 * state. */
typedef struct {
PyObject_HEAD
// Number of digest bytes handed back by digest()/hexdigest(); those
// functions assert it does not exceed SHA256_DIGESTSIZE.
int digestsize;
// Prevents undefined behavior via multiple threads entering the C API.
// The lock will be NULL before threaded access has been enabled.
PyThread_type_lock lock;
// HACL* streaming state; released by SHA256_dealloc.
Hacl_Streaming_SHA2_state_sha2_256 *state;
} SHA256object;
/* Instance layout of a hash object that uses the HACL* SHA2-512 streaming
 * state. */
typedef struct {
PyObject_HEAD
// Number of digest bytes handed back by digest()/hexdigest(); those
// functions assert it does not exceed SHA512_DIGESTSIZE.
int digestsize;
// Prevents undefined behavior via multiple threads entering the C API.
// The lock will be NULL before threaded access has been enabled.
PyThread_type_lock lock;
// HACL* streaming state; released by SHA512_dealloc.
Hacl_Streaming_SHA2_state_sha2_512 *state;
} SHA512object;
@ -100,6 +106,7 @@ newSHA224object(sha2_state *state)
if (!sha) {
return NULL;
}
sha->lock = NULL;
PyObject_GC_Track(sha);
return sha;
}
@ -112,6 +119,7 @@ newSHA256object(sha2_state *state)
if (!sha) {
return NULL;
}
sha->lock = NULL;
PyObject_GC_Track(sha);
return sha;
}
@ -124,6 +132,7 @@ newSHA384object(sha2_state *state)
if (!sha) {
return NULL;
}
sha->lock = NULL;
PyObject_GC_Track(sha);
return sha;
}
@ -136,6 +145,7 @@ newSHA512object(sha2_state *state)
if (!sha) {
return NULL;
}
sha->lock = NULL;
PyObject_GC_Track(sha);
return sha;
}
@ -153,6 +163,9 @@ static void
SHA256_dealloc(SHA256object *ptr)
{
Hacl_Streaming_SHA2_free_256(ptr->state);
if (ptr->lock != NULL) {
PyThread_free_lock(ptr->lock);
}
PyTypeObject *tp = Py_TYPE(ptr);
PyObject_GC_UnTrack(ptr);
PyObject_GC_Del(ptr);
@ -163,6 +176,9 @@ static void
SHA512_dealloc(SHA512object *ptr)
{
Hacl_Streaming_SHA2_free_512(ptr->state);
if (ptr->lock != NULL) {
PyThread_free_lock(ptr->lock);
}
PyTypeObject *tp = Py_TYPE(ptr);
PyObject_GC_UnTrack(ptr);
PyObject_GC_Del(ptr);
@ -229,7 +245,9 @@ SHA256Type_copy_impl(SHA256object *self, PyTypeObject *cls)
}
}
ENTER_HASHLIB(self);
SHA256copy(self, newobj);
LEAVE_HASHLIB(self);
return (PyObject *)newobj;
}
@ -259,7 +277,9 @@ SHA512Type_copy_impl(SHA512object *self, PyTypeObject *cls)
}
}
ENTER_HASHLIB(self);
SHA512copy(self, newobj);
LEAVE_HASHLIB(self);
return (PyObject *)newobj;
}
@ -275,9 +295,11 @@ SHA256Type_digest_impl(SHA256object *self)
{
uint8_t digest[SHA256_DIGESTSIZE];
assert(self->digestsize <= SHA256_DIGESTSIZE);
ENTER_HASHLIB(self);
// HACL* performs copies under the hood so that self->state remains valid
// after this call.
Hacl_Streaming_SHA2_finish_256(self->state, digest);
LEAVE_HASHLIB(self);
return PyBytes_FromStringAndSize((const char *)digest, self->digestsize);
}
@ -293,9 +315,11 @@ SHA512Type_digest_impl(SHA512object *self)
{
uint8_t digest[SHA512_DIGESTSIZE];
assert(self->digestsize <= SHA512_DIGESTSIZE);
ENTER_HASHLIB(self);
// HACL* performs copies under the hood so that self->state remains valid
// after this call.
Hacl_Streaming_SHA2_finish_512(self->state, digest);
LEAVE_HASHLIB(self);
return PyBytes_FromStringAndSize((const char *)digest, self->digestsize);
}
@ -311,7 +335,9 @@ SHA256Type_hexdigest_impl(SHA256object *self)
{
uint8_t digest[SHA256_DIGESTSIZE];
assert(self->digestsize <= SHA256_DIGESTSIZE);
ENTER_HASHLIB(self);
Hacl_Streaming_SHA2_finish_256(self->state, digest);
LEAVE_HASHLIB(self);
return _Py_strhex((const char *)digest, self->digestsize);
}
@ -327,7 +353,9 @@ SHA512Type_hexdigest_impl(SHA512object *self)
{
uint8_t digest[SHA512_DIGESTSIZE];
assert(self->digestsize <= SHA512_DIGESTSIZE);
ENTER_HASHLIB(self);
Hacl_Streaming_SHA2_finish_512(self->state, digest);
LEAVE_HASHLIB(self);
return _Py_strhex((const char *)digest, self->digestsize);
}
@ -348,7 +376,18 @@ SHA256Type_update(SHA256object *self, PyObject *obj)
GET_BUFFER_VIEW_OR_ERROUT(obj, &buf);
update_256(self->state, buf.buf, buf.len);
if (self->lock == NULL && buf.len >= HASHLIB_GIL_MINSIZE) {
self->lock = PyThread_allocate_lock();
}
if (self->lock != NULL) {
Py_BEGIN_ALLOW_THREADS
PyThread_acquire_lock(self->lock, 1);
update_256(self->state, buf.buf, buf.len);
PyThread_release_lock(self->lock);
Py_END_ALLOW_THREADS
} else {
update_256(self->state, buf.buf, buf.len);
}
PyBuffer_Release(&buf);
Py_RETURN_NONE;
@ -371,7 +410,18 @@ SHA512Type_update(SHA512object *self, PyObject *obj)
GET_BUFFER_VIEW_OR_ERROUT(obj, &buf);
update_512(self->state, buf.buf, buf.len);
if (self->lock == NULL && buf.len >= HASHLIB_GIL_MINSIZE) {
self->lock = PyThread_allocate_lock();
}
if (self->lock != NULL) {
Py_BEGIN_ALLOW_THREADS
PyThread_acquire_lock(self->lock, 1);
update_512(self->state, buf.buf, buf.len);
PyThread_release_lock(self->lock);
Py_END_ALLOW_THREADS
} else {
update_512(self->state, buf.buf, buf.len);
}
PyBuffer_Release(&buf);
Py_RETURN_NONE;
@ -560,7 +610,15 @@ _sha2_sha256_impl(PyObject *module, PyObject *string, int usedforsecurity)
return NULL;
}
if (string) {
update_256(new->state, buf.buf, buf.len);
if (buf.len >= HASHLIB_GIL_MINSIZE) {
/* We do not initialize self->lock here as this is the constructor
* where it is not yet possible to have concurrent access. */
Py_BEGIN_ALLOW_THREADS
update_256(new->state, buf.buf, buf.len);
Py_END_ALLOW_THREADS
} else {
update_256(new->state, buf.buf, buf.len);
}
PyBuffer_Release(&buf);
}
@ -606,7 +664,15 @@ _sha2_sha224_impl(PyObject *module, PyObject *string, int usedforsecurity)
return NULL;
}
if (string) {
update_256(new->state, buf.buf, buf.len);
if (buf.len >= HASHLIB_GIL_MINSIZE) {
/* We do not initialize self->lock here as this is the constructor
* where it is not yet possible to have concurrent access. */
Py_BEGIN_ALLOW_THREADS
update_256(new->state, buf.buf, buf.len);
Py_END_ALLOW_THREADS
} else {
update_256(new->state, buf.buf, buf.len);
}
PyBuffer_Release(&buf);
}
@ -651,7 +717,15 @@ _sha2_sha512_impl(PyObject *module, PyObject *string, int usedforsecurity)
return NULL;
}
if (string) {
update_512(new->state, buf.buf, buf.len);
if (buf.len >= HASHLIB_GIL_MINSIZE) {
/* We do not initialize self->lock here as this is the constructor
* where it is not yet possible to have concurrent access. */
Py_BEGIN_ALLOW_THREADS
update_512(new->state, buf.buf, buf.len);
Py_END_ALLOW_THREADS
} else {
update_512(new->state, buf.buf, buf.len);
}
PyBuffer_Release(&buf);
}
@ -696,7 +770,15 @@ _sha2_sha384_impl(PyObject *module, PyObject *string, int usedforsecurity)
return NULL;
}
if (string) {
update_512(new->state, buf.buf, buf.len);
if (buf.len >= HASHLIB_GIL_MINSIZE) {
/* We do not initialize self->lock here as this is the constructor
* where it is not yet possible to have concurrent access. */
Py_BEGIN_ALLOW_THREADS
update_512(new->state, buf.buf, buf.len);
Py_END_ALLOW_THREADS
} else {
update_512(new->state, buf.buf, buf.len);
}
PyBuffer_Release(&buf);
}

View file

@ -60,6 +60,9 @@ class _sha3.shake_256 "SHA3object *" "&SHAKE256type"
/* Instance layout of a SHA-3 / SHAKE hash object. */
typedef struct {
PyObject_HEAD
// Prevents undefined behavior via multiple threads entering the C API.
// The lock will be NULL before threaded access has been enabled.
// SHA3_dealloc frees it when it is non-NULL.
PyThread_type_lock lock;
// HACL* streaming Keccak state; released by SHA3_dealloc.
Hacl_Streaming_Keccak_state *hash_state;
} SHA3object;
@ -73,6 +76,7 @@ newSHA3object(PyTypeObject *type)
if (newobj == NULL) {
return NULL;
}
newobj->lock = NULL;
return newobj;
}
@ -133,7 +137,15 @@ py_sha3_new_impl(PyTypeObject *type, PyObject *data, int usedforsecurity)
if (data) {
GET_BUFFER_VIEW_OR_ERROR(data, &buf, goto error);
sha3_update(self->hash_state, buf.buf, buf.len);
if (buf.len >= HASHLIB_GIL_MINSIZE) {
/* We do not initialize self->lock here as this is the constructor
* where it is not yet possible to have concurrent access. */
Py_BEGIN_ALLOW_THREADS
sha3_update(self->hash_state, buf.buf, buf.len);
Py_END_ALLOW_THREADS
} else {
sha3_update(self->hash_state, buf.buf, buf.len);
}
}
PyBuffer_Release(&buf);
@ -157,6 +169,9 @@ static void
SHA3_dealloc(SHA3object *self)
{
Hacl_Streaming_Keccak_free(self->hash_state);
if (self->lock != NULL) {
PyThread_free_lock(self->lock);
}
PyTypeObject *tp = Py_TYPE(self);
PyObject_Free(self);
Py_DECREF(tp);
@ -181,7 +196,9 @@ _sha3_sha3_224_copy_impl(SHA3object *self)
if ((newobj = newSHA3object(Py_TYPE(self))) == NULL) {
return NULL;
}
ENTER_HASHLIB(self);
newobj->hash_state = Hacl_Streaming_Keccak_copy(self->hash_state);
LEAVE_HASHLIB(self);
return (PyObject *)newobj;
}
@ -199,7 +216,9 @@ _sha3_sha3_224_digest_impl(SHA3object *self)
unsigned char digest[SHA3_MAX_DIGESTSIZE];
// This function errors out if the algorithm is Shake. Here, we know this
// not to be the case, and therefore do not perform error checking.
ENTER_HASHLIB(self);
Hacl_Streaming_Keccak_finish(self->hash_state, digest);
LEAVE_HASHLIB(self);
return PyBytes_FromStringAndSize((const char *)digest,
Hacl_Streaming_Keccak_hash_len(self->hash_state));
}
@ -216,7 +235,9 @@ _sha3_sha3_224_hexdigest_impl(SHA3object *self)
/*[clinic end generated code: output=75ad03257906918d input=2d91bb6e0d114ee3]*/
{
unsigned char digest[SHA3_MAX_DIGESTSIZE];
ENTER_HASHLIB(self);
Hacl_Streaming_Keccak_finish(self->hash_state, digest);
LEAVE_HASHLIB(self);
return _Py_strhex((const char *)digest,
Hacl_Streaming_Keccak_hash_len(self->hash_state));
}
@ -237,7 +258,18 @@ _sha3_sha3_224_update(SHA3object *self, PyObject *data)
{
Py_buffer buf;
GET_BUFFER_VIEW_OR_ERROUT(data, &buf);
sha3_update(self->hash_state, buf.buf, buf.len);
if (self->lock == NULL && buf.len >= HASHLIB_GIL_MINSIZE) {
self->lock = PyThread_allocate_lock();
}
if (self->lock != NULL) {
Py_BEGIN_ALLOW_THREADS
PyThread_acquire_lock(self->lock, 1);
sha3_update(self->hash_state, buf.buf, buf.len);
PyThread_release_lock(self->lock);
Py_END_ALLOW_THREADS
} else {
sha3_update(self->hash_state, buf.buf, buf.len);
}
PyBuffer_Release(&buf);
Py_RETURN_NONE;
}