gh-115035: Mark ThreadHandles as non-joinable earlier after forking (#115042)

This marks dead ThreadHandles as non-joinable earlier in
`PyOS_AfterFork_Child()` before we execute any Python code. The handles
are stored in a global linked list in `_PyRuntimeState` because `fork()`
affects the entire process.
This commit is contained in:
Sam Gross 2024-02-06 14:45:04 -05:00 committed by GitHub
parent 71239d50b5
commit b6228b521b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 50 additions and 41 deletions

View file

@ -9,6 +9,7 @@ extern "C" {
#endif
#include "dynamic_annotations.h" // _Py_ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX
#include "pycore_llist.h" // struct llist_node
// Get _POSIX_THREADS and _POSIX_SEMAPHORES macros if available
#if (defined(HAVE_UNISTD_H) && !defined(_POSIX_THREADS) \
@ -75,14 +76,22 @@ struct _pythread_runtime_state {
struct py_stub_tls_entry tls_entries[PTHREAD_KEYS_MAX];
} stubs;
#endif
// Linked list of ThreadHandleObjects
struct llist_node handles;
};
#define _pythread_RUNTIME_INIT(pythread) \
{ \
.handles = LLIST_INIT(pythread.handles), \
}
#ifdef HAVE_FORK
/* Private function to reinitialize a lock at fork in the child process.
Reset the lock to the unlocked state.
Return 0 on success, return -1 on error. */
extern int _PyThread_at_fork_reinit(PyThread_type_lock *lock);
extern void _PyThread_AfterFork(struct _pythread_runtime_state *state);
#endif /* HAVE_FORK */
@ -143,12 +152,6 @@ PyAPI_FUNC(int) PyThread_join_thread(PyThread_handle_t);
*/
PyAPI_FUNC(int) PyThread_detach_thread(PyThread_handle_t);
/*
* Obtain the new thread ident and handle in a forked child process.
*/
PyAPI_FUNC(void) PyThread_update_thread_after_fork(PyThread_ident_t* ident,
PyThread_handle_t* handle);
#ifdef __cplusplus
}
#endif

View file

@ -16,6 +16,7 @@ extern "C" {
#include "pycore_parser.h" // _parser_runtime_state_INIT
#include "pycore_pyhash.h" // pyhash_state_INIT
#include "pycore_pymem_init.h" // _pymem_allocators_standard_INIT
#include "pycore_pythread.h" // _pythread_RUNTIME_INIT
#include "pycore_runtime_init_generated.h" // _Py_bytes_characters_INIT
#include "pycore_signal.h" // _signals_RUNTIME_INIT
#include "pycore_tracemalloc.h" // _tracemalloc_runtime_state_INIT
@ -90,6 +91,7 @@ extern PyTypeObject _PyExc_MemoryError;
}, \
.obmalloc = _obmalloc_global_state_INIT, \
.pyhash_state = pyhash_state_INIT, \
.threads = _pythread_RUNTIME_INIT(runtime.threads), \
.signals = _signals_RUNTIME_INIT, \
.interpreters = { \
/* This prevents interpreters from getting created \

View file

@ -949,7 +949,6 @@ def _after_fork(self, new_ident=None):
# This thread is alive.
self._ident = new_ident
if self._handle is not None:
self._handle.after_fork_alive()
assert self._handle.ident == new_ident
# bpo-42350: If the fork happens when the thread is already stopped
# (ex: after threading._shutdown() has been called), _tstate_lock
@ -965,9 +964,7 @@ def _after_fork(self, new_ident=None):
self._is_stopped = True
self._tstate_lock = None
self._join_lock = None
if self._handle is not None:
self._handle.after_fork_dead()
self._handle = None
self._handle = None
def __repr__(self):
assert self._initialized, "Thread.__init__() was not called"

View file

@ -44,6 +44,7 @@ get_thread_state(PyObject *module)
typedef struct {
PyObject_HEAD
struct llist_node node; // linked list node (see _pythread_runtime_state)
PyThread_ident_t ident;
PyThread_handle_t handle;
char joinable;
@ -59,6 +60,11 @@ new_thread_handle(thread_module_state* state)
self->ident = 0;
self->handle = 0;
self->joinable = 0;
HEAD_LOCK(&_PyRuntime);
llist_insert_tail(&_PyRuntime.threads.handles, &self->node);
HEAD_UNLOCK(&_PyRuntime);
return self;
}
@ -66,6 +72,14 @@ static void
ThreadHandle_dealloc(ThreadHandleObject *self)
{
PyObject *tp = (PyObject *) Py_TYPE(self);
// Remove ourself from the global list of handles
HEAD_LOCK(&_PyRuntime);
if (self->node.next != NULL) {
llist_remove(&self->node);
}
HEAD_UNLOCK(&_PyRuntime);
if (self->joinable) {
int ret = PyThread_detach_thread(self->handle);
if (ret) {
@ -77,6 +91,28 @@ ThreadHandle_dealloc(ThreadHandleObject *self)
Py_DECREF(tp);
}
void
_PyThread_AfterFork(struct _pythread_runtime_state *state)
{
// gh-115035: We mark ThreadHandles as not joinable early in the child's
// after-fork handler. We do this before calling any Python code to ensure
// that it happens before any ThreadHandles are deallocated, such as by a
// GC cycle.
PyThread_ident_t current = PyThread_get_thread_ident_ex();
struct llist_node *node;
llist_for_each_safe(node, &state->handles) {
ThreadHandleObject *hobj = llist_data(node, ThreadHandleObject, node);
if (hobj->ident == current) {
continue;
}
// Disallow calls to detach() and join() as they could crash.
hobj->joinable = 0;
llist_remove(node);
}
}
static PyObject *
ThreadHandle_repr(ThreadHandleObject *self)
{
@ -91,21 +127,6 @@ ThreadHandle_get_ident(ThreadHandleObject *self, void *ignored)
}
static PyObject *
ThreadHandle_after_fork_alive(ThreadHandleObject *self, void* ignored)
{
PyThread_update_thread_after_fork(&self->ident, &self->handle);
Py_RETURN_NONE;
}
static PyObject *
ThreadHandle_after_fork_dead(ThreadHandleObject *self, void* ignored)
{
// Disallow calls to detach() and join() as they could crash.
self->joinable = 0;
Py_RETURN_NONE;
}
static PyObject *
ThreadHandle_detach(ThreadHandleObject *self, void* ignored)
{
@ -157,8 +178,6 @@ static PyGetSetDef ThreadHandle_getsetlist[] = {
static PyMethodDef ThreadHandle_methods[] =
{
{"after_fork_alive", (PyCFunction)ThreadHandle_after_fork_alive, METH_NOARGS},
{"after_fork_dead", (PyCFunction)ThreadHandle_after_fork_dead, METH_NOARGS},
{"detach", (PyCFunction)ThreadHandle_detach, METH_NOARGS},
{"join", (PyCFunction)ThreadHandle_join, METH_NOARGS},
{0, 0}

View file

@ -517,6 +517,8 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime)
return _PyStatus_NO_MEMORY();
}
_PyThread_AfterFork(&runtime->threads);
return _PyStatus_OK();
}
#endif

View file

@ -242,10 +242,6 @@ PyThread_detach_thread(PyThread_handle_t handle) {
return (CloseHandle(hThread) == 0);
}
void
PyThread_update_thread_after_fork(PyThread_ident_t* ident, PyThread_handle_t* handle) {
}
/*
* Return the thread Id instead of a handle. The Id is said to uniquely identify the
* thread in the system

View file

@ -339,16 +339,6 @@ PyThread_detach_thread(PyThread_handle_t th) {
return pthread_detach((pthread_t) th);
}
void
PyThread_update_thread_after_fork(PyThread_ident_t* ident, PyThread_handle_t* handle) {
// The thread id might have been updated in the forked child
pthread_t th = pthread_self();
*ident = (PyThread_ident_t) th;
*handle = (PyThread_handle_t) th;
assert(th == (pthread_t) *ident);
assert(th == (pthread_t) *handle);
}
/* XXX This implementation is considered (to quote Tim Peters) "inherently
hosed" because:
- It does not guarantee the promise that a non-zero integer is returned.