From b6228b521b4692b2de1c1c12f4aa5623f8319084 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Tue, 6 Feb 2024 14:45:04 -0500 Subject: [PATCH] gh-115035: Mark ThreadHandles as non-joinable earlier after forking (#115042) This marks dead ThreadHandles as non-joinable earlier in `PyOS_AfterFork_Child()` before we execute any Python code. The handles are stored in a global linked list in `_PyRuntimeState` because `fork()` affects the entire process. --- Include/internal/pycore_pythread.h | 15 +++++--- Include/internal/pycore_runtime_init.h | 2 + Lib/threading.py | 5 +-- Modules/_threadmodule.c | 53 +++++++++++++++++--------- Python/pystate.c | 2 + Python/thread_nt.h | 4 -- Python/thread_pthread.h | 10 ----- 7 files changed, 50 insertions(+), 41 deletions(-) diff --git a/Include/internal/pycore_pythread.h b/Include/internal/pycore_pythread.h index 9c9a09f60f3..265299d7574 100644 --- a/Include/internal/pycore_pythread.h +++ b/Include/internal/pycore_pythread.h @@ -9,6 +9,7 @@ extern "C" { #endif #include "dynamic_annotations.h" // _Py_ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX +#include "pycore_llist.h" // struct llist_node // Get _POSIX_THREADS and _POSIX_SEMAPHORES macros if available #if (defined(HAVE_UNISTD_H) && !defined(_POSIX_THREADS) \ @@ -75,14 +76,22 @@ struct _pythread_runtime_state { struct py_stub_tls_entry tls_entries[PTHREAD_KEYS_MAX]; } stubs; #endif + + // Linked list of ThreadHandleObjects + struct llist_node handles; }; +#define _pythread_RUNTIME_INIT(pythread) \ + { \ + .handles = LLIST_INIT(pythread.handles), \ + } #ifdef HAVE_FORK /* Private function to reinitialize a lock at fork in the child process. Reset the lock to the unlocked state. Return 0 on success, return -1 on error. */ extern int _PyThread_at_fork_reinit(PyThread_type_lock *lock); +extern void _PyThread_AfterFork(struct _pythread_runtime_state *state); #endif /* HAVE_FORK */ @@ -143,12 +152,6 @@ PyAPI_FUNC(int) PyThread_join_thread(PyThread_handle_t); */ PyAPI_FUNC(int) PyThread_detach_thread(PyThread_handle_t); -/* - * Obtain the new thread ident and handle in a forked child process. - */ -PyAPI_FUNC(void) PyThread_update_thread_after_fork(PyThread_ident_t* ident, - PyThread_handle_t* handle); - #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index 4370ad05bdc..2ad1347ad48 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -16,6 +16,7 @@ extern "C" { #include "pycore_parser.h" // _parser_runtime_state_INIT #include "pycore_pyhash.h" // pyhash_state_INIT #include "pycore_pymem_init.h" // _pymem_allocators_standard_INIT +#include "pycore_pythread.h" // _pythread_RUNTIME_INIT #include "pycore_runtime_init_generated.h" // _Py_bytes_characters_INIT #include "pycore_signal.h" // _signals_RUNTIME_INIT #include "pycore_tracemalloc.h" // _tracemalloc_runtime_state_INIT @@ -90,6 +91,7 @@ extern PyTypeObject _PyExc_MemoryError; }, \ .obmalloc = _obmalloc_global_state_INIT, \ .pyhash_state = pyhash_state_INIT, \ + .threads = _pythread_RUNTIME_INIT(runtime.threads), \ .signals = _signals_RUNTIME_INIT, \ .interpreters = { \ /* This prevents interpreters from getting created \ diff --git a/Lib/threading.py b/Lib/threading.py index 75a08e5aac9..b6ff00acadd 100644 --- a/Lib/threading.py +++ b/Lib/threading.py @@ -949,7 +949,6 @@ def _after_fork(self, new_ident=None): # This thread is alive. self._ident = new_ident if self._handle is not None: - self._handle.after_fork_alive() assert self._handle.ident == new_ident # bpo-42350: If the fork happens when the thread is already stopped # (ex: after threading._shutdown() has been called), _tstate_lock @@ -965,9 +964,7 @@ def _after_fork(self, new_ident=None): self._is_stopped = True self._tstate_lock = None self._join_lock = None - if self._handle is not None: - self._handle.after_fork_dead() - self._handle = None + self._handle = None def __repr__(self): assert self._initialized, "Thread.__init__() was not called" diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index 5cceb84658d..df02b023012 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -44,6 +44,7 @@ get_thread_state(PyObject *module) typedef struct { PyObject_HEAD + struct llist_node node; // linked list node (see _pythread_runtime_state) PyThread_ident_t ident; PyThread_handle_t handle; char joinable; @@ -59,6 +60,11 @@ new_thread_handle(thread_module_state* state) self->ident = 0; self->handle = 0; self->joinable = 0; + + HEAD_LOCK(&_PyRuntime); + llist_insert_tail(&_PyRuntime.threads.handles, &self->node); + HEAD_UNLOCK(&_PyRuntime); + return self; } @@ -66,6 +72,14 @@ static void ThreadHandle_dealloc(ThreadHandleObject *self) { PyObject *tp = (PyObject *) Py_TYPE(self); + + // Remove ourself from the global list of handles + HEAD_LOCK(&_PyRuntime); + if (self->node.next != NULL) { + llist_remove(&self->node); + } + HEAD_UNLOCK(&_PyRuntime); + if (self->joinable) { int ret = PyThread_detach_thread(self->handle); if (ret) { @@ -77,6 +91,28 @@ ThreadHandle_dealloc(ThreadHandleObject *self) Py_DECREF(tp); } +void +_PyThread_AfterFork(struct _pythread_runtime_state *state) +{ + // gh-115035: We mark ThreadHandles as not joinable early in the child's + // after-fork handler. We do this before calling any Python code to ensure + // that it happens before any ThreadHandles are deallocated, such as by a + // GC cycle. + PyThread_ident_t current = PyThread_get_thread_ident_ex(); + + struct llist_node *node; + llist_for_each_safe(node, &state->handles) { + ThreadHandleObject *hobj = llist_data(node, ThreadHandleObject, node); + if (hobj->ident == current) { + continue; + } + + // Disallow calls to detach() and join() as they could crash. + hobj->joinable = 0; + llist_remove(node); + } +} + static PyObject * ThreadHandle_repr(ThreadHandleObject *self) { @@ -91,21 +127,6 @@ ThreadHandle_get_ident(ThreadHandleObject *self, void *ignored) } -static PyObject * -ThreadHandle_after_fork_alive(ThreadHandleObject *self, void* ignored) -{ - PyThread_update_thread_after_fork(&self->ident, &self->handle); - Py_RETURN_NONE; -} - -static PyObject * -ThreadHandle_after_fork_dead(ThreadHandleObject *self, void* ignored) -{ - // Disallow calls to detach() and join() as they could crash. - self->joinable = 0; - Py_RETURN_NONE; -} - static PyObject * ThreadHandle_detach(ThreadHandleObject *self, void* ignored) { @@ -157,8 +178,6 @@ static PyGetSetDef ThreadHandle_getsetlist[] = { static PyMethodDef ThreadHandle_methods[] = { - {"after_fork_alive", (PyCFunction)ThreadHandle_after_fork_alive, METH_NOARGS}, - {"after_fork_dead", (PyCFunction)ThreadHandle_after_fork_dead, METH_NOARGS}, {"detach", (PyCFunction)ThreadHandle_detach, METH_NOARGS}, {"join", (PyCFunction)ThreadHandle_join, METH_NOARGS}, {0, 0} diff --git a/Python/pystate.c b/Python/pystate.c index 7836c172bbf..e77e5bfa7e2 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -517,6 +517,8 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime) return _PyStatus_NO_MEMORY(); } + _PyThread_AfterFork(&runtime->threads); + return _PyStatus_OK(); } #endif diff --git a/Python/thread_nt.h b/Python/thread_nt.h index 044e9fa111e..ad467e0e784 100644 --- a/Python/thread_nt.h +++ b/Python/thread_nt.h @@ -242,10 +242,6 @@ PyThread_detach_thread(PyThread_handle_t handle) { return (CloseHandle(hThread) == 0); } -void -PyThread_update_thread_after_fork(PyThread_ident_t* ident, PyThread_handle_t* handle) { -} - /* * Return the thread Id instead of a handle. The Id is said to uniquely identify the * thread in the system diff --git a/Python/thread_pthread.h b/Python/thread_pthread.h index fb3b79fc160..556e3de0b07 100644 --- a/Python/thread_pthread.h +++ b/Python/thread_pthread.h @@ -339,16 +339,6 @@ PyThread_detach_thread(PyThread_handle_t th) { return pthread_detach((pthread_t) th); } -void -PyThread_update_thread_after_fork(PyThread_ident_t* ident, PyThread_handle_t* handle) { - // The thread id might have been updated in the forked child - pthread_t th = pthread_self(); - *ident = (PyThread_ident_t) th; - *handle = (PyThread_handle_t) th; - assert(th == (pthread_t) *ident); - assert(th == (pthread_t) *handle); -} - /* XXX This implementation is considered (to quote Tim Peters) "inherently hosed" because: - It does not guarantee the promise that a non-zero integer is returned.