[3.12] gh-104341: Call _PyEval_ReleaseLock() with NULL When Finalizing the Current Thread (gh-105109) (gh-105209)

This avoids the problematic race in drop_gil() by skipping the FORCE_SWITCHING code there for finalizing threads.
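
To make the new contract concrete, here is a compilable sketch of the idea; the types and names below (gil_state, release_lock) are invented stand-ins for illustration, not CPython's actual definitions. A NULL thread state tells the release path that this thread will never take the lock again, so the force-switch handshake, which reads state that finalization may free, is skipped entirely:

    #include <stdatomic.h>
    #include <stddef.h>

    /* Invented stand-in for CPython's GIL bookkeeping. */
    struct gil_state {
        atomic_int locked;
        atomic_int drop_request;
    };

    /* tstate == NULL means: final release for this thread; the owning
       interpreter may be freed by another thread the moment we unlock,
       so skip the switching code that would read shared state. */
    static void
    release_lock(struct gil_state *gil, void *tstate)
    {
        atomic_store_explicit(&gil->locked, 0, memory_order_release);
        if (tstate != NULL &&
            atomic_load_explicit(&gil->drop_request, memory_order_relaxed)) {
            /* FORCE_SWITCHING handshake would run here. */
        }
    }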

(The idea for this approach came out of discussions with @markshannon.)
(cherry picked from commit 3698fda)

Co-authored-by: Eric Snow <ericsnowcurrently@gmail.com>
commit d2be5c73ed (parent c38ceb032d)
Author: Miss Islington (bot), committed by GitHub
Date:   2023-06-01 15:50:28 -07:00
4 changed files with 46 additions and 9 deletions

Include/internal/pycore_ceval.h

@@ -100,7 +100,7 @@ extern PyStatus _PyEval_InitGIL(PyThreadState *tstate, int own_gil);
 extern void _PyEval_FiniGIL(PyInterpreterState *interp);
 
 extern void _PyEval_AcquireLock(PyThreadState *tstate);
-extern void _PyEval_ReleaseLock(PyThreadState *tstate);
+extern void _PyEval_ReleaseLock(PyInterpreterState *, PyThreadState *);
 extern PyThreadState * _PyThreadState_SwapNoGIL(PyThreadState *);
 
 extern void _PyEval_DeactivateOpCache(void);

Python/ceval_gil.c

@@ -278,6 +278,15 @@ static void recreate_gil(struct _gil_runtime_state *gil)
 static void
 drop_gil(struct _ceval_state *ceval, PyThreadState *tstate)
 {
+    /* If tstate is NULL, the caller is indicating that we're releasing
+       the GIL for the last time in this thread.  This is particularly
+       relevant when the current thread state is finalizing or its
+       interpreter is finalizing (either may be in an inconsistent
+       state).  In that case the current thread will definitely
+       never try to acquire the GIL again. */
+    // XXX It may be more correct to check tstate->_status.finalizing.
+    // XXX assert(tstate == NULL || !tstate->_status.cleared);
+
     struct _gil_runtime_state *gil = ceval->gil;
     if (!_Py_atomic_load_relaxed(&gil->locked)) {
         Py_FatalError("drop_gil: GIL is not locked");
@@ -298,7 +307,15 @@ drop_gil(struct _ceval_state *ceval, PyThreadState *tstate)
     MUTEX_UNLOCK(gil->mutex);
 
 #ifdef FORCE_SWITCHING
-    if (_Py_atomic_load_relaxed(&ceval->gil_drop_request) && tstate != NULL) {
+    /* We check tstate first in case we might be releasing the GIL for
+       the last time in this thread.  In that case there's a possible
+       race with tstate->interp getting deleted after gil->mutex is
+       unlocked and before the following code runs, leading to a crash.
+       We can use (tstate == NULL) to indicate the thread is done with
+       the GIL, and that's the only time we might delete the
+       interpreter, so checking tstate first prevents the crash.
+       See https://github.com/python/cpython/issues/104341. */
+    if (tstate != NULL && _Py_atomic_load_relaxed(&ceval->gil_drop_request)) {
         MUTEX_LOCK(gil->switch_mutex);
         /* Not switched yet => wait */
         if (((PyThreadState*)_Py_atomic_load_relaxed(&gil->last_holder)) == tstate)
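
The operand order in the rewritten condition above is the whole fix: && short-circuits left to right, so putting the tstate check first guarantees the load from ceval, memory owned by a possibly-deleted interpreter, never executes on a final release. A self-contained illustration of the same idiom, using invented names rather than CPython's actual code:

    #include <stdatomic.h>
    #include <stdio.h>

    /* Invented stand-in for the per-interpreter ceval state. */
    struct ceval_state { atomic_int gil_drop_request; };

    /* `done` models tstate == NULL: when set, `ceval` may already be
       freed, so it must not be dereferenced.  Checking `done` first
       short-circuits the load away in exactly the unsafe case. */
    static void
    maybe_force_switch(struct ceval_state *ceval, int done)
    {
        if (!done && atomic_load_explicit(&ceval->gil_drop_request,
                                          memory_order_relaxed)) {
            puts("forcing a thread switch");
        }
    }

    int main(void)
    {
        struct ceval_state state;
        atomic_init(&state.gil_drop_request, 1);

        maybe_force_switch(&state, 0);  /* interpreter alive: may switch */
        maybe_force_switch(NULL, 1);    /* final release: load never runs */
        return 0;
    }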
@@ -350,6 +367,9 @@ take_gil(PyThreadState *tstate)
     int err = errno;
 
     assert(tstate != NULL);
+    /* We shouldn't be using a thread state that isn't viable any more. */
+    // XXX It may be more correct to check tstate->_status.finalizing.
+    // XXX assert(!tstate->_status.cleared);
 
     if (tstate_must_exit(tstate)) {
         /* bpo-39877: If Py_Finalize() has been called and tstate is not the
@@ -625,10 +645,12 @@ _PyEval_AcquireLock(PyThreadState *tstate)
 }
 
 void
-_PyEval_ReleaseLock(PyThreadState *tstate)
+_PyEval_ReleaseLock(PyInterpreterState *interp, PyThreadState *tstate)
 {
-    _Py_EnsureTstateNotNULL(tstate);
-    struct _ceval_state *ceval = &tstate->interp->ceval;
+    /* If tstate is NULL then we do not expect the current thread
+       to acquire the GIL ever again. */
+    assert(tstate == NULL || tstate->interp == interp);
+    struct _ceval_state *ceval = &interp->ceval;
     drop_gil(ceval, tstate);
 }
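
With interp now an explicit parameter, it no longer has to be derived from a possibly-NULL tstate. The hunks below exercise all three shapes of the new call; gathered here for reference (these lines are quoted from the diff itself, not new code):

    /* Ordinary release during a thread switch; will reacquire later. */
    _PyEval_ReleaseLock(oldts->interp, oldts);

    /* Final release while deleting the current thread state. */
    _PyEval_ReleaseLock(tstate->interp, NULL);

    /* Final release while deleting the interpreter itself. */
    _PyEval_ReleaseLock(interp, NULL);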

Python/pylifecycle.c

@@ -2035,7 +2035,7 @@ new_interpreter(PyThreadState **tstate_p, const PyInterpreterConfig *config)
     const PyConfig *src_config;
     if (save_tstate != NULL) {
         // XXX Might new_interpreter() have been called without the GIL held?
-        _PyEval_ReleaseLock(save_tstate);
+        _PyEval_ReleaseLock(save_tstate->interp, save_tstate);
         src_config = _PyInterpreterState_GetConfig(save_tstate->interp);
     }
     else

Python/pystate.c

@@ -822,6 +822,12 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
         p = p->next;
         HEAD_UNLOCK(runtime);
     }
+    if (tstate->interp == interp) {
+        /* We fix tstate->_status below when we for sure aren't using it
+           (e.g. no longer need the GIL). */
+        // XXX Eliminate the need to do this.
+        tstate->_status.cleared = 0;
+    }
 
     /* It is possible that any of the objects below have a finalizer
        that runs Python code or otherwise relies on a thread state
@@ -886,6 +892,12 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
     Py_CLEAR(interp->builtins);
     Py_CLEAR(interp->interpreter_trampoline);
 
+    if (tstate->interp == interp) {
+        /* We are now safe to fix tstate->_status.cleared. */
+        // XXX Do this (much) earlier?
+        tstate->_status.cleared = 1;
+    }
+
     for (int i=0; i < DICT_MAX_WATCHERS; i++) {
         interp->dict_state.watchers[i] = NULL;
     }
@@ -930,6 +942,7 @@ _PyInterpreterState_Clear(PyThreadState *tstate)
 }
 
+static inline void tstate_deactivate(PyThreadState *tstate);
 static void zapthreads(PyInterpreterState *interp);
 
 void
 PyInterpreterState_Delete(PyInterpreterState *interp)
@@ -943,7 +956,9 @@ PyInterpreterState_Delete(PyInterpreterState *interp)
     PyThreadState *tcur = current_fast_get(runtime);
     if (tcur != NULL && interp == tcur->interp) {
         /* Unset current thread.  After this, many C API calls become crashy. */
-        _PyThreadState_Swap(runtime, NULL);
+        current_fast_clear(runtime);
+        tstate_deactivate(tcur);
+        _PyEval_ReleaseLock(interp, NULL);
     }
 
     zapthreads(interp);
@@ -1567,7 +1582,7 @@ _PyThreadState_DeleteCurrent(PyThreadState *tstate)
 {
     _Py_EnsureTstateNotNULL(tstate);
     tstate_delete_common(tstate);
     current_fast_clear(tstate->interp->runtime);
-    _PyEval_ReleaseLock(tstate);
+    _PyEval_ReleaseLock(tstate->interp, NULL);
     free_threadstate(tstate);
 }
@@ -1907,7 +1922,7 @@ _PyThreadState_Swap(_PyRuntimeState *runtime, PyThreadState *newts)
 {
     PyThreadState *oldts = current_fast_get(runtime);
     if (oldts != NULL) {
-        _PyEval_ReleaseLock(oldts);
+        _PyEval_ReleaseLock(oldts->interp, oldts);
     }
     _swap_thread_states(runtime, oldts, newts);
     if (newts != NULL) {