gh-114271: Fix race in Thread.join() (#114839)

There is a race between when `Thread._tstate_lock` is released[^1] in `Thread._wait_for_tstate_lock()` and when `Thread._stop()` asserts[^2] that it is unlocked. Consider the following execution involving threads A, B, and C:

1. A starts.
2. B joins A, blocking on its `_tstate_lock`.
3. C joins A, blocking on its `_tstate_lock`.
4. A finishes and releases its `_tstate_lock`.
5. B acquires A's `_tstate_lock` in `_wait_for_tstate_lock()`, releases it, but is swapped out before calling `_stop()`.
6. C is scheduled, acquires A's `_tstate_lock` in `_wait_for_tstate_lock()` but is swapped out before releasing it.
7. B is scheduled, calls `_stop()`, which asserts that A's `_tstate_lock` is not held. However, C holds it, so the assertion fails.

The race can be reproduced[^3] by inserting sleeps at the appropriate points in the threading code. To do so, run the `repro_join_race.py` from the linked repo.

There are two main parts to this PR:

1. `_tstate_lock` is replaced with an event that is attached to `PyThreadState`. The event is set by the runtime prior to the thread being cleared (in the same place that `_tstate_lock` was released). `Thread.join()` blocks waiting for the event to be set.
2. `_PyInterpreterState_WaitForThreads()` provides the ability to wait for all non-daemon threads to exit. To do so, an `is_daemon` predicate was added to `PyThreadState`. This field is set each time a thread is created. `threading._shutdown()` now calls into `_PyInterpreterState_WaitForThreads()` instead of waiting on `_tstate_lock`s.

[^1]: 441affc9e7/Lib/threading.py (L1201)
[^2]: 441affc9e7/Lib/threading.py (L1115)
[^3]: 8194653279

---------

Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
2024-10-14 16:59:44 +00:00 · 2024-03-16 05:56:30 -07:00 · 2024-03-16 05:56:30 -07:00 · 33da0e844c
parent 86bc40dd41
commit 33da0e844c
12 changed files with 771 additions and 643 deletions
--- a/Include/cpython/pystate.h
+++ b/Include/cpython/pystate.h
@ -161,32 +161,6 @@ struct _ts {
     */
    uintptr_t critical_section;

-    /* Called when a thread state is deleted normally, but not when it
-     * is destroyed after fork().
-     * Pain:  to prevent rare but fatal shutdown errors (issue 18808),
-     * Thread.join() must wait for the join'ed thread's tstate to be unlinked
-     * from the tstate chain.  That happens at the end of a thread's life,
-     * in pystate.c.
-     * The obvious way doesn't quite work:  create a lock which the tstate
-     * unlinking code releases, and have Thread.join() wait to acquire that
-     * lock.  The problem is that we _are_ at the end of the thread's life:
-     * if the thread holds the last reference to the lock, decref'ing the
-     * lock will delete the lock, and that may trigger arbitrary Python code
-     * if there's a weakref, with a callback, to the lock.  But by this time
-     * _PyRuntime.gilstate.tstate_current is already NULL, so only the simplest
-     * of C code can be allowed to run (in particular it must not be possible to
-     * release the GIL).
-     * So instead of holding the lock directly, the tstate holds a weakref to
-     * the lock:  that's the value of on_delete_data below.  Decref'ing a
-     * weakref is harmless.
-     * on_delete points to _threadmodule.c's static release_sentinel() function.
-     * After the tstate is unlinked, release_sentinel is called with the
-     * weakref-to-lock (on_delete_data) argument, and release_sentinel releases
-     * the indirectly held lock.
-     */
-    void (*on_delete)(void *);
-    void *on_delete_data;
-
    int coroutine_origin_tracking_depth;

    PyObject *async_gen_firstiter;
--- a/Include/internal/pycore_lock.h
+++ b/Include/internal/pycore_lock.h
@ -153,16 +153,6 @@ PyAPI_FUNC(void) PyEvent_Wait(PyEvent *evt);
 // and 0 if the timeout expired or thread was interrupted.
 PyAPI_FUNC(int) PyEvent_WaitTimed(PyEvent *evt, PyTime_t timeout_ns);

-// A one-time event notification with reference counting.
-typedef struct _PyEventRc {
-    PyEvent event;
-    Py_ssize_t refcount;
-} _PyEventRc;
-
-_PyEventRc *_PyEventRc_New(void);
-void _PyEventRc_Incref(_PyEventRc *erc);
-void _PyEventRc_Decref(_PyEventRc *erc);
-
 // _PyRawMutex implements a word-sized mutex that that does not depend on the
 // parking lot API, and therefore can be used in the parking lot
 // implementation.
--- a/Include/internal/pycore_pythread.h
+++ b/Include/internal/pycore_pythread.h
@ -78,7 +78,7 @@ struct _pythread_runtime_state {
    } stubs;
 #endif

-    // Linked list of ThreadHandleObjects
+    // Linked list of ThreadHandles
    struct llist_node handles;
 };

--- a/Lib/test/test_audit.py
+++ b/Lib/test/test_audit.py
@ -209,7 +209,7 @@ def test_threading(self):
        expected = [
            ("_thread.start_new_thread", "(<test_func>, (), None)"),
            ("test.test_func", "()"),
-            ("_thread.start_joinable_thread", "(<test_func>,)"),
+            ("_thread.start_joinable_thread", "(<test_func>, 1, None)"),
            ("test.test_func", "()"),
        ]

--- a/Lib/test/test_concurrent_futures/test_process_pool.py
+++ b/Lib/test/test_concurrent_futures/test_process_pool.py
@ -201,13 +201,13 @@ def test_python_finalization_error(self):
        # QueueFeederThread.
        orig_start_new_thread = threading._start_joinable_thread
        nthread = 0
-        def mock_start_new_thread(func, *args):
+        def mock_start_new_thread(func, *args, **kwargs):
            nonlocal nthread
            if nthread >= 1:
                raise RuntimeError("can't create new thread at "
                                   "interpreter shutdown")
            nthread += 1
-            return orig_start_new_thread(func, *args)
+            return orig_start_new_thread(func, *args, **kwargs)

        with support.swap_attr(threading, '_start_joinable_thread',
                               mock_start_new_thread):
--- a/Lib/test/test_thread.py
+++ b/Lib/test/test_thread.py
@ -289,6 +289,54 @@ def joiner():
            with self.assertRaisesRegex(RuntimeError, "Cannot join current thread"):
                raise error

+    def test_join_with_timeout(self):
+        lock = thread.allocate_lock()
+        lock.acquire()
+
+        def thr():
+            lock.acquire()
+
+        with threading_helper.wait_threads_exit():
+            handle = thread.start_joinable_thread(thr)
+            handle.join(0.1)
+            self.assertFalse(handle.is_done())
+            lock.release()
+            handle.join()
+            self.assertTrue(handle.is_done())
+
+    def test_join_unstarted(self):
+        handle = thread._ThreadHandle()
+        with self.assertRaisesRegex(RuntimeError, "thread not started"):
+            handle.join()
+
+    def test_set_done_unstarted(self):
+        handle = thread._ThreadHandle()
+        with self.assertRaisesRegex(RuntimeError, "thread not started"):
+            handle._set_done()
+
+    def test_start_duplicate_handle(self):
+        lock = thread.allocate_lock()
+        lock.acquire()
+
+        def func():
+            lock.acquire()
+
+        handle = thread._ThreadHandle()
+        with threading_helper.wait_threads_exit():
+            thread.start_joinable_thread(func, handle=handle)
+            with self.assertRaisesRegex(RuntimeError, "thread already started"):
+                thread.start_joinable_thread(func, handle=handle)
+            lock.release()
+            handle.join()
+
+    def test_start_with_none_handle(self):
+        def func():
+            pass
+
+        with threading_helper.wait_threads_exit():
+            handle = thread.start_joinable_thread(func, handle=None)
+            handle.join()
+

 class Barrier:
    def __init__(self, num_threads):
--- a/Lib/test/test_threading.py
+++ b/Lib/test/test_threading.py
@ -408,7 +408,7 @@ def run(self):

    def test_limbo_cleanup(self):
        # Issue 7481: Failure to start thread should cleanup the limbo map.
-        def fail_new_thread(*args):
+        def fail_new_thread(*args, **kwargs):
            raise threading.ThreadError()
        _start_joinable_thread = threading._start_joinable_thread
        threading._start_joinable_thread = fail_new_thread
@ -912,41 +912,6 @@ def f():
        rc, out, err = assert_python_ok("-c", code)
        self.assertEqual(err, b"")

-    def test_tstate_lock(self):
-        # Test an implementation detail of Thread objects.
-        started = _thread.allocate_lock()
-        finish = _thread.allocate_lock()
-        started.acquire()
-        finish.acquire()
-        def f():
-            started.release()
-            finish.acquire()
-            time.sleep(0.01)
-        # The tstate lock is None until the thread is started
-        t = threading.Thread(target=f)
-        self.assertIs(t._tstate_lock, None)
-        t.start()
-        started.acquire()
-        self.assertTrue(t.is_alive())
-        # The tstate lock can't be acquired when the thread is running
-        # (or suspended).
-        tstate_lock = t._tstate_lock
-        self.assertFalse(tstate_lock.acquire(timeout=0), False)
-        finish.release()
-        # When the thread ends, the state_lock can be successfully
-        # acquired.
-        self.assertTrue(tstate_lock.acquire(timeout=support.SHORT_TIMEOUT), False)
-        # But is_alive() is still True:  we hold _tstate_lock now, which
-        # prevents is_alive() from knowing the thread's end-of-life C code
-        # is done.
-        self.assertTrue(t.is_alive())
-        # Let is_alive() find out the C code is done.
-        tstate_lock.release()
-        self.assertFalse(t.is_alive())
-        # And verify the thread disposed of _tstate_lock.
-        self.assertIsNone(t._tstate_lock)
-        t.join()
-
    def test_repr_stopped(self):
        # Verify that "stopped" shows up in repr(Thread) appropriately.
        started = _thread.allocate_lock()
@ -1112,30 +1077,6 @@ def checker():
        self.assertEqual(threading.getprofile(), old_profile)
        self.assertEqual(sys.getprofile(), old_profile)

-    @cpython_only
-    def test_shutdown_locks(self):
-        for daemon in (False, True):
-            with self.subTest(daemon=daemon):
-                event = threading.Event()
-                thread = threading.Thread(target=event.wait, daemon=daemon)
-
-                # Thread.start() must add lock to _shutdown_locks,
-                # but only for non-daemon thread
-                thread.start()
-                tstate_lock = thread._tstate_lock
-                if not daemon:
-                    self.assertIn(tstate_lock, threading._shutdown_locks)
-                else:
-                    self.assertNotIn(tstate_lock, threading._shutdown_locks)
-
-                # unblock the thread and join it
-                event.set()
-                thread.join()
-
-                # Thread._stop() must remove tstate_lock from _shutdown_locks.
-                # Daemon threads must never add it to _shutdown_locks.
-                self.assertNotIn(tstate_lock, threading._shutdown_locks)
-
    def test_locals_at_exit(self):
        # bpo-19466: thread locals must not be deleted before destructors
        # are called
--- a/Lib/threading.py
+++ b/Lib/threading.py
@ -36,8 +36,11 @@
 _daemon_threads_allowed = _thread.daemon_threads_allowed
 _allocate_lock = _thread.allocate_lock
 _LockType = _thread.LockType
-_set_sentinel = _thread._set_sentinel
+_thread_shutdown = _thread._shutdown
+_make_thread_handle = _thread._make_thread_handle
+_ThreadHandle = _thread._ThreadHandle
 get_ident = _thread.get_ident
+_get_main_thread_ident = _thread._get_main_thread_ident
 _is_main_interpreter = _thread._is_main_interpreter
 try:
    get_native_id = _thread.get_native_id
@ -847,25 +850,6 @@ def _newname(name_template):
 _limbo = {}
 _dangling = WeakSet()

-# Set of Thread._tstate_lock locks of non-daemon threads used by _shutdown()
-# to wait until all Python thread states get deleted:
-# see Thread._set_tstate_lock().
-_shutdown_locks_lock = _allocate_lock()
-_shutdown_locks = set()
-
-def _maintain_shutdown_locks():
-    """
-    Drop any shutdown locks that don't correspond to running threads anymore.
-
-    Calling this from time to time avoids an ever-growing _shutdown_locks
-    set when Thread objects are not joined explicitly. See bpo-37788.
-
-    This must be called with _shutdown_locks_lock acquired.
-    """
-    # If a lock was released, the corresponding thread has exited
-    to_remove = [lock for lock in _shutdown_locks if not lock.locked()]
-    _shutdown_locks.difference_update(to_remove)
-

 # Main class for threads

@ -930,10 +914,8 @@ class is implemented.
        self._ident = None
        if _HAVE_THREAD_NATIVE_ID:
            self._native_id = None
-        self._tstate_lock = None
-        self._handle = None
+        self._handle = _ThreadHandle()
        self._started = Event()
-        self._is_stopped = False
        self._initialized = True
        # Copy of sys.stderr used by self._invoke_excepthook()
        self._stderr = _sys.stderr
@ -947,28 +929,18 @@ def _after_fork(self, new_ident=None):
        if new_ident is not None:
            # This thread is alive.
            self._ident = new_ident
-            if self._handle is not None:
-                assert self._handle.ident == new_ident
-            # bpo-42350: If the fork happens when the thread is already stopped
-            # (ex: after threading._shutdown() has been called), _tstate_lock
-            # is None. Do nothing in this case.
-            if self._tstate_lock is not None:
-                self._tstate_lock._at_fork_reinit()
-                self._tstate_lock.acquire()
+            assert self._handle.ident == new_ident
        else:
-            # This thread isn't alive after fork: it doesn't have a tstate
-            # anymore.
-            self._is_stopped = True
-            self._tstate_lock = None
-            self._handle = None
+            # Otherwise, the thread is dead, Jim.  _PyThread_AfterFork()
+            # already marked our handle done.
+            pass

    def __repr__(self):
        assert self._initialized, "Thread.__init__() was not called"
        status = "initial"
        if self._started.is_set():
            status = "started"
-        self.is_alive() # easy way to get ._is_stopped set when appropriate
-        if self._is_stopped:
+        if self._handle.is_done():
            status = "stopped"
        if self._daemonic:
            status += " daemon"
@ -996,7 +968,8 @@ def start(self):
            _limbo[self] = self
        try:
            # Start joinable thread
-            self._handle = _start_joinable_thread(self._bootstrap)
+            _start_joinable_thread(self._bootstrap, handle=self._handle,
+                                   daemon=self.daemon)
        except Exception:
            with _active_limbo_lock:
                del _limbo[self]
@ -1047,23 +1020,9 @@ def _set_ident(self):
        def _set_native_id(self):
            self._native_id = get_native_id()

-    def _set_tstate_lock(self):
-        """
-        Set a lock object which will be released by the interpreter when
-        the underlying thread state (see pystate.h) gets deleted.
-        """
-        self._tstate_lock = _set_sentinel()
-        self._tstate_lock.acquire()
-
-        if not self.daemon:
-            with _shutdown_locks_lock:
-                _maintain_shutdown_locks()
-                _shutdown_locks.add(self._tstate_lock)
-
    def _bootstrap_inner(self):
        try:
            self._set_ident()
-            self._set_tstate_lock()
            if _HAVE_THREAD_NATIVE_ID:
                self._set_native_id()
            self._started.set()
@ -1083,33 +1042,6 @@ def _bootstrap_inner(self):
        finally:
            self._delete()

-    def _stop(self):
-        # After calling ._stop(), .is_alive() returns False and .join() returns
-        # immediately.  ._tstate_lock must be released before calling ._stop().
-        #
-        # Normal case:  C code at the end of the thread's life
-        # (release_sentinel in _threadmodule.c) releases ._tstate_lock, and
-        # that's detected by our ._wait_for_tstate_lock(), called by .join()
-        # and .is_alive().  Any number of threads _may_ call ._stop()
-        # simultaneously (for example, if multiple threads are blocked in
-        # .join() calls), and they're not serialized.  That's harmless -
-        # they'll just make redundant rebindings of ._is_stopped and
-        # ._tstate_lock.  Obscure:  we rebind ._tstate_lock last so that the
-        # "assert self._is_stopped" in ._wait_for_tstate_lock() always works
-        # (the assert is executed only if ._tstate_lock is None).
-        #
-        # Special case:  _main_thread releases ._tstate_lock via this
-        # module's _shutdown() function.
-        lock = self._tstate_lock
-        if lock is not None:
-            assert not lock.locked()
-        self._is_stopped = True
-        self._tstate_lock = None
-        if not self.daemon:
-            with _shutdown_locks_lock:
-                # Remove our lock and other released locks from _shutdown_locks
-                _maintain_shutdown_locks()
-
    def _delete(self):
        "Remove current thread from the dict of currently running threads."
        with _active_limbo_lock:
@ -1150,47 +1082,12 @@ def join(self, timeout=None):
        if self is current_thread():
            raise RuntimeError("cannot join current thread")

-        if timeout is None:
-            self._wait_for_tstate_lock()
-        else:
-            # the behavior of a negative timeout isn't documented, but
-            # historically .join(timeout=x) for x<0 has acted as if timeout=0
-            self._wait_for_tstate_lock(timeout=max(timeout, 0))
+        # the behavior of a negative timeout isn't documented, but
+        # historically .join(timeout=x) for x<0 has acted as if timeout=0
+        if timeout is not None:
+            timeout = max(timeout, 0)

-        if self._is_stopped:
-            self._join_os_thread()
-
-    def _join_os_thread(self):
-        # self._handle may be cleared post-fork
-        if self._handle is not None:
-            self._handle.join()
-
-    def _wait_for_tstate_lock(self, block=True, timeout=-1):
-        # Issue #18808: wait for the thread state to be gone.
-        # At the end of the thread's life, after all knowledge of the thread
-        # is removed from C data structures, C code releases our _tstate_lock.
-        # This method passes its arguments to _tstate_lock.acquire().
-        # If the lock is acquired, the C code is done, and self._stop() is
-        # called.  That sets ._is_stopped to True, and ._tstate_lock to None.
-        lock = self._tstate_lock
-        if lock is None:
-            # already determined that the C code is done
-            assert self._is_stopped
-            return
-
-        try:
-            if lock.acquire(block, timeout):
-                lock.release()
-                self._stop()
-        except:
-            if lock.locked():
-                # bpo-45274: lock.acquire() acquired the lock, but the function
-                # was interrupted with an exception before reaching the
-                # lock.release(). It can happen if a signal handler raises an
-                # exception, like CTRL+C which raises KeyboardInterrupt.
-                lock.release()
-                self._stop()
-            raise
+        self._handle.join(timeout)

    @property
    def name(self):
@ -1241,13 +1138,7 @@ def is_alive(self):

        """
        assert self._initialized, "Thread.__init__() not called"
-        if self._is_stopped or not self._started.is_set():
-            return False
-        self._wait_for_tstate_lock(False)
-        if not self._is_stopped:
-            return True
-        self._join_os_thread()
-        return False
+        return self._started.is_set() and not self._handle.is_done()

    @property
    def daemon(self):
@ -1456,18 +1347,14 @@ class _MainThread(Thread):

    def __init__(self):
        Thread.__init__(self, name="MainThread", daemon=False)
-        self._set_tstate_lock()
        self._started.set()
-        self._set_ident()
+        self._ident = _get_main_thread_ident()
+        self._handle = _make_thread_handle(self._ident)
        if _HAVE_THREAD_NATIVE_ID:
            self._set_native_id()
        with _active_limbo_lock:
            _active[self._ident] = self

-    def _join_os_thread(self):
-        # No ThreadHandle for main thread
-        pass
-

 # Helper thread-local instance to detect when a _DummyThread
 # is collected. Not a part of the public API.
@ -1510,17 +1397,15 @@ def __init__(self):
                        daemon=_daemon_threads_allowed())
        self._started.set()
        self._set_ident()
+        self._handle = _make_thread_handle(self._ident)
        if _HAVE_THREAD_NATIVE_ID:
            self._set_native_id()
        with _active_limbo_lock:
            _active[self._ident] = self
        _DeleteDummyThreadOnDel(self)

-    def _stop(self):
-        pass
-
    def is_alive(self):
-        if not self._is_stopped and self._started.is_set():
+        if not self._handle.is_done() and self._started.is_set():
            return True
        raise RuntimeError("thread is not alive")

@ -1532,7 +1417,6 @@ def _after_fork(self, new_ident=None):
            self.__class__ = _MainThread
            self._name = 'MainThread'
            self._daemonic = False
-            self._set_tstate_lock()
        Thread._after_fork(self, new_ident=new_ident)


@ -1631,12 +1515,11 @@ def _shutdown():
    """
    Wait until the Python thread state of all non-daemon threads get deleted.
    """
-    # Obscure:  other threads may be waiting to join _main_thread.  That's
-    # dubious, but some code does it.  We can't wait for C code to release
-    # the main thread's tstate_lock - that won't happen until the interpreter
-    # is nearly dead.  So we release it here.  Note that just calling _stop()
-    # isn't enough:  other threads may already be waiting on _tstate_lock.
-    if _main_thread._is_stopped and _is_main_interpreter():
+    # Obscure: other threads may be waiting to join _main_thread.  That's
+    # dubious, but some code does it. We can't wait for it to be marked as done
+    # normally - that won't happen until the interpreter is nearly dead. So
+    # mark it done here.
+    if _main_thread._handle.is_done() and _is_main_interpreter():
        # _shutdown() was already called
        return

@ -1648,42 +1531,11 @@ def _shutdown():
    for atexit_call in reversed(_threading_atexits):
        atexit_call()

-    # Main thread
-    if _main_thread.ident == get_ident():
-        tlock = _main_thread._tstate_lock
-        # The main thread isn't finished yet, so its thread state lock can't
-        # have been released.
-        assert tlock is not None
-        if tlock.locked():
-            # It should have been released already by
-            # _PyInterpreterState_SetNotRunningMain(), but there may be
-            # embedders that aren't calling that yet.
-            tlock.release()
-        _main_thread._stop()
-    else:
-        # bpo-1596321: _shutdown() must be called in the main thread.
-        # If the threading module was not imported by the main thread,
-        # _main_thread is the thread which imported the threading module.
-        # In this case, ignore _main_thread, similar behavior than for threads
-        # spawned by C libraries or using _thread.start_new_thread().
-        pass
+    if _is_main_interpreter():
+        _main_thread._handle._set_done()

-    # Join all non-deamon threads
-    while True:
-        with _shutdown_locks_lock:
-            locks = list(_shutdown_locks)
-            _shutdown_locks.clear()
-
-        if not locks:
-            break
-
-        for lock in locks:
-            # mimic Thread.join()
-            lock.acquire()
-            lock.release()
-
-        # new threads can be spawned while we were waiting for the other
-        # threads to complete
+    # Wait for all non-daemon threads to exit.
+    _thread_shutdown()


 def main_thread():
@ -1703,7 +1555,6 @@ def _after_fork():
    # Reset _active_limbo_lock, in case we forked while the lock was held
    # by another (non-forked) thread.  http://bugs.python.org/issue874900
    global _active_limbo_lock, _main_thread
-    global _shutdown_locks_lock, _shutdown_locks
    _active_limbo_lock = RLock()

    # fork() only copied the current thread; clear references to others.
@ -1719,10 +1570,6 @@ def _after_fork():

    _main_thread = current

-    # reset _shutdown() locks: threads re-register their _tstate_lock below
-    _shutdown_locks_lock = _allocate_lock()
-    _shutdown_locks = set()
-
    with _active_limbo_lock:
        # Dangling thread instances must still have their locks reset,
        # because someone may join() them.
@ -1739,7 +1586,6 @@ def _after_fork():
            else:
                # All the others are already stopped.
                thread._after_fork()
-                thread._stop()

        _limbo.clear()
        _active.clear()
--- a/Misc/NEWS.d/next/Library/2024-02-01-03-09-38.gh-issue-114271.raCkt5.rst
+++ b/Misc/NEWS.d/next/Library/2024-02-01-03-09-38.gh-issue-114271.raCkt5.rst
@ -0,0 +1,7 @@
+Fix a race in ``threading.Thread.join()``.
+
+``threading._MainThread`` now always represents the main thread of the main
+interpreter.
+
+``PyThreadState.on_delete`` and ``PyThreadState.on_delete_data`` have been
+removed.
--- a/Modules/_threadmodule.c
+++ b/Modules/_threadmodule.c
--- a/Python/lock.c
+++ b/Python/lock.c
@ -304,30 +304,6 @@ PyEvent_WaitTimed(PyEvent *evt, PyTime_t timeout_ns)
    }
 }

-_PyEventRc *
-_PyEventRc_New(void)
-{
-    _PyEventRc *erc = (_PyEventRc *)PyMem_RawCalloc(1, sizeof(_PyEventRc));
-    if (erc != NULL) {
-        erc->refcount = 1;
-    }
-    return erc;
-}
-
-void
-_PyEventRc_Incref(_PyEventRc *erc)
-{
-    _Py_atomic_add_ssize(&erc->refcount, 1);
-}
-
-void
-_PyEventRc_Decref(_PyEventRc *erc)
-{
-    if (_Py_atomic_add_ssize(&erc->refcount, -1) == 1) {
-        PyMem_RawFree(erc);
-    }
-}
-
 static int
 unlock_once(_PyOnceFlag *o, int res)
 {
--- a/Python/pystate.c
+++ b/Python/pystate.c
@ -1032,20 +1032,7 @@ _PyInterpreterState_SetRunningMain(PyInterpreterState *interp)
 void
 _PyInterpreterState_SetNotRunningMain(PyInterpreterState *interp)
 {
-    PyThreadState *tstate = interp->threads.main;
-    assert(tstate == current_fast_get());
-
-    if (tstate->on_delete != NULL) {
-        // The threading module was imported for the first time in this
-        // thread, so it was set as threading._main_thread.  (See gh-75698.)
-        // The thread has finished running the Python program so we mark
-        // the thread object as finished.
-        assert(tstate->_whence != _PyThreadState_WHENCE_THREADING);
-        tstate->on_delete(tstate->on_delete_data);
-        tstate->on_delete = NULL;
-        tstate->on_delete_data = NULL;
-    }
-
+    assert(interp->threads.main == current_fast_get());
    interp->threads.main = NULL;
 }

@ -1570,16 +1557,6 @@ PyThreadState_Clear(PyThreadState *tstate)

    Py_CLEAR(tstate->context);

-    if (tstate->on_delete != NULL) {
-        // For the "main" thread of each interpreter, this is meant
-        // to be done in _PyInterpreterState_SetNotRunningMain().
-        // That leaves threads created by the threading module,
-        // and any threads killed by forking.
-        // However, we also accommodate "main" threads that still
-        // don't call _PyInterpreterState_SetNotRunningMain() yet.
-        tstate->on_delete(tstate->on_delete_data);
-    }
-
 #ifdef Py_GIL_DISABLED
    // Each thread should clear own freelists in free-threading builds.
    struct _Py_object_freelists *freelists = _Py_object_freelists_GET();