gh-114271: Fix race in Thread.join() (#114839)

There is a race between when `Thread._tstate_lock` is released[^1] in `Thread._wait_for_tstate_lock()`
and when `Thread._stop()` asserts[^2] that it is unlocked. Consider the following execution
involving threads A, B, and C:

1. A starts.
2. B joins A, blocking on its `_tstate_lock`.
3. C joins A, blocking on its `_tstate_lock`.
4. A finishes and releases its `_tstate_lock`.
5. B acquires A's `_tstate_lock` in `_wait_for_tstate_lock()`, releases it, but is swapped
   out before calling `_stop()`.
6. C is scheduled, acquires A's `_tstate_lock` in `_wait_for_tstate_lock()` but is swapped
   out before releasing it.
7. B is scheduled, calls `_stop()`, which asserts that A's `_tstate_lock` is not held.
   However, C holds it, so the assertion fails.

The race can be reproduced[^3] by inserting sleeps at the appropriate points in
the threading code. To do so, run the `repro_join_race.py` from the linked repo.

There are two main parts to this PR:

1. `_tstate_lock` is replaced with an event that is attached to `PyThreadState`.
   The event is set by the runtime prior to the thread being cleared (in the same
   place that `_tstate_lock` was released). `Thread.join()` blocks waiting for the
   event to be set.
2. `_PyInterpreterState_WaitForThreads()` provides the ability to wait for all
   non-daemon threads to exit. To do so, an `is_daemon` predicate was added to
   `PyThreadState`. This field is set each time a thread is created. `threading._shutdown()`
   now calls into `_PyInterpreterState_WaitForThreads()` instead of waiting on
   `_tstate_lock`s.

[^1]: 441affc9e7/Lib/threading.py (L1201)
[^2]: 441affc9e7/Lib/threading.py (L1115)
[^3]: 8194653279

---------

Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
This commit is contained in:
mpage 2024-03-16 05:56:30 -07:00 committed by GitHub
parent 86bc40dd41
commit 33da0e844c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 771 additions and 643 deletions

View file

@ -161,32 +161,6 @@ struct _ts {
*/
uintptr_t critical_section;
/* Called when a thread state is deleted normally, but not when it
* is destroyed after fork().
* Pain: to prevent rare but fatal shutdown errors (issue 18808),
* Thread.join() must wait for the join'ed thread's tstate to be unlinked
* from the tstate chain. That happens at the end of a thread's life,
* in pystate.c.
* The obvious way doesn't quite work: create a lock which the tstate
* unlinking code releases, and have Thread.join() wait to acquire that
* lock. The problem is that we _are_ at the end of the thread's life:
* if the thread holds the last reference to the lock, decref'ing the
* lock will delete the lock, and that may trigger arbitrary Python code
* if there's a weakref, with a callback, to the lock. But by this time
* _PyRuntime.gilstate.tstate_current is already NULL, so only the simplest
* of C code can be allowed to run (in particular it must not be possible to
* release the GIL).
* So instead of holding the lock directly, the tstate holds a weakref to
* the lock: that's the value of on_delete_data below. Decref'ing a
* weakref is harmless.
* on_delete points to _threadmodule.c's static release_sentinel() function.
* After the tstate is unlinked, release_sentinel is called with the
* weakref-to-lock (on_delete_data) argument, and release_sentinel releases
* the indirectly held lock.
*/
void (*on_delete)(void *);
void *on_delete_data;
int coroutine_origin_tracking_depth;
PyObject *async_gen_firstiter;

View file

@ -153,16 +153,6 @@ PyAPI_FUNC(void) PyEvent_Wait(PyEvent *evt);
// and 0 if the timeout expired or thread was interrupted.
PyAPI_FUNC(int) PyEvent_WaitTimed(PyEvent *evt, PyTime_t timeout_ns);
// A one-time event notification with reference counting.
typedef struct _PyEventRc {
PyEvent event;
Py_ssize_t refcount;
} _PyEventRc;
_PyEventRc *_PyEventRc_New(void);
void _PyEventRc_Incref(_PyEventRc *erc);
void _PyEventRc_Decref(_PyEventRc *erc);
// _PyRawMutex implements a word-sized mutex that that does not depend on the
// parking lot API, and therefore can be used in the parking lot
// implementation.

View file

@ -78,7 +78,7 @@ struct _pythread_runtime_state {
} stubs;
#endif
// Linked list of ThreadHandleObjects
// Linked list of ThreadHandles
struct llist_node handles;
};

View file

@ -209,7 +209,7 @@ def test_threading(self):
expected = [
("_thread.start_new_thread", "(<test_func>, (), None)"),
("test.test_func", "()"),
("_thread.start_joinable_thread", "(<test_func>,)"),
("_thread.start_joinable_thread", "(<test_func>, 1, None)"),
("test.test_func", "()"),
]

View file

@ -201,13 +201,13 @@ def test_python_finalization_error(self):
# QueueFeederThread.
orig_start_new_thread = threading._start_joinable_thread
nthread = 0
def mock_start_new_thread(func, *args):
def mock_start_new_thread(func, *args, **kwargs):
nonlocal nthread
if nthread >= 1:
raise RuntimeError("can't create new thread at "
"interpreter shutdown")
nthread += 1
return orig_start_new_thread(func, *args)
return orig_start_new_thread(func, *args, **kwargs)
with support.swap_attr(threading, '_start_joinable_thread',
mock_start_new_thread):

View file

@ -289,6 +289,54 @@ def joiner():
with self.assertRaisesRegex(RuntimeError, "Cannot join current thread"):
raise error
def test_join_with_timeout(self):
lock = thread.allocate_lock()
lock.acquire()
def thr():
lock.acquire()
with threading_helper.wait_threads_exit():
handle = thread.start_joinable_thread(thr)
handle.join(0.1)
self.assertFalse(handle.is_done())
lock.release()
handle.join()
self.assertTrue(handle.is_done())
def test_join_unstarted(self):
handle = thread._ThreadHandle()
with self.assertRaisesRegex(RuntimeError, "thread not started"):
handle.join()
def test_set_done_unstarted(self):
handle = thread._ThreadHandle()
with self.assertRaisesRegex(RuntimeError, "thread not started"):
handle._set_done()
def test_start_duplicate_handle(self):
lock = thread.allocate_lock()
lock.acquire()
def func():
lock.acquire()
handle = thread._ThreadHandle()
with threading_helper.wait_threads_exit():
thread.start_joinable_thread(func, handle=handle)
with self.assertRaisesRegex(RuntimeError, "thread already started"):
thread.start_joinable_thread(func, handle=handle)
lock.release()
handle.join()
def test_start_with_none_handle(self):
def func():
pass
with threading_helper.wait_threads_exit():
handle = thread.start_joinable_thread(func, handle=None)
handle.join()
class Barrier:
def __init__(self, num_threads):

View file

@ -408,7 +408,7 @@ def run(self):
def test_limbo_cleanup(self):
# Issue 7481: Failure to start thread should cleanup the limbo map.
def fail_new_thread(*args):
def fail_new_thread(*args, **kwargs):
raise threading.ThreadError()
_start_joinable_thread = threading._start_joinable_thread
threading._start_joinable_thread = fail_new_thread
@ -912,41 +912,6 @@ def f():
rc, out, err = assert_python_ok("-c", code)
self.assertEqual(err, b"")
def test_tstate_lock(self):
# Test an implementation detail of Thread objects.
started = _thread.allocate_lock()
finish = _thread.allocate_lock()
started.acquire()
finish.acquire()
def f():
started.release()
finish.acquire()
time.sleep(0.01)
# The tstate lock is None until the thread is started
t = threading.Thread(target=f)
self.assertIs(t._tstate_lock, None)
t.start()
started.acquire()
self.assertTrue(t.is_alive())
# The tstate lock can't be acquired when the thread is running
# (or suspended).
tstate_lock = t._tstate_lock
self.assertFalse(tstate_lock.acquire(timeout=0), False)
finish.release()
# When the thread ends, the state_lock can be successfully
# acquired.
self.assertTrue(tstate_lock.acquire(timeout=support.SHORT_TIMEOUT), False)
# But is_alive() is still True: we hold _tstate_lock now, which
# prevents is_alive() from knowing the thread's end-of-life C code
# is done.
self.assertTrue(t.is_alive())
# Let is_alive() find out the C code is done.
tstate_lock.release()
self.assertFalse(t.is_alive())
# And verify the thread disposed of _tstate_lock.
self.assertIsNone(t._tstate_lock)
t.join()
def test_repr_stopped(self):
# Verify that "stopped" shows up in repr(Thread) appropriately.
started = _thread.allocate_lock()
@ -1112,30 +1077,6 @@ def checker():
self.assertEqual(threading.getprofile(), old_profile)
self.assertEqual(sys.getprofile(), old_profile)
@cpython_only
def test_shutdown_locks(self):
for daemon in (False, True):
with self.subTest(daemon=daemon):
event = threading.Event()
thread = threading.Thread(target=event.wait, daemon=daemon)
# Thread.start() must add lock to _shutdown_locks,
# but only for non-daemon thread
thread.start()
tstate_lock = thread._tstate_lock
if not daemon:
self.assertIn(tstate_lock, threading._shutdown_locks)
else:
self.assertNotIn(tstate_lock, threading._shutdown_locks)
# unblock the thread and join it
event.set()
thread.join()
# Thread._stop() must remove tstate_lock from _shutdown_locks.
# Daemon threads must never add it to _shutdown_locks.
self.assertNotIn(tstate_lock, threading._shutdown_locks)
def test_locals_at_exit(self):
# bpo-19466: thread locals must not be deleted before destructors
# are called

View file

@ -36,8 +36,11 @@
_daemon_threads_allowed = _thread.daemon_threads_allowed
_allocate_lock = _thread.allocate_lock
_LockType = _thread.LockType
_set_sentinel = _thread._set_sentinel
_thread_shutdown = _thread._shutdown
_make_thread_handle = _thread._make_thread_handle
_ThreadHandle = _thread._ThreadHandle
get_ident = _thread.get_ident
_get_main_thread_ident = _thread._get_main_thread_ident
_is_main_interpreter = _thread._is_main_interpreter
try:
get_native_id = _thread.get_native_id
@ -847,25 +850,6 @@ def _newname(name_template):
_limbo = {}
_dangling = WeakSet()
# Set of Thread._tstate_lock locks of non-daemon threads used by _shutdown()
# to wait until all Python thread states get deleted:
# see Thread._set_tstate_lock().
_shutdown_locks_lock = _allocate_lock()
_shutdown_locks = set()
def _maintain_shutdown_locks():
"""
Drop any shutdown locks that don't correspond to running threads anymore.
Calling this from time to time avoids an ever-growing _shutdown_locks
set when Thread objects are not joined explicitly. See bpo-37788.
This must be called with _shutdown_locks_lock acquired.
"""
# If a lock was released, the corresponding thread has exited
to_remove = [lock for lock in _shutdown_locks if not lock.locked()]
_shutdown_locks.difference_update(to_remove)
# Main class for threads
@ -930,10 +914,8 @@ class is implemented.
self._ident = None
if _HAVE_THREAD_NATIVE_ID:
self._native_id = None
self._tstate_lock = None
self._handle = None
self._handle = _ThreadHandle()
self._started = Event()
self._is_stopped = False
self._initialized = True
# Copy of sys.stderr used by self._invoke_excepthook()
self._stderr = _sys.stderr
@ -947,28 +929,18 @@ def _after_fork(self, new_ident=None):
if new_ident is not None:
# This thread is alive.
self._ident = new_ident
if self._handle is not None:
assert self._handle.ident == new_ident
# bpo-42350: If the fork happens when the thread is already stopped
# (ex: after threading._shutdown() has been called), _tstate_lock
# is None. Do nothing in this case.
if self._tstate_lock is not None:
self._tstate_lock._at_fork_reinit()
self._tstate_lock.acquire()
assert self._handle.ident == new_ident
else:
# This thread isn't alive after fork: it doesn't have a tstate
# anymore.
self._is_stopped = True
self._tstate_lock = None
self._handle = None
# Otherwise, the thread is dead, Jim. _PyThread_AfterFork()
# already marked our handle done.
pass
def __repr__(self):
assert self._initialized, "Thread.__init__() was not called"
status = "initial"
if self._started.is_set():
status = "started"
self.is_alive() # easy way to get ._is_stopped set when appropriate
if self._is_stopped:
if self._handle.is_done():
status = "stopped"
if self._daemonic:
status += " daemon"
@ -996,7 +968,8 @@ def start(self):
_limbo[self] = self
try:
# Start joinable thread
self._handle = _start_joinable_thread(self._bootstrap)
_start_joinable_thread(self._bootstrap, handle=self._handle,
daemon=self.daemon)
except Exception:
with _active_limbo_lock:
del _limbo[self]
@ -1047,23 +1020,9 @@ def _set_ident(self):
def _set_native_id(self):
self._native_id = get_native_id()
def _set_tstate_lock(self):
"""
Set a lock object which will be released by the interpreter when
the underlying thread state (see pystate.h) gets deleted.
"""
self._tstate_lock = _set_sentinel()
self._tstate_lock.acquire()
if not self.daemon:
with _shutdown_locks_lock:
_maintain_shutdown_locks()
_shutdown_locks.add(self._tstate_lock)
def _bootstrap_inner(self):
try:
self._set_ident()
self._set_tstate_lock()
if _HAVE_THREAD_NATIVE_ID:
self._set_native_id()
self._started.set()
@ -1083,33 +1042,6 @@ def _bootstrap_inner(self):
finally:
self._delete()
def _stop(self):
# After calling ._stop(), .is_alive() returns False and .join() returns
# immediately. ._tstate_lock must be released before calling ._stop().
#
# Normal case: C code at the end of the thread's life
# (release_sentinel in _threadmodule.c) releases ._tstate_lock, and
# that's detected by our ._wait_for_tstate_lock(), called by .join()
# and .is_alive(). Any number of threads _may_ call ._stop()
# simultaneously (for example, if multiple threads are blocked in
# .join() calls), and they're not serialized. That's harmless -
# they'll just make redundant rebindings of ._is_stopped and
# ._tstate_lock. Obscure: we rebind ._tstate_lock last so that the
# "assert self._is_stopped" in ._wait_for_tstate_lock() always works
# (the assert is executed only if ._tstate_lock is None).
#
# Special case: _main_thread releases ._tstate_lock via this
# module's _shutdown() function.
lock = self._tstate_lock
if lock is not None:
assert not lock.locked()
self._is_stopped = True
self._tstate_lock = None
if not self.daemon:
with _shutdown_locks_lock:
# Remove our lock and other released locks from _shutdown_locks
_maintain_shutdown_locks()
def _delete(self):
"Remove current thread from the dict of currently running threads."
with _active_limbo_lock:
@ -1150,47 +1082,12 @@ def join(self, timeout=None):
if self is current_thread():
raise RuntimeError("cannot join current thread")
if timeout is None:
self._wait_for_tstate_lock()
else:
# the behavior of a negative timeout isn't documented, but
# historically .join(timeout=x) for x<0 has acted as if timeout=0
self._wait_for_tstate_lock(timeout=max(timeout, 0))
# the behavior of a negative timeout isn't documented, but
# historically .join(timeout=x) for x<0 has acted as if timeout=0
if timeout is not None:
timeout = max(timeout, 0)
if self._is_stopped:
self._join_os_thread()
def _join_os_thread(self):
# self._handle may be cleared post-fork
if self._handle is not None:
self._handle.join()
def _wait_for_tstate_lock(self, block=True, timeout=-1):
# Issue #18808: wait for the thread state to be gone.
# At the end of the thread's life, after all knowledge of the thread
# is removed from C data structures, C code releases our _tstate_lock.
# This method passes its arguments to _tstate_lock.acquire().
# If the lock is acquired, the C code is done, and self._stop() is
# called. That sets ._is_stopped to True, and ._tstate_lock to None.
lock = self._tstate_lock
if lock is None:
# already determined that the C code is done
assert self._is_stopped
return
try:
if lock.acquire(block, timeout):
lock.release()
self._stop()
except:
if lock.locked():
# bpo-45274: lock.acquire() acquired the lock, but the function
# was interrupted with an exception before reaching the
# lock.release(). It can happen if a signal handler raises an
# exception, like CTRL+C which raises KeyboardInterrupt.
lock.release()
self._stop()
raise
self._handle.join(timeout)
@property
def name(self):
@ -1241,13 +1138,7 @@ def is_alive(self):
"""
assert self._initialized, "Thread.__init__() not called"
if self._is_stopped or not self._started.is_set():
return False
self._wait_for_tstate_lock(False)
if not self._is_stopped:
return True
self._join_os_thread()
return False
return self._started.is_set() and not self._handle.is_done()
@property
def daemon(self):
@ -1456,18 +1347,14 @@ class _MainThread(Thread):
def __init__(self):
Thread.__init__(self, name="MainThread", daemon=False)
self._set_tstate_lock()
self._started.set()
self._set_ident()
self._ident = _get_main_thread_ident()
self._handle = _make_thread_handle(self._ident)
if _HAVE_THREAD_NATIVE_ID:
self._set_native_id()
with _active_limbo_lock:
_active[self._ident] = self
def _join_os_thread(self):
# No ThreadHandle for main thread
pass
# Helper thread-local instance to detect when a _DummyThread
# is collected. Not a part of the public API.
@ -1510,17 +1397,15 @@ def __init__(self):
daemon=_daemon_threads_allowed())
self._started.set()
self._set_ident()
self._handle = _make_thread_handle(self._ident)
if _HAVE_THREAD_NATIVE_ID:
self._set_native_id()
with _active_limbo_lock:
_active[self._ident] = self
_DeleteDummyThreadOnDel(self)
def _stop(self):
pass
def is_alive(self):
if not self._is_stopped and self._started.is_set():
if not self._handle.is_done() and self._started.is_set():
return True
raise RuntimeError("thread is not alive")
@ -1532,7 +1417,6 @@ def _after_fork(self, new_ident=None):
self.__class__ = _MainThread
self._name = 'MainThread'
self._daemonic = False
self._set_tstate_lock()
Thread._after_fork(self, new_ident=new_ident)
@ -1631,12 +1515,11 @@ def _shutdown():
"""
Wait until the Python thread state of all non-daemon threads get deleted.
"""
# Obscure: other threads may be waiting to join _main_thread. That's
# dubious, but some code does it. We can't wait for C code to release
# the main thread's tstate_lock - that won't happen until the interpreter
# is nearly dead. So we release it here. Note that just calling _stop()
# isn't enough: other threads may already be waiting on _tstate_lock.
if _main_thread._is_stopped and _is_main_interpreter():
# Obscure: other threads may be waiting to join _main_thread. That's
# dubious, but some code does it. We can't wait for it to be marked as done
# normally - that won't happen until the interpreter is nearly dead. So
# mark it done here.
if _main_thread._handle.is_done() and _is_main_interpreter():
# _shutdown() was already called
return
@ -1648,42 +1531,11 @@ def _shutdown():
for atexit_call in reversed(_threading_atexits):
atexit_call()
# Main thread
if _main_thread.ident == get_ident():
tlock = _main_thread._tstate_lock
# The main thread isn't finished yet, so its thread state lock can't
# have been released.
assert tlock is not None
if tlock.locked():
# It should have been released already by
# _PyInterpreterState_SetNotRunningMain(), but there may be
# embedders that aren't calling that yet.
tlock.release()
_main_thread._stop()
else:
# bpo-1596321: _shutdown() must be called in the main thread.
# If the threading module was not imported by the main thread,
# _main_thread is the thread which imported the threading module.
# In this case, ignore _main_thread, similar behavior than for threads
# spawned by C libraries or using _thread.start_new_thread().
pass
if _is_main_interpreter():
_main_thread._handle._set_done()
# Join all non-deamon threads
while True:
with _shutdown_locks_lock:
locks = list(_shutdown_locks)
_shutdown_locks.clear()
if not locks:
break
for lock in locks:
# mimic Thread.join()
lock.acquire()
lock.release()
# new threads can be spawned while we were waiting for the other
# threads to complete
# Wait for all non-daemon threads to exit.
_thread_shutdown()
def main_thread():
@ -1703,7 +1555,6 @@ def _after_fork():
# Reset _active_limbo_lock, in case we forked while the lock was held
# by another (non-forked) thread. http://bugs.python.org/issue874900
global _active_limbo_lock, _main_thread
global _shutdown_locks_lock, _shutdown_locks
_active_limbo_lock = RLock()
# fork() only copied the current thread; clear references to others.
@ -1719,10 +1570,6 @@ def _after_fork():
_main_thread = current
# reset _shutdown() locks: threads re-register their _tstate_lock below
_shutdown_locks_lock = _allocate_lock()
_shutdown_locks = set()
with _active_limbo_lock:
# Dangling thread instances must still have their locks reset,
# because someone may join() them.
@ -1739,7 +1586,6 @@ def _after_fork():
else:
# All the others are already stopped.
thread._after_fork()
thread._stop()
_limbo.clear()
_active.clear()

View file

@ -0,0 +1,7 @@
Fix a race in ``threading.Thread.join()``.
``threading._MainThread`` now always represents the main thread of the main
interpreter.
``PyThreadState.on_delete`` and ``PyThreadState.on_delete_data`` have been
removed.

File diff suppressed because it is too large Load diff

View file

@ -304,30 +304,6 @@ PyEvent_WaitTimed(PyEvent *evt, PyTime_t timeout_ns)
}
}
_PyEventRc *
_PyEventRc_New(void)
{
_PyEventRc *erc = (_PyEventRc *)PyMem_RawCalloc(1, sizeof(_PyEventRc));
if (erc != NULL) {
erc->refcount = 1;
}
return erc;
}
void
_PyEventRc_Incref(_PyEventRc *erc)
{
_Py_atomic_add_ssize(&erc->refcount, 1);
}
void
_PyEventRc_Decref(_PyEventRc *erc)
{
if (_Py_atomic_add_ssize(&erc->refcount, -1) == 1) {
PyMem_RawFree(erc);
}
}
static int
unlock_once(_PyOnceFlag *o, int res)
{

View file

@ -1032,20 +1032,7 @@ _PyInterpreterState_SetRunningMain(PyInterpreterState *interp)
void
_PyInterpreterState_SetNotRunningMain(PyInterpreterState *interp)
{
PyThreadState *tstate = interp->threads.main;
assert(tstate == current_fast_get());
if (tstate->on_delete != NULL) {
// The threading module was imported for the first time in this
// thread, so it was set as threading._main_thread. (See gh-75698.)
// The thread has finished running the Python program so we mark
// the thread object as finished.
assert(tstate->_whence != _PyThreadState_WHENCE_THREADING);
tstate->on_delete(tstate->on_delete_data);
tstate->on_delete = NULL;
tstate->on_delete_data = NULL;
}
assert(interp->threads.main == current_fast_get());
interp->threads.main = NULL;
}
@ -1570,16 +1557,6 @@ PyThreadState_Clear(PyThreadState *tstate)
Py_CLEAR(tstate->context);
if (tstate->on_delete != NULL) {
// For the "main" thread of each interpreter, this is meant
// to be done in _PyInterpreterState_SetNotRunningMain().
// That leaves threads created by the threading module,
// and any threads killed by forking.
// However, we also accommodate "main" threads that still
// don't call _PyInterpreterState_SetNotRunningMain() yet.
tstate->on_delete(tstate->on_delete_data);
}
#ifdef Py_GIL_DISABLED
// Each thread should clear own freelists in free-threading builds.
struct _Py_object_freelists *freelists = _Py_object_freelists_GET();