gh-109860: Use a New Thread State When Switching Interpreters, When Necessary (gh-110245)

In a few places we switch to another interpreter without knowing if it has a thread state associated with the current thread.  For the main interpreter there wasn't much of a problem, but for subinterpreters we were *mostly* okay re-using the tstate created with the interpreter (located via PyInterpreterState_ThreadHead()).  There was a good chance that tstate wasn't actually in use by another thread.

However, there are no guarantees of that.  Furthermore, re-using an already used tstate is currently fragile.  To address this, now we create a new thread state in each of those places and use it.

One consequence of this change is that PyInterpreterState_ThreadHead() may not return NULL (though that won't happen for the main interpreter).
This commit is contained in:
Eric Snow 2023-10-03 09:20:48 -06:00 committed by GitHub
parent 4227bfa8b2
commit f5198b09e1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 151 additions and 68 deletions

View file

@ -92,6 +92,15 @@ struct _ts {
/* padding to align to 4 bytes */
unsigned int :24;
} _status;
#ifdef Py_BUILD_CORE
# define _PyThreadState_WHENCE_NOTSET -1
# define _PyThreadState_WHENCE_UNKNOWN 0
# define _PyThreadState_WHENCE_INTERP 1
# define _PyThreadState_WHENCE_THREADING 2
# define _PyThreadState_WHENCE_GILSTATE 3
# define _PyThreadState_WHENCE_EXEC 4
#endif
int _whence;
int py_recursion_remaining;
int py_recursion_limit;

View file

@ -48,6 +48,7 @@ _Py_IsMainInterpreterFinalizing(PyInterpreterState *interp)
PyAPI_FUNC(int) _PyInterpreterState_SetRunningMain(PyInterpreterState *);
PyAPI_FUNC(void) _PyInterpreterState_SetNotRunningMain(PyInterpreterState *);
PyAPI_FUNC(int) _PyInterpreterState_IsRunningMain(PyInterpreterState *);
PyAPI_FUNC(int) _PyInterpreterState_FailIfRunningMain(PyInterpreterState *);
static inline const PyConfig *
@ -139,7 +140,9 @@ static inline PyInterpreterState* _PyInterpreterState_GET(void) {
// PyThreadState functions
extern PyThreadState * _PyThreadState_New(PyInterpreterState *interp);
extern PyThreadState * _PyThreadState_New(
PyInterpreterState *interp,
int whence);
extern void _PyThreadState_Bind(PyThreadState *tstate);
extern void _PyThreadState_DeleteExcept(PyThreadState *tstate);

View file

@ -185,6 +185,7 @@ extern PyTypeObject _PyExc_MemoryError;
#define _PyThreadState_INIT \
{ \
._whence = _PyThreadState_WHENCE_NOTSET, \
.py_recursion_limit = Py_DEFAULT_RECURSION_LIMIT, \
.context_ver = 1, \
}

View file

@ -1593,8 +1593,11 @@ def _shutdown():
# The main thread isn't finished yet, so its thread state lock can't
# have been released.
assert tlock is not None
assert tlock.locked()
tlock.release()
if tlock.locked():
# It should have been released already by
# _PyInterpreterState_SetNotRunningMain(), but there may be
# embedders that aren't calling that yet.
tlock.release()
_main_thread._stop()
else:
# bpo-1596321: _shutdown() must be called in the main thread.

View file

@ -1205,7 +1205,7 @@ thread_PyThread_start_new_thread(PyObject *self, PyObject *fargs)
if (boot == NULL) {
return PyErr_NoMemory();
}
boot->tstate = _PyThreadState_New(interp);
boot->tstate = _PyThreadState_New(interp, _PyThreadState_WHENCE_THREADING);
if (boot->tstate == NULL) {
PyMem_RawFree(boot);
if (!PyErr_Occurred()) {

View file

@ -242,6 +242,11 @@ _sharedns_apply(_sharedns *shared, PyObject *ns)
// of the exception in the calling interpreter.
typedef struct _sharedexception {
PyInterpreterState *interp;
#define ERR_NOT_SET 0
#define ERR_NO_MEMORY 1
#define ERR_ALREADY_RUNNING 2
int code;
const char *name;
const char *msg;
} _sharedexception;
@ -263,14 +268,26 @@ _sharedexception_clear(_sharedexception *exc)
}
static const char *
_sharedexception_bind(PyObject *exc, _sharedexception *sharedexc)
_sharedexception_bind(PyObject *exc, int code, _sharedexception *sharedexc)
{
if (sharedexc->interp == NULL) {
sharedexc->interp = PyInterpreterState_Get();
}
if (code != ERR_NOT_SET) {
assert(exc == NULL);
assert(code > 0);
sharedexc->code = code;
return NULL;
}
assert(exc != NULL);
const char *failure = NULL;
PyObject *nameobj = PyUnicode_FromString(Py_TYPE(exc)->tp_name);
if (nameobj == NULL) {
failure = "unable to format exception type name";
code = ERR_NO_MEMORY;
goto error;
}
sharedexc->name = _copy_raw_string(nameobj);
@ -281,6 +298,7 @@ _sharedexception_bind(PyObject *exc, _sharedexception *sharedexc)
} else {
failure = "unable to encode and copy exception type name";
}
code = ERR_NO_MEMORY;
goto error;
}
@ -288,6 +306,7 @@ _sharedexception_bind(PyObject *exc, _sharedexception *sharedexc)
PyObject *msgobj = PyUnicode_FromFormat("%S", exc);
if (msgobj == NULL) {
failure = "unable to format exception message";
code = ERR_NO_MEMORY;
goto error;
}
sharedexc->msg = _copy_raw_string(msgobj);
@ -298,6 +317,7 @@ _sharedexception_bind(PyObject *exc, _sharedexception *sharedexc)
} else {
failure = "unable to encode and copy exception message";
}
code = ERR_NO_MEMORY;
goto error;
}
}
@ -308,7 +328,10 @@ _sharedexception_bind(PyObject *exc, _sharedexception *sharedexc)
assert(failure != NULL);
PyErr_Clear();
_sharedexception_clear(sharedexc);
*sharedexc = no_exception;
*sharedexc = (_sharedexception){
.interp = sharedexc->interp,
.code = code,
};
return failure;
}
@ -316,6 +339,7 @@ static void
_sharedexception_apply(_sharedexception *exc, PyObject *wrapperclass)
{
if (exc->name != NULL) {
assert(exc->code == ERR_NOT_SET);
if (exc->msg != NULL) {
PyErr_Format(wrapperclass, "%s: %s", exc->name, exc->msg);
}
@ -324,9 +348,19 @@ _sharedexception_apply(_sharedexception *exc, PyObject *wrapperclass)
}
}
else if (exc->msg != NULL) {
assert(exc->code == ERR_NOT_SET);
PyErr_SetString(wrapperclass, exc->msg);
}
else if (exc->code == ERR_NO_MEMORY) {
PyErr_NoMemory();
}
else if (exc->code == ERR_ALREADY_RUNNING) {
assert(exc->interp != NULL);
assert(_PyInterpreterState_IsRunningMain(exc->interp));
_PyInterpreterState_FailIfRunningMain(exc->interp);
}
else {
assert(exc->code == ERR_NOT_SET);
PyErr_SetNone(wrapperclass);
}
}
@ -362,9 +396,16 @@ static int
_run_script(PyInterpreterState *interp, const char *codestr,
_sharedns *shared, _sharedexception *sharedexc)
{
int errcode = ERR_NOT_SET;
if (_PyInterpreterState_SetRunningMain(interp) < 0) {
// We skip going through the shared exception.
return -1;
assert(PyErr_Occurred());
// In the case where we didn't switch interpreters, it would
// be more efficient to leave the exception in place and return
// immediately. However, life is simpler if we don't.
PyErr_Clear();
errcode = ERR_ALREADY_RUNNING;
goto error;
}
PyObject *excval = NULL;
@ -403,16 +444,17 @@ _run_script(PyInterpreterState *interp, const char *codestr,
error:
excval = PyErr_GetRaisedException();
const char *failure = _sharedexception_bind(excval, sharedexc);
const char *failure = _sharedexception_bind(excval, errcode, sharedexc);
if (failure != NULL) {
fprintf(stderr,
"RunFailedError: script raised an uncaught exception (%s)",
failure);
PyErr_Clear();
}
Py_XDECREF(excval);
if (errcode != ERR_ALREADY_RUNNING) {
_PyInterpreterState_SetNotRunningMain(interp);
}
assert(!PyErr_Occurred());
_PyInterpreterState_SetNotRunningMain(interp);
return -1;
}
@ -421,6 +463,7 @@ _run_script_in_interpreter(PyObject *mod, PyInterpreterState *interp,
const char *codestr, PyObject *shareables)
{
module_state *state = get_module_state(mod);
assert(state != NULL);
_sharedns *shared = _get_shared_ns(shareables);
if (shared == NULL && PyErr_Occurred()) {
@ -429,50 +472,30 @@ _run_script_in_interpreter(PyObject *mod, PyInterpreterState *interp,
// Switch to interpreter.
PyThreadState *save_tstate = NULL;
PyThreadState *tstate = NULL;
if (interp != PyInterpreterState_Get()) {
// XXX gh-109860: Using the "head" thread isn't strictly correct.
PyThreadState *tstate = PyInterpreterState_ThreadHead(interp);
assert(tstate != NULL);
// Hack (until gh-109860): The interpreter's initial thread state
// is least likely to break.
while(tstate->next != NULL) {
tstate = tstate->next;
}
// We must do this check before switching interpreters, so any
// exception gets raised in the right one.
// XXX gh-109860: Drop this redundant check once we stop
// re-using tstates that might already be in use.
if (_PyInterpreterState_IsRunningMain(interp)) {
PyErr_SetString(PyExc_RuntimeError,
"interpreter already running");
if (shared != NULL) {
_sharedns_free(shared);
}
return -1;
}
tstate = PyThreadState_New(interp);
tstate->_whence = _PyThreadState_WHENCE_EXEC;
// XXX Possible GILState issues?
save_tstate = PyThreadState_Swap(tstate);
}
// Run the script.
_sharedexception exc = {NULL, NULL};
_sharedexception exc = (_sharedexception){ .interp = interp };
int result = _run_script(interp, codestr, shared, &exc);
// Switch back.
if (save_tstate != NULL) {
PyThreadState_Clear(tstate);
PyThreadState_Swap(save_tstate);
PyThreadState_Delete(tstate);
}
// Propagate any exception out to the caller.
if (exc.name != NULL) {
assert(state != NULL);
if (result < 0) {
assert(!PyErr_Occurred());
_sharedexception_apply(&exc, state->RunFailedError);
}
else if (result != 0) {
if (!PyErr_Occurred()) {
// We were unable to allocate a shared exception.
PyErr_NoMemory();
}
assert(PyErr_Occurred());
}
if (shared != NULL) {
@ -502,6 +525,7 @@ interp_create(PyObject *self, PyObject *args, PyObject *kwds)
const PyInterpreterConfig config = isolated
? (PyInterpreterConfig)_PyInterpreterConfig_INIT
: (PyInterpreterConfig)_PyInterpreterConfig_LEGACY_INIT;
// XXX Possible GILState issues?
PyThreadState *tstate = NULL;
PyStatus status = Py_NewInterpreterFromConfig(&tstate, &config);
@ -517,6 +541,7 @@ interp_create(PyObject *self, PyObject *args, PyObject *kwds)
return NULL;
}
assert(tstate != NULL);
PyInterpreterState *interp = PyThreadState_GetInterpreter(tstate);
PyObject *idobj = PyInterpreterState_GetIDObject(interp);
if (idobj == NULL) {
@ -526,6 +551,10 @@ interp_create(PyObject *self, PyObject *args, PyObject *kwds)
PyThreadState_Swap(save_tstate);
return NULL;
}
PyThreadState_Clear(tstate);
PyThreadState_Delete(tstate);
_PyInterpreterState_RequireIDRef(interp, 1);
return idobj;
}
@ -573,14 +602,8 @@ interp_destroy(PyObject *self, PyObject *args, PyObject *kwds)
}
// Destroy the interpreter.
// XXX gh-109860: Using the "head" thread isn't strictly correct.
PyThreadState *tstate = PyInterpreterState_ThreadHead(interp);
assert(tstate != NULL);
// Hack (until gh-109860): The interpreter's initial thread state
// is least likely to break.
while(tstate->next != NULL) {
tstate = tstate->next;
}
PyThreadState *tstate = PyThreadState_New(interp);
tstate->_whence = _PyThreadState_WHENCE_INTERP;
// XXX Possible GILState issues?
PyThreadState *save_tstate = PyThreadState_Swap(tstate);
Py_EndInterpreter(tstate);

View file

@ -655,7 +655,8 @@ pycore_create_interpreter(_PyRuntimeState *runtime,
return status;
}
PyThreadState *tstate = _PyThreadState_New(interp);
PyThreadState *tstate = _PyThreadState_New(interp,
_PyThreadState_WHENCE_INTERP);
if (tstate == NULL) {
return _PyStatus_ERR("can't make first thread");
}
@ -2050,7 +2051,8 @@ new_interpreter(PyThreadState **tstate_p, const PyInterpreterConfig *config)
return _PyStatus_OK();
}
PyThreadState *tstate = _PyThreadState_New(interp);
PyThreadState *tstate = _PyThreadState_New(interp,
_PyThreadState_WHENCE_INTERP);
if (tstate == NULL) {
PyInterpreterState_Delete(interp);
*tstate_p = NULL;

View file

@ -1094,9 +1094,7 @@ _PyInterpreterState_DeleteExceptMain(_PyRuntimeState *runtime)
int
_PyInterpreterState_SetRunningMain(PyInterpreterState *interp)
{
if (interp->threads.main != NULL) {
PyErr_SetString(PyExc_RuntimeError,
"interpreter already running");
if (_PyInterpreterState_FailIfRunningMain(interp) < 0) {
return -1;
}
PyThreadState *tstate = current_fast_get(&_PyRuntime);
@ -1113,7 +1111,20 @@ _PyInterpreterState_SetRunningMain(PyInterpreterState *interp)
void
_PyInterpreterState_SetNotRunningMain(PyInterpreterState *interp)
{
assert(interp->threads.main == current_fast_get(&_PyRuntime));
PyThreadState *tstate = interp->threads.main;
assert(tstate == current_fast_get(&_PyRuntime));
if (tstate->on_delete != NULL) {
// The threading module was imported for the first time in this
// thread, so it was set as threading._main_thread. (See gh-75698.)
// The thread has finished running the Python program so we mark
// the thread object as finished.
assert(tstate->_whence != _PyThreadState_WHENCE_THREADING);
tstate->on_delete(tstate->on_delete_data);
tstate->on_delete = NULL;
tstate->on_delete_data = NULL;
}
interp->threads.main = NULL;
}
@ -1123,6 +1134,17 @@ _PyInterpreterState_IsRunningMain(PyInterpreterState *interp)
return (interp->threads.main != NULL);
}
int
_PyInterpreterState_FailIfRunningMain(PyInterpreterState *interp)
{
if (interp->threads.main != NULL) {
PyErr_SetString(PyExc_RuntimeError,
"interpreter already running");
return -1;
}
return 0;
}
//----------
// accessors
@ -1183,8 +1205,10 @@ _PyInterpreterState_IDDecref(PyInterpreterState *interp)
PyThread_release_lock(interp->id_mutex);
if (refcount == 0 && interp->requires_idref) {
// XXX Using the "head" thread isn't strictly correct.
PyThreadState *tstate = PyInterpreterState_ThreadHead(interp);
PyThreadState *tstate = _PyThreadState_New(interp,
_PyThreadState_WHENCE_INTERP);
_PyThreadState_Bind(tstate);
// XXX Possible GILState issues?
PyThreadState *save_tstate = _PyThreadState_Swap(runtime, tstate);
Py_EndInterpreter(tstate);
@ -1338,7 +1362,14 @@ free_threadstate(PyThreadState *tstate)
{
// The initial thread state of the interpreter is allocated
// as part of the interpreter state so should not be freed.
if (tstate != &tstate->interp->_initial_thread) {
if (tstate == &tstate->interp->_initial_thread) {
// Restore to _PyThreadState_INIT.
tstate = &tstate->interp->_initial_thread;
memcpy(tstate,
&initial._main_interpreter._initial_thread,
sizeof(*tstate));
}
else {
PyMem_RawFree(tstate);
}
}
@ -1353,7 +1384,7 @@ free_threadstate(PyThreadState *tstate)
static void
init_threadstate(PyThreadState *tstate,
PyInterpreterState *interp, uint64_t id)
PyInterpreterState *interp, uint64_t id, int whence)
{
if (tstate->_status.initialized) {
Py_FatalError("thread state already initialized");
@ -1366,6 +1397,10 @@ init_threadstate(PyThreadState *tstate,
assert(tstate->next == NULL);
assert(tstate->prev == NULL);
assert(tstate->_whence == _PyThreadState_WHENCE_NOTSET);
assert(whence >= 0 && whence <= _PyThreadState_WHENCE_EXEC);
tstate->_whence = whence;
assert(id > 0);
tstate->id = id;
@ -1395,8 +1430,6 @@ add_threadstate(PyInterpreterState *interp, PyThreadState *tstate,
PyThreadState *next)
{
assert(interp->threads.head != tstate);
assert((next != NULL && tstate->id != 1) ||
(next == NULL && tstate->id == 1));
if (next != NULL) {
assert(next->prev == NULL || next->prev == tstate);
next->prev = tstate;
@ -1407,7 +1440,7 @@ add_threadstate(PyInterpreterState *interp, PyThreadState *tstate,
}
static PyThreadState *
new_threadstate(PyInterpreterState *interp)
new_threadstate(PyInterpreterState *interp, int whence)
{
PyThreadState *tstate;
_PyRuntimeState *runtime = interp->runtime;
@ -1430,10 +1463,10 @@ new_threadstate(PyInterpreterState *interp)
PyThreadState *old_head = interp->threads.head;
if (old_head == NULL) {
// It's the interpreter's initial thread state.
assert(id == 1);
used_newtstate = 0;
tstate = &interp->_initial_thread;
}
// XXX Re-use interp->_initial_thread if not in use?
else {
// Every valid interpreter must have at least one thread.
assert(id > 1);
@ -1446,7 +1479,7 @@ new_threadstate(PyInterpreterState *interp)
sizeof(*tstate));
}
init_threadstate(tstate, interp, id);
init_threadstate(tstate, interp, id, whence);
add_threadstate(interp, tstate, old_head);
HEAD_UNLOCK(runtime);
@ -1460,7 +1493,8 @@ new_threadstate(PyInterpreterState *interp)
PyThreadState *
PyThreadState_New(PyInterpreterState *interp)
{
PyThreadState *tstate = new_threadstate(interp);
PyThreadState *tstate = new_threadstate(interp,
_PyThreadState_WHENCE_UNKNOWN);
if (tstate) {
bind_tstate(tstate);
// This makes sure there's a gilstate tstate bound
@ -1474,16 +1508,16 @@ PyThreadState_New(PyInterpreterState *interp)
// This must be followed by a call to _PyThreadState_Bind();
PyThreadState *
_PyThreadState_New(PyInterpreterState *interp)
_PyThreadState_New(PyInterpreterState *interp, int whence)
{
return new_threadstate(interp);
return new_threadstate(interp, whence);
}
// We keep this for stable ABI compabibility.
PyAPI_FUNC(PyThreadState*)
_PyThreadState_Prealloc(PyInterpreterState *interp)
{
return _PyThreadState_New(interp);
return _PyThreadState_New(interp, _PyThreadState_WHENCE_UNKNOWN);
}
// We keep this around for (accidental) stable ABI compatibility.
@ -1580,6 +1614,12 @@ PyThreadState_Clear(PyThreadState *tstate)
Py_CLEAR(tstate->context);
if (tstate->on_delete != NULL) {
// For the "main" thread of each interpreter, this is meant
// to be done in _PyInterpreterState_SetNotRunningMain().
// That leaves threads created by the threading module,
// and any threads killed by forking.
// However, we also accommodate "main" threads that still
// don't call _PyInterpreterState_SetNotRunningMain() yet.
tstate->on_delete(tstate->on_delete_data);
}
@ -2240,7 +2280,9 @@ PyGILState_Ensure(void)
int has_gil;
if (tcur == NULL) {
/* Create a new Python thread state for this thread */
tcur = new_threadstate(runtime->gilstate.autoInterpreterState);
// XXX Use PyInterpreterState_EnsureThreadState()?
tcur = new_threadstate(runtime->gilstate.autoInterpreterState,
_PyThreadState_WHENCE_GILSTATE);
if (tcur == NULL) {
Py_FatalError("Couldn't create thread-state for new thread");
}