gh-119213: Be More Careful About _PyArg_Parser.kwtuple Across Interpreters (gh-119331)

_PyArg_Parser holds static global data generated for modules by Argument Clinic.  The _PyArg_Parser.kwtuple field is a tuple object, even though it's stored within a static global.  In some cases the tuple is statically allocated and thus it's okay that it gets shared by multiple interpreters.  However, in other cases the tuple is set lazily, allocated from the heap using the active interprepreter at the point the tuple is needed.

This is a problem once that interpreter is destroyed since _PyArg_Parser.kwtuple becomes at dangling pointer, leading to crashes.  It isn't a problem if the tuple is allocated under the main interpreter, since its lifetime is bound to the lifetime of the runtime.  The solution here is to temporarily switch to the main interpreter.  The alternative would be to always statically allocate the tuple.

This change also fixes a bug where only the most recent parser was added to the global linked list.
This commit is contained in:
Eric Snow 2024-05-22 11:57:52 -04:00 committed by GitHub
parent d472b4f9fa
commit 81865002ae
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 144 additions and 3 deletions

View file

@ -1222,6 +1222,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(sort));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(source));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(source_traceback));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(spam));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(src));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(src_dir_fd));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(stacklevel));

View file

@ -711,6 +711,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(sort)
STRUCT_FOR_ID(source)
STRUCT_FOR_ID(source_traceback)
STRUCT_FOR_ID(spam)
STRUCT_FOR_ID(src)
STRUCT_FOR_ID(src_dir_fd)
STRUCT_FOR_ID(stacklevel)

View file

@ -1220,6 +1220,7 @@ extern "C" {
INIT_ID(sort), \
INIT_ID(source), \
INIT_ID(source_traceback), \
INIT_ID(spam), \
INIT_ID(src), \
INIT_ID(src_dir_fd), \
INIT_ID(stacklevel), \

View file

@ -1974,6 +1974,9 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
string = &_Py_ID(source_traceback);
assert(_PyUnicode_CheckConsistency(string, 1));
_PyUnicode_InternInPlace(interp, &string);
string = &_Py_ID(spam);
assert(_PyUnicode_CheckConsistency(string, 1));
_PyUnicode_InternInPlace(interp, &string);
string = &_Py_ID(src);
assert(_PyUnicode_CheckConsistency(string, 1));
_PyUnicode_InternInPlace(interp, &string);

View file

@ -4,11 +4,17 @@
import sys
from test import support
from test.support import import_helper
from test.support import script_helper
from test.support import warnings_helper
# Skip this test if the _testcapi module isn't available.
_testcapi = import_helper.import_module('_testcapi')
from _testcapi import getargs_keywords, getargs_keyword_only
try:
import _testinternalcapi
except ImportError:
_testinternalcapi = NULL
# > How about the following counterproposal. This also changes some of
# > the other format codes to be a little more regular.
# >
@ -1346,6 +1352,33 @@ def test_nested_tuple(self):
"argument 1 must be sequence of length 1, not 0"):
parse(((),), {}, '(' + f + ')', ['a'])
@unittest.skipIf(_testinternalcapi is None, 'needs _testinternalcapi')
def test_gh_119213(self):
rc, out, err = script_helper.assert_python_ok("-c", """if True:
from test import support
script = '''if True:
import _testinternalcapi
_testinternalcapi.gh_119213_getargs(spam='eggs')
'''
config = dict(
allow_fork=False,
allow_exec=False,
allow_threads=True,
allow_daemon_threads=False,
use_main_obmalloc=False,
gil=2,
check_multi_interp_extensions=True,
)
rc = support.run_in_subinterp_with_config(script, **config)
assert rc == 0
# The crash is different if the interpreter was not destroyed first.
#interpid = _testinternalcapi.create_interpreter()
#rc = _testinternalcapi.exec_interpreter(interpid, script)
#assert rc == 0
""")
self.assertEqual(rc, 0)
if __name__ == "__main__":
unittest.main()

View file

@ -0,0 +1,3 @@
Non-builtin modules built with argument clinic were crashing if used in a
subinterpreter before the main interpreter. The objects that were causing
the problem by leaking between interpreters carelessly have been fixed.

View file

@ -2006,6 +2006,25 @@ has_inline_values(PyObject *self, PyObject *obj)
Py_RETURN_FALSE;
}
/*[clinic input]
gh_119213_getargs
spam: object = None
Test _PyArg_Parser.kwtuple
[clinic start generated code]*/
static PyObject *
gh_119213_getargs_impl(PyObject *module, PyObject *spam)
/*[clinic end generated code: output=d8d9c95d5b446802 input=65ef47511da80fc2]*/
{
// It must never have been called in the main interprer
assert(!_Py_IsMainInterpreter(PyInterpreterState_Get()));
return Py_NewRef(spam);
}
static PyMethodDef module_functions[] = {
{"get_configs", get_configs, METH_NOARGS},
{"get_recursion_depth", get_recursion_depth, METH_NOARGS},
@ -2096,6 +2115,7 @@ static PyMethodDef module_functions[] = {
#ifdef _Py_TIER2
{"uop_symbols_test", _Py_uop_symbols_test, METH_NOARGS},
#endif
GH_119213_GETARGS_METHODDEF
{NULL, NULL} /* sentinel */
};

View file

@ -300,4 +300,64 @@ _testinternalcapi_test_long_numbits(PyObject *module, PyObject *Py_UNUSED(ignore
{
return _testinternalcapi_test_long_numbits_impl(module);
}
/*[clinic end generated code: output=9804015d77d36c77 input=a9049054013a1b77]*/
PyDoc_STRVAR(gh_119213_getargs__doc__,
"gh_119213_getargs($module, /, spam=None)\n"
"--\n"
"\n"
"Test _PyArg_Parser.kwtuple");
#define GH_119213_GETARGS_METHODDEF \
{"gh_119213_getargs", _PyCFunction_CAST(gh_119213_getargs), METH_FASTCALL|METH_KEYWORDS, gh_119213_getargs__doc__},
static PyObject *
gh_119213_getargs_impl(PyObject *module, PyObject *spam);
static PyObject *
gh_119213_getargs(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
{
PyObject *return_value = NULL;
#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
#define NUM_KEYWORDS 1
static struct {
PyGC_Head _this_is_not_used;
PyObject_VAR_HEAD
PyObject *ob_item[NUM_KEYWORDS];
} _kwtuple = {
.ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
.ob_item = { &_Py_ID(spam), },
};
#undef NUM_KEYWORDS
#define KWTUPLE (&_kwtuple.ob_base.ob_base)
#else // !Py_BUILD_CORE
# define KWTUPLE NULL
#endif // !Py_BUILD_CORE
static const char * const _keywords[] = {"spam", NULL};
static _PyArg_Parser _parser = {
.keywords = _keywords,
.fname = "gh_119213_getargs",
.kwtuple = KWTUPLE,
};
#undef KWTUPLE
PyObject *argsbuf[1];
Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0;
PyObject *spam = Py_None;
args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 0, 1, 0, argsbuf);
if (!args) {
goto exit;
}
if (!noptargs) {
goto skip_optional_pos;
}
spam = args[0];
skip_optional_pos:
return_value = gh_119213_getargs_impl(module, spam);
exit:
return return_value;
}
/*[clinic end generated code: output=4d0770a1c20fbf40 input=a9049054013a1b77]*/

View file

@ -7,6 +7,7 @@
#include "pycore_dict.h" // _PyDict_HasOnlyStringKeys()
#include "pycore_modsupport.h" // export _PyArg_NoKeywords()
#include "pycore_pylifecycle.h" // _PyArg_Fini
#include "pycore_pystate.h" // _Py_IsMainInterpreter()
#include "pycore_tuple.h" // _PyTuple_ITEMS()
#include "pycore_pyerrors.h" // _Py_CalculateSuggestions()
@ -1947,7 +1948,23 @@ _parser_init(void *arg)
int owned;
PyObject *kwtuple = parser->kwtuple;
if (kwtuple == NULL) {
/* We may temporarily switch to the main interpreter to avoid
* creating a tuple that could outlive its owning interpreter. */
PyThreadState *save_tstate = NULL;
PyThreadState *temp_tstate = NULL;
if (!_Py_IsMainInterpreter(PyInterpreterState_Get())) {
temp_tstate = PyThreadState_New(_PyInterpreterState_Main());
if (temp_tstate == NULL) {
return -1;
}
save_tstate = PyThreadState_Swap(temp_tstate);
}
kwtuple = new_kwtuple(keywords, len, pos);
if (temp_tstate != NULL) {
PyThreadState_Clear(temp_tstate);
(void)PyThreadState_Swap(save_tstate);
PyThreadState_Delete(temp_tstate);
}
if (kwtuple == NULL) {
return -1;
}
@ -1969,8 +1986,8 @@ _parser_init(void *arg)
parser->next = _Py_atomic_load_ptr(&_PyRuntime.getargs.static_parsers);
do {
// compare-exchange updates parser->next on failure
} while (_Py_atomic_compare_exchange_ptr(&_PyRuntime.getargs.static_parsers,
&parser->next, parser));
} while (!_Py_atomic_compare_exchange_ptr(&_PyRuntime.getargs.static_parsers,
&parser->next, parser));
return 0;
}

View file

@ -51,6 +51,8 @@ def declare_parser(
#endif
"""
else:
# XXX Why do we not statically allocate the tuple
# for non-builtin modules?
declarations = """
#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)