bpo-45256: Remove the usage of the C stack in Python to Python calls (GH-28488)

This commit inlines calls to Python functions in the eval loop and steals all the arguments in the call from the caller for
performance.
This commit is contained in:
Pablo Galindo Salgado 2021-10-09 16:51:30 +01:00 committed by GitHub
parent ec04db74e2
commit b4903afd4d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 224 additions and 64 deletions

View file

@ -31,6 +31,7 @@ typedef struct _interpreter_frame {
int f_lasti; /* Last instruction if called */
int stacktop; /* Offset of TOS from localsplus */
PyFrameState f_state; /* What state the frame is in */
int depth; /* Depth of the frame in a ceval loop */
PyObject *localsplus[1];
} InterpreterFrame;
@ -85,6 +86,7 @@ _PyFrame_InitializeSpecials(
frame->generator = NULL;
frame->f_lasti = -1;
frame->f_state = FRAME_CREATED;
frame->depth = 0;
}
/* Gets the pointer to the locals array

View file

@ -13,6 +13,7 @@ extern "C" {
#define _PyTuple_ITEMS(op) (_PyTuple_CAST(op)->ob_item)
extern PyObject *_PyTuple_FromArray(PyObject *const *, Py_ssize_t);
extern PyObject *_PyTuple_FromArraySteal(PyObject *const *, Py_ssize_t);
#ifdef __cplusplus
}

View file

@ -1,7 +1,7 @@
# Sample script for use by test_gdb.py
def foo(a, b, c):
bar(a, b, c)
bar(a=a, b=b, c=c)
def bar(a, b, c):
baz(a, b, c)

View file

@ -734,8 +734,14 @@ def test_pyup_command(self):
cmds_after_breakpoint=['py-up', 'py-up'])
self.assertMultilineMatches(bt,
r'''^.*
#[0-9]+ Frame 0x-?[0-9a-f]+, for file .*gdb_sample.py, line 10, in baz \(args=\(1, 2, 3\)\)
id\(42\)
#[0-9]+ Frame 0x-?[0-9a-f]+, for file .*gdb_sample.py, line 7, in bar \(a=1, b=2, c=3\)
baz\(a, b, c\)
#[0-9]+ Frame 0x-?[0-9a-f]+, for file .*gdb_sample.py, line 4, in foo \(a=1, b=2, c=3\)
bar\(a=a, b=b, c=c\)
#[0-9]+ Frame 0x-?[0-9a-f]+, for file .*gdb_sample.py, line 12, in <module> \(\)
foo\(1, 2, 3\)
$''')
@unittest.skipUnless(HAS_PYUP_PYDOWN, "test requires py-up/py-down commands")
@ -763,10 +769,18 @@ def test_up_then_down(self):
cmds_after_breakpoint=['py-up', 'py-up', 'py-down'])
self.assertMultilineMatches(bt,
r'''^.*
#[0-9]+ Frame 0x-?[0-9a-f]+, for file .*gdb_sample.py, line 7, in bar \(a=1, b=2, c=3\)
baz\(a, b, c\)
#[0-9]+ Frame 0x-?[0-9a-f]+, for file .*gdb_sample.py, line 10, in baz \(args=\(1, 2, 3\)\)
id\(42\)
#[0-9]+ Frame 0x-?[0-9a-f]+, for file .*gdb_sample.py, line 7, in bar \(a=1, b=2, c=3\)
baz\(a, b, c\)
#[0-9]+ Frame 0x-?[0-9a-f]+, for file .*gdb_sample.py, line 4, in foo \(a=1, b=2, c=3\)
bar\(a=a, b=b, c=c\)
#[0-9]+ Frame 0x-?[0-9a-f]+, for file .*gdb_sample.py, line 12, in <module> \(\)
foo\(1, 2, 3\)
#[0-9]+ Frame 0x-?[0-9a-f]+, for file .*gdb_sample.py, line 10, in baz \(args=\(1, 2, 3\)\)
id\(42\)
#[0-9]+ Frame 0x-?[0-9a-f]+, for file .*gdb_sample.py, line 7, in bar \(a=1, b=2, c=3\)
baz\(a, b, c\)
$''')
class PyBtTests(DebuggerTests):
@ -785,7 +799,7 @@ def test_bt(self):
File ".*gdb_sample.py", line 7, in bar
baz\(a, b, c\)
File ".*gdb_sample.py", line 4, in foo
bar\(a, b, c\)
bar\(a=a, b=b, c=c\)
File ".*gdb_sample.py", line 12, in <module>
foo\(1, 2, 3\)
''')
@ -801,7 +815,7 @@ def test_bt_full(self):
#[0-9]+ Frame 0x-?[0-9a-f]+, for file .*gdb_sample.py, line 7, in bar \(a=1, b=2, c=3\)
baz\(a, b, c\)
#[0-9]+ Frame 0x-?[0-9a-f]+, for file .*gdb_sample.py, line 4, in foo \(a=1, b=2, c=3\)
bar\(a, b, c\)
bar\(a=a, b=b, c=c\)
#[0-9]+ Frame 0x-?[0-9a-f]+, for file .*gdb_sample.py, line 12, in <module> \(\)
foo\(1, 2, 3\)
''')
@ -1008,7 +1022,13 @@ def test_locals_after_up(self):
bt = self.get_stack_trace(script=self.get_sample_script(),
cmds_after_breakpoint=['py-up', 'py-up', 'py-locals'])
self.assertMultilineMatches(bt,
r".*\na = 1\nb = 2\nc = 3\n.*")
r'''^.*
Locals for foo
a = 1
b = 2
c = 3
Locals for <module>
.*$''')
def setUpModule():

View file

@ -484,6 +484,26 @@ _PyTuple_FromArray(PyObject *const *src, Py_ssize_t n)
return (PyObject *)tuple;
}
/* Build a tuple of length n out of the objects in src, taking over the
   references held in src: each pointer is stored into the new tuple
   as-is, with no Py_INCREF.

   For n == 0 the result of tuple_get_empty() is returned.  If
   tuple_alloc() fails, NULL is returned and src has not been touched. */
PyObject *
_PyTuple_FromArraySteal(PyObject *const *src, Py_ssize_t n)
{
    if (n == 0) {
        return tuple_get_empty();
    }

    PyTupleObject *result = tuple_alloc(n);
    if (result == NULL) {
        return NULL;
    }

    /* Move the pointers across; reference counts are deliberately
       left alone. */
    PyObject **items = result->ob_item;
    Py_ssize_t idx = 0;
    while (idx < n) {
        items[idx] = src[idx];
        idx++;
    }

    _PyObject_GC_TRACK(result);
    return (PyObject *)result;
}
static PyObject *
tupleslice(PyTupleObject *a, Py_ssize_t ilow,
Py_ssize_t ihigh)

View file

@ -98,6 +98,12 @@ static int check_args_iterable(PyThreadState *, PyObject *func, PyObject *vararg
static void format_kwargs_error(PyThreadState *, PyObject *func, PyObject *kwargs);
static void format_awaitable_error(PyThreadState *, PyTypeObject *, int, int);
static int get_exception_handler(PyCodeObject *, int, int*, int*, int*);
static InterpreterFrame *
_PyEvalFramePushAndInit(PyThreadState *tstate, PyFrameConstructor *con,
PyObject *locals, PyObject* const* args,
size_t argcount, PyObject *kwnames, int steal_args);
static int
_PyEvalFrameClearAndPop(PyThreadState *tstate, InterpreterFrame * frame);
#define NAME_ERROR_MSG \
"name '%.200s' is not defined"
@ -1516,6 +1522,12 @@ trace_function_entry(PyThreadState *tstate, InterpreterFrame *frame)
return 0;
}
static PyObject *
make_coro(PyThreadState *tstate, PyFrameConstructor *con,
PyObject *locals,
PyObject* const* args, size_t argcount,
PyObject *kwnames);
static int
skip_backwards_over_extended_args(PyCodeObject *code, int offset) {
_Py_CODEUNIT *instrs = (_Py_CODEUNIT *)PyBytes_AS_STRING(code->co_code);
@ -1543,10 +1555,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
PyObject *retval = NULL; /* Return value */
_Py_atomic_int * const eval_breaker = &tstate->interp->ceval.eval_breaker;
if (_Py_EnterRecursiveCall(tstate, "")) {
return NULL;
}
CFrame cframe;
/* WARNING: Because the CFrame lives on the C stack,
@ -1558,9 +1566,18 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
cframe.previous = prev_cframe;
tstate->cframe = &cframe;
assert(frame->depth == 0);
/* push frame */
tstate->frame = frame;
start_frame:
if (_Py_EnterRecursiveCall(tstate, "")) {
tstate->recursion_depth++;
goto exit_eval_frame;
}
assert(frame == tstate->frame);
if (cframe.use_tracing) {
if (trace_function_entry(tstate, frame)) {
goto exit_eval_frame;
@ -1582,7 +1599,8 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
}
}
resume_frame:
co = frame->f_code;
PyObject *names = co->co_names;
PyObject *consts = co->co_consts;
_Py_CODEUNIT *first_instr = co->co_firstinstr;
@ -1594,12 +1612,10 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
multiple values.
When the PREDICT() macros are enabled, some opcode pairs follow in
direct succession without updating frame->f_lasti. A successful
prediction effectively links the two codes together as if they
were a single new opcode; accordingly,frame->f_lasti will point to
the first code in the pair (for instance, GET_ITER followed by
FOR_ITER is effectively a single opcode and frame->f_lasti will point
to the beginning of the combined pair.)
direct succession. A successful prediction effectively links the two
codes together as if they were a single new opcode, but the value
of frame->f_lasti is correctly updated so potential inlined calls
or lookups of frame->f_lasti are always correct when the macros are used.
*/
assert(frame->f_lasti >= -1);
_Py_CODEUNIT *next_instr = first_instr + frame->f_lasti + 1;
@ -1625,6 +1641,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
#endif
if (throwflag) { /* support for generator.throw() */
throwflag = 0;
goto error;
}
@ -4591,10 +4608,44 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
TARGET(CALL_FUNCTION) {
PREDICTED(CALL_FUNCTION);
PyObject **sp, *res;
sp = stack_pointer;
res = call_function(tstate, &sp, oparg, NULL, cframe.use_tracing);
stack_pointer = sp;
PyObject *res;
// Check if the call can be inlined or not
PyObject *function = PEEK(oparg + 1);
if (Py_TYPE(function) == &PyFunction_Type) {
PyCodeObject *code = (PyCodeObject*)PyFunction_GET_CODE(function);
PyObject *locals = code->co_flags & CO_OPTIMIZED ? NULL : PyFunction_GET_GLOBALS(function);
if ((code->co_flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR)) == 0) {
InterpreterFrame *new_frame = _PyEvalFramePushAndInit(
tstate, PyFunction_AS_FRAME_CONSTRUCTOR(function), locals, stack_pointer-oparg, oparg, NULL, 1);
if (new_frame == NULL) {
// When we exit here, we own all variables in the stack (the frame creation has not stolen
// any variable) so we need to clean the whole stack (done in the "error" label).
goto error;
}
STACK_SHRINK(oparg + 1);
assert(tstate->interp->eval_frame != NULL);
// The frame has stolen all the arguments from the stack, so there is no need to clean them up.
Py_DECREF(function);
_PyFrame_SetStackPointer(frame, stack_pointer);
new_frame->depth = frame->depth + 1;
tstate->frame = frame = new_frame;
goto start_frame;
}
else {
/* Callable is a generator or coroutine function: create coroutine or generator. */
res = make_coro(tstate, PyFunction_AS_FRAME_CONSTRUCTOR(function), locals, stack_pointer-oparg, oparg, NULL);
STACK_SHRINK(oparg + 1);
for (int i = 0; i < oparg + 1; i++) {
Py_DECREF(stack_pointer[i]);
}
}
}
else {
PyObject **sp = stack_pointer;
res = call_function(tstate, &sp, oparg, NULL, cframe.use_tracing);
stack_pointer = sp;
}
PUSH(res);
if (res == NULL) {
goto error;
@ -5018,14 +5069,28 @@ MISS_WITH_OPARG_COUNTER(BINARY_ADD)
/* pop frame */
exit_eval_frame:
/* Restore previous cframe */
tstate->cframe = cframe.previous;
tstate->cframe->use_tracing = cframe.use_tracing;
if (PyDTrace_FUNCTION_RETURN_ENABLED())
dtrace_function_return(frame);
_Py_LeaveRecursiveCall(tstate);
if (frame->depth) {
_PyFrame_StackPush(frame->previous, retval);
if (_PyEvalFrameClearAndPop(tstate, frame)) {
retval = NULL;
}
frame = tstate->frame;
if (retval == NULL) {
assert(_PyErr_Occurred(tstate));
throwflag = 1;
}
retval = NULL;
goto resume_frame;
}
tstate->frame = frame->previous;
/* Restore previous cframe */
tstate->cframe = cframe.previous;
tstate->cframe->use_tracing = cframe.use_tracing;
return _Py_CheckFunctionResult(tstate, NULL, retval, __func__);
}
@ -5336,7 +5401,7 @@ get_exception_handler(PyCodeObject *code, int index, int *level, int *handler, i
static int
initialize_locals(PyThreadState *tstate, PyFrameConstructor *con,
PyObject **localsplus, PyObject *const *args,
Py_ssize_t argcount, PyObject *kwnames)
Py_ssize_t argcount, PyObject *kwnames, int steal_args)
{
PyCodeObject *co = (PyCodeObject*)con->fc_code;
const Py_ssize_t total_args = co->co_argcount + co->co_kwonlyargcount;
@ -5346,8 +5411,9 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con,
Py_ssize_t i;
if (co->co_flags & CO_VARKEYWORDS) {
kwdict = PyDict_New();
if (kwdict == NULL)
if (kwdict == NULL) {
goto fail;
}
i = total_args;
if (co->co_flags & CO_VARARGS) {
i++;
@ -5369,14 +5435,21 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con,
}
for (j = 0; j < n; j++) {
PyObject *x = args[j];
Py_INCREF(x);
if (!steal_args) {
Py_INCREF(x);
}
assert(localsplus[j] == NULL);
localsplus[j] = x;
}
/* Pack other positional arguments into the *args argument */
if (co->co_flags & CO_VARARGS) {
PyObject *u = _PyTuple_FromArray(args + n, argcount - n);
PyObject *u = NULL;
if (steal_args) {
u = _PyTuple_FromArraySteal(args + n, argcount - n);
} else {
u = _PyTuple_FromArray(args + n, argcount - n);
}
if (u == NULL) {
goto fail;
}
@ -5442,6 +5515,9 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con,
if (PyDict_SetItem(kwdict, keyword, value) == -1) {
goto fail;
}
if (steal_args) {
Py_DECREF(value);
}
continue;
kw_found:
@ -5451,7 +5527,9 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con,
con->fc_qualname, keyword);
goto fail;
}
Py_INCREF(value);
if (!steal_args) {
Py_INCREF(value);
}
localsplus[j] = value;
}
}
@ -5555,7 +5633,7 @@ make_coro_frame(PyThreadState *tstate,
}
_PyFrame_InitializeSpecials(frame, con, locals, code->co_nlocalsplus);
assert(frame->frame_obj == NULL);
if (initialize_locals(tstate, con, frame->localsplus, args, argcount, kwnames)) {
if (initialize_locals(tstate, con, frame->localsplus, args, argcount, kwnames, 0)) {
_PyFrame_Clear(frame, 1);
return NULL;
}
@ -5581,17 +5659,30 @@ make_coro(PyThreadState *tstate, PyFrameConstructor *con,
return gen;
}
// If *steal_args* is set, the function will steal the references to all the arguments.
// In case of error, the function returns null and if *steal_args* is set, the caller
// will still own all the arguments.
static InterpreterFrame *
_PyEvalFramePushAndInit(PyThreadState *tstate, PyFrameConstructor *con,
PyObject *locals, PyObject* const* args,
size_t argcount, PyObject *kwnames)
size_t argcount, PyObject *kwnames, int steal_args)
{
InterpreterFrame * frame = _PyThreadState_PushFrame(tstate, con, locals);
if (frame == NULL) {
return NULL;
}
PyObject **localsarray = _PyFrame_GetLocalsArray(frame);
if (initialize_locals(tstate, con, localsarray, args, argcount, kwnames)) {
if (initialize_locals(tstate, con, localsarray, args, argcount, kwnames, steal_args)) {
if (steal_args) {
// If we failed to initialize locals, make sure the caller still owns all the
// arguments. Notice that we only need to increase the reference count of the
// *valid* arguments (i.e. the ones that fit into the frame).
PyCodeObject *co = (PyCodeObject*)con->fc_code;
const Py_ssize_t total_args = co->co_argcount + co->co_kwonlyargcount;
for (Py_ssize_t i = 0; i < Py_MIN(argcount, total_args); i++) {
Py_XINCREF(frame->localsplus[i]);
}
}
_PyFrame_Clear(frame, 0);
return NULL;
}
@ -5606,9 +5697,9 @@ _PyEvalFrameClearAndPop(PyThreadState *tstate, InterpreterFrame * frame)
++tstate->recursion_depth;
assert(frame->frame_obj == NULL || frame->frame_obj->f_own_locals_memory == 0);
if (_PyFrame_Clear(frame, 0)) {
--tstate->recursion_depth;
return -1;
}
assert(frame->frame_obj == NULL);
--tstate->recursion_depth;
tstate->frame = frame->previous;
_PyThreadState_PopFrame(tstate, frame);
@ -5628,7 +5719,7 @@ _PyEval_Vector(PyThreadState *tstate, PyFrameConstructor *con,
return make_coro(tstate, con, locals, args, argcount, kwnames);
}
InterpreterFrame *frame = _PyEvalFramePushAndInit(
tstate, con, locals, args, argcount, kwnames);
tstate, con, locals, args, argcount, kwnames, 0);
if (frame == NULL) {
return NULL;
}

View file

@ -45,6 +45,7 @@
# compatible (2.6+ and 3.0+). See #19308.
from __future__ import print_function
import gdb
import os
import locale
@ -991,6 +992,11 @@ def _f_nlocalsplus(self):
def _f_lasti(self):
return self._f_special("f_lasti", int_from_int)
def depth(self):
    '''Return the "depth" special field of this frame, read through
    _f_special with int_from_int.'''
    frame_depth = self._f_special("depth", int_from_int)
    return frame_depth
def previous(self):
    '''Return the "previous" special field of this frame, read through
    _f_special with PyFramePtr as its second argument.'''
    linked_frame = self._f_special("previous", PyFramePtr)
    return linked_frame
def iter_globals(self):
'''
@ -1797,16 +1803,20 @@ def get_selected_bytecode_frame(cls):
def print_summary(self):
if self.is_evalframe():
pyop = self.get_pyop()
if pyop:
line = pyop.get_truncated_repr(MAX_OUTPUT_LEN)
write_unicode(sys.stdout, '#%i %s\n' % (self.get_index(), line))
if not pyop.is_optimized_out():
line = pyop.current_line()
if line is not None:
sys.stdout.write(' %s\n' % line.strip())
else:
sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index())
interp_frame = self.get_pyop()
while True:
if interp_frame:
line = interp_frame.get_truncated_repr(MAX_OUTPUT_LEN)
write_unicode(sys.stdout, '#%i %s\n' % (self.get_index(), line))
if not interp_frame.is_optimized_out():
line = interp_frame.current_line()
if line is not None:
sys.stdout.write(' %s\n' % line.strip())
if interp_frame.depth() == 0:
break
else:
sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index())
interp_frame = interp_frame.previous()
else:
info = self.is_other_python_frame()
if info:
@ -1816,15 +1826,19 @@ def print_summary(self):
def print_traceback(self):
if self.is_evalframe():
pyop = self.get_pyop()
if pyop:
pyop.print_traceback()
if not pyop.is_optimized_out():
line = pyop.current_line()
if line is not None:
sys.stdout.write(' %s\n' % line.strip())
else:
sys.stdout.write(' (unable to read python frame information)\n')
interp_frame = self.get_pyop()
while True:
if interp_frame:
interp_frame.print_traceback()
if not interp_frame.is_optimized_out():
line = interp_frame.current_line()
if line is not None:
sys.stdout.write(' %s\n' % line.strip())
if interp_frame.depth() == 0:
break
else:
sys.stdout.write(' (unable to read python frame information)\n')
interp_frame = interp_frame.previous()
else:
info = self.is_other_python_frame()
if info:
@ -1914,11 +1928,15 @@ def invoke(self, args, from_tty):
def move_in_stack(move_up):
'''Move up or down the stack (for the py-up/py-down command)'''
# Important:
# The amount of frames that are printed out depends on how many frames are inlined
# in the same evaluation loop. As this command links directly the C stack with the
# Python stack, the results are sensitive to the number of inlined frames and this
# is likely to change between versions and optimizations.
frame = Frame.get_selected_python_frame()
if not frame:
print('Unable to locate python frame')
return
while frame:
if move_up:
iter_frame = frame.older()
@ -1940,9 +1958,10 @@ def move_in_stack(move_up):
print('Unable to find an older python frame')
else:
print('Unable to find a newer python frame')
class PyUp(gdb.Command):
'Select and print the python stack frame that called this one (if any)'
'Select and print all python stack frame in the same eval loop starting from the one that called this one (if any)'
def __init__(self):
gdb.Command.__init__ (self,
"py-up",
@ -1954,7 +1973,7 @@ def invoke(self, args, from_tty):
move_in_stack(move_up=True)
class PyDown(gdb.Command):
'Select and print the python stack frame called by this one (if any)'
'Select and print all python stack frame in the same eval loop starting from the one called this one (if any)'
def __init__(self):
gdb.Command.__init__ (self,
"py-down",
@ -2067,13 +2086,20 @@ def invoke(self, args, from_tty):
return
pyop_frame = frame.get_pyop()
if not pyop_frame:
print(UNABLE_READ_INFO_PYTHON_FRAME)
return
while True:
if not pyop_frame:
print(UNABLE_READ_INFO_PYTHON_FRAME)
for pyop_name, pyop_value in pyop_frame.iter_locals():
print('%s = %s'
% (pyop_name.proxyval(set()),
pyop_value.get_truncated_repr(MAX_OUTPUT_LEN)))
sys.stdout.write('Locals for %s\n' % (pyop_frame.co_name.proxyval(set())))
for pyop_name, pyop_value in pyop_frame.iter_locals():
print('%s = %s'
% (pyop_name.proxyval(set()),
pyop_value.get_truncated_repr(MAX_OUTPUT_LEN)))
if pyop_frame.depth() == 0:
break
pyop_frame = pyop_frame.previous()
PyLocals()