From 45e62a2bc1c0000e2e9b613fff6bebf2c26fcb93 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 20 Jun 2022 12:59:25 +0100 Subject: [PATCH] GH-93897: Store frame size in code object and de-opt if insufficient space on thread frame stack. (GH-93908) --- Include/cpython/code.h | 4 +-- Include/internal/pycore_frame.h | 50 ++++++++++++++++++--------------- Objects/codeobject.c | 2 ++ Objects/frameobject.c | 3 +- Python/ceval.c | 38 +++++++++---------------- Python/frame.c | 16 ----------- Python/pystate.c | 2 +- Tools/scripts/deepfreeze.py | 2 ++ 8 files changed, 50 insertions(+), 67 deletions(-) diff --git a/Include/cpython/code.h b/Include/cpython/code.h index 8e65474bb48..3ccd101415b 100644 --- a/Include/cpython/code.h +++ b/Include/cpython/code.h @@ -73,8 +73,8 @@ typedef uint16_t _Py_CODEUNIT; \ /* redundant values (derived from co_localsplusnames and \ co_localspluskinds) */ \ - int co_nlocalsplus; /* number of local + cell + free variables \ - */ \ + int co_nlocalsplus; /* number of local + cell + free variables */ \ + int co_framesize; /* Size of frame in words */ \ int co_nlocals; /* number of local variables */ \ int co_nplaincellvars; /* number of non-arg cell variables */ \ int co_ncellvars; /* total number of cell variables */ \ diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index 405afd67d22..eed26fbb062 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -6,6 +6,7 @@ extern "C" { #include #include +#include "pycore_code.h" // STATS /* See Objects/frame_layout.md for an explanation of the frame stack * including explanation of the PyFrameObject and _PyInterpreterFrame @@ -94,20 +95,20 @@ static inline void _PyFrame_StackPush(_PyInterpreterFrame *f, PyObject *value) { void _PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame *dest); -/* Consumes reference to func */ +/* Consumes reference to func and locals */ static inline void _PyFrame_InitializeSpecials( _PyInterpreterFrame *frame, PyFunctionObject *func, - PyObject *locals, int nlocalsplus) + PyObject *locals, PyCodeObject *code) { frame->f_func = func; - frame->f_code = (PyCodeObject *)Py_NewRef(func->func_code); + frame->f_code = (PyCodeObject *)Py_NewRef(code); frame->f_builtins = func->func_builtins; frame->f_globals = func->func_globals; - frame->f_locals = Py_XNewRef(locals); - frame->stacktop = nlocalsplus; + frame->f_locals = locals; + frame->stacktop = code->co_nlocalsplus; frame->frame_obj = NULL; - frame->prev_instr = _PyCode_CODE(frame->f_code) - 1; + frame->prev_instr = _PyCode_CODE(code) - 1; frame->is_entry = false; frame->owner = FRAME_OWNED_BY_THREAD; } @@ -172,29 +173,32 @@ _PyFrame_FastToLocalsWithError(_PyInterpreterFrame *frame); void _PyFrame_LocalsToFast(_PyInterpreterFrame *frame, int clear); -extern _PyInterpreterFrame * -_PyThreadState_BumpFramePointerSlow(PyThreadState *tstate, size_t size); -static inline _PyInterpreterFrame * -_PyThreadState_BumpFramePointer(PyThreadState *tstate, size_t size) +static inline bool +_PyThreadState_HasStackSpace(PyThreadState *tstate, int size) { - PyObject **base = tstate->datastack_top; - if (base) { - PyObject **top = base + size; - assert(tstate->datastack_limit); - if (top < tstate->datastack_limit) { - tstate->datastack_top = top; - return (_PyInterpreterFrame *)base; - } - } - return _PyThreadState_BumpFramePointerSlow(tstate, size); + return tstate->datastack_top + size < tstate->datastack_limit; } +extern _PyInterpreterFrame * +_PyThreadState_PushFrame(PyThreadState *tstate, size_t size); + void _PyThreadState_PopFrame(PyThreadState *tstate, _PyInterpreterFrame *frame); -/* Consume reference to func */ -_PyInterpreterFrame * -_PyFrame_Push(PyThreadState *tstate, PyFunctionObject *func); +/* Pushes a frame without checking for space. + * Must be guarded by _PyThreadState_HasStackSpace() + * Consumes reference to func. */ +static inline _PyInterpreterFrame * +_PyFrame_PushUnchecked(PyThreadState *tstate, PyFunctionObject *func) +{ + CALL_STAT_INC(frames_pushed); + PyCodeObject *code = (PyCodeObject *)func->func_code; + _PyInterpreterFrame *new_frame = (_PyInterpreterFrame *)tstate->datastack_top; + tstate->datastack_top += code->co_framesize; + assert(tstate->datastack_top < tstate->datastack_limit); + _PyFrame_InitializeSpecials(new_frame, func, NULL, code); + return new_frame; +} int _PyInterpreterFrame_GetLine(_PyInterpreterFrame *frame); diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 8ac4e9914f8..556d3a31c75 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -4,6 +4,7 @@ #include "opcode.h" #include "structmember.h" // PyMemberDef #include "pycore_code.h" // _PyCodeConstructor +#include "pycore_frame.h" // FRAME_SPECIALS_SIZE #include "pycore_interp.h" // PyInterpreterState.co_extra_freefuncs #include "pycore_opcode.h" // _PyOpcode_Deopt #include "pycore_pystate.h" // _PyInterpreterState_GET() @@ -327,6 +328,7 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con) /* derived values */ co->co_nlocalsplus = nlocalsplus; co->co_nlocals = nlocals; + co->co_framesize = nlocalsplus + con->stacksize + FRAME_SPECIALS_SIZE; co->co_nplaincellvars = nplaincellvars; co->co_ncellvars = ncellvars; co->co_nfreevars = nfreevars; diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 545c1b6b90f..94bd245eafc 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -858,8 +858,9 @@ init_frame(_PyInterpreterFrame *frame, PyFunctionObject *func, PyObject *locals) { /* _PyFrame_InitializeSpecials consumes reference to func */ Py_INCREF(func); + Py_XINCREF(locals); PyCodeObject *code = (PyCodeObject *)func->func_code; - _PyFrame_InitializeSpecials(frame, func, locals, code->co_nlocalsplus); + _PyFrame_InitializeSpecials(frame, func, locals, code); for (Py_ssize_t i = 0; i < code->co_nlocalsplus; i++) { frame->localsplus[i] = NULL; } diff --git a/Python/ceval.c b/Python/ceval.c index 77f80b90b93..19b2bb4d393 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -2240,13 +2240,10 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int DEOPT_IF(getitem->func_version != cache->func_version, BINARY_SUBSCR); PyCodeObject *code = (PyCodeObject *)getitem->func_code; assert(code->co_argcount == 2); + DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), BINARY_SUBSCR); STAT_INC(BINARY_SUBSCR, hit); - Py_INCREF(getitem); - _PyInterpreterFrame *new_frame = _PyFrame_Push(tstate, getitem); - if (new_frame == NULL) { - goto error; - } + _PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(tstate, getitem); CALL_STAT_INC(inlined_py_calls); STACK_SHRINK(2); new_frame->localsplus[0] = container; @@ -3664,13 +3661,10 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int DEOPT_IF(f->func_version != func_version, LOAD_ATTR); PyCodeObject *code = (PyCodeObject *)f->func_code; assert(code->co_argcount == 1); + DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), LOAD_ATTR); STAT_INC(LOAD_ATTR, hit); - Py_INCREF(fget); - _PyInterpreterFrame *new_frame = _PyFrame_Push(tstate, f); - if (new_frame == NULL) { - goto error; - } + _PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(tstate, f); SET_TOP(NULL); int push_null = !(oparg & 1); STACK_SHRINK(push_null); @@ -4724,7 +4718,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int // Check if the call can be inlined or not if (Py_TYPE(function) == &PyFunction_Type && tstate->interp->eval_frame == NULL) { int code_flags = ((PyCodeObject*)PyFunction_GET_CODE(function))->co_flags; - PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : PyFunction_GET_GLOBALS(function); + PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : Py_NewRef(PyFunction_GET_GLOBALS(function)); STACK_SHRINK(total_args); _PyInterpreterFrame *new_frame = _PyEvalFramePushAndInit( tstate, (PyFunctionObject *)function, locals, @@ -4810,11 +4804,9 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int DEOPT_IF(func->func_version != read_u32(cache->func_version), CALL); PyCodeObject *code = (PyCodeObject *)func->func_code; DEOPT_IF(code->co_argcount != argcount, CALL); + DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), CALL); STAT_INC(CALL, hit); - _PyInterpreterFrame *new_frame = _PyFrame_Push(tstate, func); - if (new_frame == NULL) { - goto error; - } + _PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(tstate, func); CALL_STAT_INC(inlined_py_calls); STACK_SHRINK(argcount); for (int i = 0; i < argcount; i++) { @@ -4846,11 +4838,9 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int DEOPT_IF(argcount > code->co_argcount, CALL); int minargs = cache->min_args; DEOPT_IF(argcount < minargs, CALL); + DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), CALL); STAT_INC(CALL, hit); - _PyInterpreterFrame *new_frame = _PyFrame_Push(tstate, func); - if (new_frame == NULL) { - goto error; - } + _PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(tstate, func); CALL_STAT_INC(inlined_py_calls); STACK_SHRINK(argcount); for (int i = 0; i < argcount; i++) { @@ -6298,20 +6288,19 @@ initialize_locals(PyThreadState *tstate, PyFunctionObject *func, return -1; } -/* Consumes references to func and all the args */ +/* Consumes references to func, locals and all the args */ static _PyInterpreterFrame * _PyEvalFramePushAndInit(PyThreadState *tstate, PyFunctionObject *func, PyObject *locals, PyObject* const* args, size_t argcount, PyObject *kwnames) { PyCodeObject * code = (PyCodeObject *)func->func_code; - size_t size = code->co_nlocalsplus + code->co_stacksize + FRAME_SPECIALS_SIZE; CALL_STAT_INC(frames_pushed); - _PyInterpreterFrame *frame = _PyThreadState_BumpFramePointer(tstate, size); + _PyInterpreterFrame *frame = _PyThreadState_PushFrame(tstate, code->co_framesize); if (frame == NULL) { goto fail; } - _PyFrame_InitializeSpecials(frame, func, locals, code->co_nlocalsplus); + _PyFrame_InitializeSpecials(frame, func, locals, code); PyObject **localsarray = &frame->localsplus[0]; for (int i = 0; i < code->co_nlocalsplus; i++) { localsarray[i] = NULL; @@ -6355,8 +6344,9 @@ _PyEval_Vector(PyThreadState *tstate, PyFunctionObject *func, PyObject *kwnames) { /* _PyEvalFramePushAndInit consumes the references - * to func and all its arguments */ + * to func, locals and all its arguments */ Py_INCREF(func); + Py_XINCREF(locals); for (size_t i = 0; i < argcount; i++) { Py_INCREF(args[i]); } diff --git a/Python/frame.c b/Python/frame.c index 9eaf41d09a8..206ecab44c1 100644 --- a/Python/frame.c +++ b/Python/frame.c @@ -114,22 +114,6 @@ _PyFrame_Clear(_PyInterpreterFrame *frame) Py_DECREF(frame->f_code); } -/* Consumes reference to func */ -_PyInterpreterFrame * -_PyFrame_Push(PyThreadState *tstate, PyFunctionObject *func) -{ - PyCodeObject *code = (PyCodeObject *)func->func_code; - size_t size = code->co_nlocalsplus + code->co_stacksize + FRAME_SPECIALS_SIZE; - CALL_STAT_INC(frames_pushed); - _PyInterpreterFrame *new_frame = _PyThreadState_BumpFramePointer(tstate, size); - if (new_frame == NULL) { - Py_DECREF(func); - return NULL; - } - _PyFrame_InitializeSpecials(new_frame, func, NULL, code->co_nlocalsplus); - return new_frame; -} - int _PyInterpreterFrame_GetLine(_PyInterpreterFrame *frame) { diff --git a/Python/pystate.c b/Python/pystate.c index 3cc7613f5b7..11cc122185f 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -2168,7 +2168,7 @@ push_chunk(PyThreadState *tstate, int size) } _PyInterpreterFrame * -_PyThreadState_BumpFramePointerSlow(PyThreadState *tstate, size_t size) +_PyThreadState_PushFrame(PyThreadState *tstate, size_t size) { assert(size < INT_MAX/sizeof(PyObject *)); PyObject **base = tstate->datastack_top; diff --git a/Tools/scripts/deepfreeze.py b/Tools/scripts/deepfreeze.py index 443fcf22749..de8fde448e3 100644 --- a/Tools/scripts/deepfreeze.py +++ b/Tools/scripts/deepfreeze.py @@ -118,6 +118,7 @@ def __init__(self, file: TextIO) -> None: self.write('#include "Python.h"') self.write('#include "internal/pycore_gc.h"') self.write('#include "internal/pycore_code.h"') + self.write('#include "internal/pycore_frame.h"') self.write('#include "internal/pycore_long.h"') self.write("") @@ -256,6 +257,7 @@ def generate_code(self, name: str, code: types.CodeType) -> str: self.field(code, "co_argcount") self.field(code, "co_posonlyargcount") self.field(code, "co_kwonlyargcount") + self.write(f".co_framesize = {code.co_stacksize + len(localsplusnames)} + FRAME_SPECIALS_SIZE,") self.field(code, "co_stacksize") self.field(code, "co_firstlineno") self.write(f".co_nlocalsplus = {len(localsplusnames)},")