GH-112354: Initial implementation of warm up on exits and trace-stitching (GH-114142)

This commit is contained in:
Mark Shannon 2024-02-20 09:39:55 +00:00 committed by GitHub
parent acda1757bc
commit 7b21403ccd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
29 changed files with 744 additions and 198 deletions

1
.gitattributes vendored
View file

@ -77,6 +77,7 @@ Include/internal/pycore_opcode.h generated
Include/internal/pycore_opcode_metadata.h generated
Include/internal/pycore_*_generated.h generated
Include/internal/pycore_uop_ids.h generated
Include/internal/pycore_uop_metadata.h generated
Include/opcode.h generated
Include/opcode_ids.h generated
Include/token.h generated

View file

@ -33,16 +33,28 @@ typedef struct {
typedef struct {
uint16_t opcode;
uint16_t oparg;
uint32_t target;
union {
uint32_t target;
uint32_t exit_index;
};
uint64_t operand; // A cache entry
} _PyUOpInstruction;
typedef struct _exit_data {
uint32_t target;
int16_t temperature;
const struct _PyExecutorObject *executor;
} _PyExitData;
typedef struct _PyExecutorObject {
PyObject_VAR_HEAD
const _PyUOpInstruction *trace;
_PyVMData vm_data; /* Used by the VM, but opaque to the optimizer */
void *jit_code;
uint32_t exit_count;
uint32_t code_size;
size_t jit_size;
_PyUOpInstruction trace[1];
void *jit_code;
_PyExitData exits[1];
} _PyExecutorObject;
typedef struct _PyOptimizerObject _PyOptimizerObject;
@ -59,6 +71,7 @@ typedef struct _PyOptimizerObject {
/* These thresholds are treated as signed so do not exceed INT16_MAX
* Use INT16_MAX to indicate that the optimizer should never be called */
uint16_t resume_threshold;
uint16_t side_threshold;
uint16_t backedge_threshold;
/* Data needed by the optimizer goes here, but is opaque to the VM */
} _PyOptimizerObject;
@ -73,16 +86,16 @@ PyAPI_FUNC(int) PyUnstable_Replace_Executor(PyCodeObject *code, _Py_CODEUNIT *in
_PyOptimizerObject *_Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject* optimizer);
PyAPI_FUNC(void) PyUnstable_SetOptimizer(_PyOptimizerObject* optimizer);
PyAPI_FUNC(int) PyUnstable_SetOptimizer(_PyOptimizerObject* optimizer);
PyAPI_FUNC(_PyOptimizerObject *) PyUnstable_GetOptimizer(void);
PyAPI_FUNC(_PyExecutorObject *) PyUnstable_GetExecutor(PyCodeObject *code, int offset);
int
_PyOptimizer_Optimize(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer);
_PyOptimizer_Optimize(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer, _PyExecutorObject **exec_ptr);
void _Py_ExecutorInit(_PyExecutorObject *, _PyBloomFilter *);
void _Py_ExecutorInit(_PyExecutorObject *, const _PyBloomFilter *);
void _Py_ExecutorClear(_PyExecutorObject *);
void _Py_BloomFilter_Init(_PyBloomFilter *);
void _Py_BloomFilter_Add(_PyBloomFilter *bloom, void *obj);

View file

@ -212,6 +212,8 @@ struct _ts {
/* The thread's exception stack entry. (Always the last entry.) */
_PyErr_StackItem exc_state;
PyObject *previous_executor;
};
#ifdef Py_DEBUG

View file

@ -237,10 +237,13 @@ struct _is {
struct callable_cache callable_cache;
_PyOptimizerObject *optimizer;
_PyExecutorObject *executor_list_head;
/* These values are shifted and offset to speed up check in JUMP_BACKWARD */
/* These two values are shifted and offset to speed up check in JUMP_BACKWARD */
uint32_t optimizer_resume_threshold;
uint32_t optimizer_backedge_threshold;
uint16_t optimizer_side_threshold;
uint32_t next_func_version;
_rare_events rare_events;
PyDict_WatchCallback builtins_dict_watcher;

View file

@ -13,7 +13,7 @@ extern "C" {
typedef _Py_CODEUNIT *(*jit_func)(_PyInterpreterFrame *frame, PyObject **stack_pointer, PyThreadState *tstate);
int _PyJIT_Compile(_PyExecutorObject *executor, _PyUOpInstruction *trace, size_t length);
int _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction *trace, size_t length);
void _PyJIT_Free(_PyExecutorObject *executor);
#endif // _Py_JIT

View file

@ -909,8 +909,9 @@ enum InstructionFormat {
#define HAS_DEOPT_FLAG (128)
#define HAS_ERROR_FLAG (256)
#define HAS_ESCAPES_FLAG (512)
#define HAS_PURE_FLAG (1024)
#define HAS_PASSTHROUGH_FLAG (2048)
#define HAS_EXIT_FLAG (1024)
#define HAS_PURE_FLAG (2048)
#define HAS_PASSTHROUGH_FLAG (4096)
#define OPCODE_HAS_ARG(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_ARG_FLAG))
#define OPCODE_HAS_CONST(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_CONST_FLAG))
#define OPCODE_HAS_NAME(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_NAME_FLAG))
@ -921,6 +922,7 @@ enum InstructionFormat {
#define OPCODE_HAS_DEOPT(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_DEOPT_FLAG))
#define OPCODE_HAS_ERROR(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_ERROR_FLAG))
#define OPCODE_HAS_ESCAPES(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_ESCAPES_FLAG))
#define OPCODE_HAS_EXIT(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_EXIT_FLAG))
#define OPCODE_HAS_PURE(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_PURE_FLAG))
#define OPCODE_HAS_PASSTHROUGH(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_PASSTHROUGH_FLAG))
@ -945,14 +947,14 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[268] = {
[BEFORE_ASYNC_WITH] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[BEFORE_WITH] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[BINARY_OP] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG },
[BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ERROR_FLAG },
[BINARY_OP_ADD_UNICODE] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[BINARY_OP_INPLACE_ADD_UNICODE] = { true, INSTR_FMT_IXC, HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[BINARY_OP_MULTIPLY_FLOAT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG },
[BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ERROR_FLAG },
[BINARY_OP_SUBTRACT_FLOAT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG },
[BINARY_OP_SUBTRACT_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ERROR_FLAG },
[BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_EXIT_FLAG },
[BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG },
[BINARY_OP_ADD_UNICODE] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[BINARY_OP_INPLACE_ADD_UNICODE] = { true, INSTR_FMT_IXC, HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[BINARY_OP_MULTIPLY_FLOAT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_EXIT_FLAG },
[BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG },
[BINARY_OP_SUBTRACT_FLOAT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_EXIT_FLAG },
[BINARY_OP_SUBTRACT_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG },
[BINARY_SLICE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[BINARY_SUBSCR] = { true, INSTR_FMT_IXC, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[BINARY_SUBSCR_DICT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
@ -1060,16 +1062,16 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[268] = {
[LOAD_ATTR] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[LOAD_ATTR_CLASS] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG },
[LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG },
[LOAD_ATTR_INSTANCE_VALUE] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG },
[LOAD_ATTR_METHOD_LAZY_DICT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG },
[LOAD_ATTR_METHOD_NO_DICT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG },
[LOAD_ATTR_METHOD_WITH_VALUES] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG },
[LOAD_ATTR_INSTANCE_VALUE] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG },
[LOAD_ATTR_METHOD_LAZY_DICT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG },
[LOAD_ATTR_METHOD_NO_DICT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG },
[LOAD_ATTR_METHOD_WITH_VALUES] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG },
[LOAD_ATTR_MODULE] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG },
[LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG },
[LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG },
[LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG },
[LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG },
[LOAD_ATTR_PROPERTY] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG },
[LOAD_ATTR_SLOT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG },
[LOAD_ATTR_WITH_HINT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG },
[LOAD_ATTR_SLOT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG },
[LOAD_ATTR_WITH_HINT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG },
[LOAD_BUILD_CLASS] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[LOAD_CONST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_CONST_FLAG | HAS_PURE_FLAG },
[LOAD_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
@ -1118,8 +1120,8 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[268] = {
[SET_FUNCTION_ATTRIBUTE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ESCAPES_FLAG },
[SET_UPDATE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[STORE_ATTR] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[STORE_ATTR_INSTANCE_VALUE] = { true, INSTR_FMT_IXC000, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG },
[STORE_ATTR_SLOT] = { true, INSTR_FMT_IXC000, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG },
[STORE_ATTR_INSTANCE_VALUE] = { true, INSTR_FMT_IXC000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG },
[STORE_ATTR_SLOT] = { true, INSTR_FMT_IXC000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG },
[STORE_ATTR_WITH_HINT] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG },
[STORE_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ESCAPES_FLAG },
[STORE_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG },
@ -1133,12 +1135,12 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[268] = {
[STORE_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG },
[SWAP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_PURE_FLAG },
[TO_BOOL] = { true, INSTR_FMT_IXC00, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[TO_BOOL_ALWAYS_TRUE] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG },
[TO_BOOL_BOOL] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG },
[TO_BOOL_INT] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG },
[TO_BOOL_LIST] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG },
[TO_BOOL_NONE] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG },
[TO_BOOL_STR] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG },
[TO_BOOL_ALWAYS_TRUE] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG | HAS_EXIT_FLAG },
[TO_BOOL_BOOL] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG | HAS_EXIT_FLAG },
[TO_BOOL_INT] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG | HAS_EXIT_FLAG },
[TO_BOOL_LIST] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG | HAS_EXIT_FLAG },
[TO_BOOL_NONE] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG | HAS_EXIT_FLAG },
[TO_BOOL_STR] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG | HAS_EXIT_FLAG },
[UNARY_INVERT] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[UNARY_NEGATIVE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[UNARY_NOT] = { true, INSTR_FMT_IX, HAS_PURE_FLAG },

View file

@ -139,7 +139,6 @@ extern "C" {
#define _CONTAINS_OP CONTAINS_OP
#define _CHECK_EG_MATCH CHECK_EG_MATCH
#define _CHECK_EXC_MATCH CHECK_EXC_MATCH
#define _JUMP_BACKWARD JUMP_BACKWARD
#define _POP_JUMP_IF_FALSE 339
#define _POP_JUMP_IF_TRUE 340
#define _IS_NONE 341
@ -237,8 +236,11 @@ extern "C" {
#define _CHECK_GLOBALS 384
#define _CHECK_BUILTINS 385
#define _INTERNAL_INCREMENT_OPT_COUNTER 386
#define _CHECK_VALIDITY_AND_SET_IP 387
#define MAX_UOP_ID 387
#define _COLD_EXIT 387
#define _START_EXECUTOR 388
#define _FATAL_ERROR 389
#define _CHECK_VALIDITY_AND_SET_IP 390
#define MAX_UOP_ID 390
#ifdef __cplusplus
}

View file

@ -32,22 +32,22 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
[_UNARY_NEGATIVE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_UNARY_NOT] = HAS_PURE_FLAG,
[_TO_BOOL] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_TO_BOOL_BOOL] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG,
[_TO_BOOL_INT] = HAS_DEOPT_FLAG,
[_TO_BOOL_LIST] = HAS_DEOPT_FLAG,
[_TO_BOOL_NONE] = HAS_DEOPT_FLAG,
[_TO_BOOL_STR] = HAS_DEOPT_FLAG,
[_TO_BOOL_ALWAYS_TRUE] = HAS_DEOPT_FLAG,
[_TO_BOOL_BOOL] = HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_PASSTHROUGH_FLAG,
[_TO_BOOL_INT] = HAS_DEOPT_FLAG | HAS_EXIT_FLAG,
[_TO_BOOL_LIST] = HAS_DEOPT_FLAG | HAS_EXIT_FLAG,
[_TO_BOOL_NONE] = HAS_DEOPT_FLAG | HAS_EXIT_FLAG,
[_TO_BOOL_STR] = HAS_DEOPT_FLAG | HAS_EXIT_FLAG,
[_TO_BOOL_ALWAYS_TRUE] = HAS_DEOPT_FLAG | HAS_EXIT_FLAG,
[_UNARY_INVERT] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_GUARD_BOTH_INT] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG,
[_GUARD_BOTH_INT] = HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_PASSTHROUGH_FLAG,
[_BINARY_OP_MULTIPLY_INT] = HAS_ERROR_FLAG | HAS_PURE_FLAG,
[_BINARY_OP_ADD_INT] = HAS_ERROR_FLAG | HAS_PURE_FLAG,
[_BINARY_OP_SUBTRACT_INT] = HAS_ERROR_FLAG | HAS_PURE_FLAG,
[_GUARD_BOTH_FLOAT] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG,
[_GUARD_BOTH_FLOAT] = HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_PASSTHROUGH_FLAG,
[_BINARY_OP_MULTIPLY_FLOAT] = HAS_PURE_FLAG,
[_BINARY_OP_ADD_FLOAT] = HAS_PURE_FLAG,
[_BINARY_OP_SUBTRACT_FLOAT] = HAS_PURE_FLAG,
[_GUARD_BOTH_UNICODE] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG,
[_GUARD_BOTH_UNICODE] = HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_PASSTHROUGH_FLAG,
[_BINARY_OP_ADD_UNICODE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG,
[_BINARY_SUBSCR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_BINARY_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
@ -112,7 +112,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
[_LOAD_SUPER_ATTR_ATTR] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_LOAD_SUPER_ATTR_METHOD] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_LOAD_ATTR] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_GUARD_TYPE_VERSION] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG,
[_GUARD_TYPE_VERSION] = HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_PASSTHROUGH_FLAG,
[_CHECK_MANAGED_OBJECT_HAS_VALUES] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG,
[_LOAD_ATTR_INSTANCE_VALUE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG,
[_CHECK_ATTR_MODULE] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG,
@ -193,14 +193,14 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
[_COPY] = HAS_ARG_FLAG | HAS_PURE_FLAG,
[_BINARY_OP] = HAS_ARG_FLAG | HAS_ERROR_FLAG,
[_SWAP] = HAS_ARG_FLAG | HAS_PURE_FLAG,
[_GUARD_IS_TRUE_POP] = HAS_DEOPT_FLAG,
[_GUARD_IS_FALSE_POP] = HAS_DEOPT_FLAG,
[_GUARD_IS_NONE_POP] = HAS_DEOPT_FLAG,
[_GUARD_IS_NOT_NONE_POP] = HAS_DEOPT_FLAG,
[_GUARD_IS_TRUE_POP] = HAS_DEOPT_FLAG | HAS_EXIT_FLAG,
[_GUARD_IS_FALSE_POP] = HAS_DEOPT_FLAG | HAS_EXIT_FLAG,
[_GUARD_IS_NONE_POP] = HAS_DEOPT_FLAG | HAS_EXIT_FLAG,
[_GUARD_IS_NOT_NONE_POP] = HAS_DEOPT_FLAG | HAS_EXIT_FLAG,
[_JUMP_TO_TOP] = HAS_EVAL_BREAK_FLAG,
[_SET_IP] = 0,
[_SAVE_RETURN_OFFSET] = HAS_ARG_FLAG,
[_EXIT_TRACE] = HAS_DEOPT_FLAG,
[_EXIT_TRACE] = HAS_DEOPT_FLAG | HAS_EXIT_FLAG,
[_CHECK_VALIDITY] = HAS_DEOPT_FLAG,
[_LOAD_CONST_INLINE] = HAS_PURE_FLAG,
[_LOAD_CONST_INLINE_BORROW] = HAS_PURE_FLAG,
@ -209,6 +209,9 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
[_CHECK_GLOBALS] = HAS_DEOPT_FLAG,
[_CHECK_BUILTINS] = HAS_DEOPT_FLAG,
[_INTERNAL_INCREMENT_OPT_COUNTER] = 0,
[_COLD_EXIT] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_START_EXECUTOR] = 0,
[_FATAL_ERROR] = HAS_ESCAPES_FLAG,
[_CHECK_VALIDITY_AND_SET_IP] = HAS_DEOPT_FLAG,
};
@ -266,6 +269,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
[_CHECK_STACK_SPACE] = "_CHECK_STACK_SPACE",
[_CHECK_VALIDITY] = "_CHECK_VALIDITY",
[_CHECK_VALIDITY_AND_SET_IP] = "_CHECK_VALIDITY_AND_SET_IP",
[_COLD_EXIT] = "_COLD_EXIT",
[_COMPARE_OP] = "_COMPARE_OP",
[_COMPARE_OP_FLOAT] = "_COMPARE_OP_FLOAT",
[_COMPARE_OP_INT] = "_COMPARE_OP_INT",
@ -285,6 +289,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
[_END_SEND] = "_END_SEND",
[_EXIT_INIT_CHECK] = "_EXIT_INIT_CHECK",
[_EXIT_TRACE] = "_EXIT_TRACE",
[_FATAL_ERROR] = "_FATAL_ERROR",
[_FORMAT_SIMPLE] = "_FORMAT_SIMPLE",
[_FORMAT_WITH_SPEC] = "_FORMAT_WITH_SPEC",
[_FOR_ITER_TIER_TWO] = "_FOR_ITER_TIER_TWO",
@ -377,6 +382,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
[_SET_FUNCTION_ATTRIBUTE] = "_SET_FUNCTION_ATTRIBUTE",
[_SET_IP] = "_SET_IP",
[_SET_UPDATE] = "_SET_UPDATE",
[_START_EXECUTOR] = "_START_EXECUTOR",
[_STORE_ATTR] = "_STORE_ATTR",
[_STORE_ATTR_INSTANCE_VALUE] = "_STORE_ATTR_INSTANCE_VALUE",
[_STORE_ATTR_SLOT] = "_STORE_ATTR_SLOT",

View file

@ -331,6 +331,7 @@ def f():
# on the *very next* allocation:
gc.collect()
gc.set_threshold(1, 0, 0)
sys._clear_internal_caches()
# Okay, so here's the nightmare scenario:
# - We're tracing the resumption of a generator, which creates a new
# frame object.

View file

@ -794,6 +794,17 @@ def test_annotated_op(self):
self.run_cases_test(input, output)
def test_deopt_and_exit(self):
input = """
pure op(OP, (arg1 -- out)) {
DEOPT_IF(1);
EXIT_IF(1);
}
"""
output = ""
with self.assertRaises(Exception):
self.run_cases_test(input, output)
class TestGeneratedAbstractCases(unittest.TestCase):
def setUp(self) -> None:
super().setUp()

View file

@ -977,7 +977,9 @@ set_optimizer(PyObject *self, PyObject *opt)
if (opt == Py_None) {
opt = NULL;
}
PyUnstable_SetOptimizer((_PyOptimizerObject*)opt);
if (PyUnstable_SetOptimizer((_PyOptimizerObject*)opt) < 0) {
return NULL;
}
Py_RETURN_NONE;
}

View file

@ -340,12 +340,12 @@ dummy_func(
macro(TO_BOOL) = _SPECIALIZE_TO_BOOL + unused/2 + _TO_BOOL;
inst(TO_BOOL_BOOL, (unused/1, unused/2, value -- value)) {
DEOPT_IF(!PyBool_Check(value));
EXIT_IF(!PyBool_Check(value));
STAT_INC(TO_BOOL, hit);
}
inst(TO_BOOL_INT, (unused/1, unused/2, value -- res)) {
DEOPT_IF(!PyLong_CheckExact(value));
EXIT_IF(!PyLong_CheckExact(value));
STAT_INC(TO_BOOL, hit);
if (_PyLong_IsZero((PyLongObject *)value)) {
assert(_Py_IsImmortal(value));
@ -358,7 +358,7 @@ dummy_func(
}
inst(TO_BOOL_LIST, (unused/1, unused/2, value -- res)) {
DEOPT_IF(!PyList_CheckExact(value));
EXIT_IF(!PyList_CheckExact(value));
STAT_INC(TO_BOOL, hit);
res = Py_SIZE(value) ? Py_True : Py_False;
DECREF_INPUTS();
@ -366,13 +366,13 @@ dummy_func(
inst(TO_BOOL_NONE, (unused/1, unused/2, value -- res)) {
// This one is a bit weird, because we expect *some* failures:
DEOPT_IF(!Py_IsNone(value));
EXIT_IF(!Py_IsNone(value));
STAT_INC(TO_BOOL, hit);
res = Py_False;
}
inst(TO_BOOL_STR, (unused/1, unused/2, value -- res)) {
DEOPT_IF(!PyUnicode_CheckExact(value));
EXIT_IF(!PyUnicode_CheckExact(value));
STAT_INC(TO_BOOL, hit);
if (value == &_Py_STR(empty)) {
assert(_Py_IsImmortal(value));
@ -388,7 +388,7 @@ dummy_func(
inst(TO_BOOL_ALWAYS_TRUE, (unused/1, version/2, value -- res)) {
// This one is a bit weird, because we expect *some* failures:
assert(version);
DEOPT_IF(Py_TYPE(value)->tp_version_tag != version);
EXIT_IF(Py_TYPE(value)->tp_version_tag != version);
STAT_INC(TO_BOOL, hit);
DECREF_INPUTS();
res = Py_True;
@ -412,8 +412,8 @@ dummy_func(
};
op(_GUARD_BOTH_INT, (left, right -- left, right)) {
DEOPT_IF(!PyLong_CheckExact(left));
DEOPT_IF(!PyLong_CheckExact(right));
EXIT_IF(!PyLong_CheckExact(left));
EXIT_IF(!PyLong_CheckExact(right));
}
pure op(_BINARY_OP_MULTIPLY_INT, (left, right -- res)) {
@ -448,8 +448,8 @@ dummy_func(
_GUARD_BOTH_INT + unused/1 + _BINARY_OP_SUBTRACT_INT;
op(_GUARD_BOTH_FLOAT, (left, right -- left, right)) {
DEOPT_IF(!PyFloat_CheckExact(left));
DEOPT_IF(!PyFloat_CheckExact(right));
EXIT_IF(!PyFloat_CheckExact(left));
EXIT_IF(!PyFloat_CheckExact(right));
}
pure op(_BINARY_OP_MULTIPLY_FLOAT, (left, right -- res)) {
@ -484,8 +484,8 @@ dummy_func(
_GUARD_BOTH_FLOAT + unused/1 + _BINARY_OP_SUBTRACT_FLOAT;
op(_GUARD_BOTH_UNICODE, (left, right -- left, right)) {
DEOPT_IF(!PyUnicode_CheckExact(left));
DEOPT_IF(!PyUnicode_CheckExact(right));
EXIT_IF(!PyUnicode_CheckExact(left));
EXIT_IF(!PyUnicode_CheckExact(right));
}
pure op(_BINARY_OP_ADD_UNICODE, (left, right -- res)) {
@ -1904,7 +1904,7 @@ dummy_func(
op(_GUARD_TYPE_VERSION, (type_version/2, owner -- owner)) {
PyTypeObject *tp = Py_TYPE(owner);
assert(type_version != 0);
DEOPT_IF(tp->tp_version_tag != type_version);
EXIT_IF(tp->tp_version_tag != type_version);
}
op(_CHECK_MANAGED_OBJECT_HAS_VALUES, (owner -- owner)) {
@ -2314,6 +2314,7 @@ dummy_func(
}
inst(JUMP_BACKWARD, (unused/1 --)) {
TIER_ONE_ONLY
CHECK_EVAL_BREAKER();
assert(oparg <= INSTR_OFFSET());
JUMPBY(-oparg);
@ -2335,13 +2336,13 @@ dummy_func(
oparg >>= 8;
start--;
}
int optimized = _PyOptimizer_Optimize(frame, start, stack_pointer);
_PyExecutorObject *executor;
int optimized = _PyOptimizer_Optimize(frame, start, stack_pointer, &executor);
ERROR_IF(optimized < 0, error);
if (optimized) {
// Rewind and enter the executor:
assert(start->op.code == ENTER_EXECUTOR);
next_instr = start;
this_instr[1].cache &= OPTIMIZER_BITS_MASK;
assert(tstate->previous_executor == NULL);
tstate->previous_executor = Py_None;
GOTO_TIER_TWO(executor);
}
else {
int backoff = this_instr[1].cache & OPTIMIZER_BITS_MASK;
@ -2371,14 +2372,15 @@ dummy_func(
inst(ENTER_EXECUTOR, (--)) {
TIER_ONE_ONLY
CHECK_EVAL_BREAKER();
PyCodeObject *code = _PyFrame_GetCode(frame);
current_executor = code->co_executors->executors[oparg & 255];
assert(current_executor->vm_data.index == INSTR_OFFSET() - 1);
assert(current_executor->vm_data.code == code);
assert(current_executor->vm_data.valid);
Py_INCREF(current_executor);
GOTO_TIER_TWO();
_PyExecutorObject *executor = code->co_executors->executors[oparg & 255];
assert(executor->vm_data.index == INSTR_OFFSET() - 1);
assert(executor->vm_data.code == code);
assert(executor->vm_data.valid);
assert(tstate->previous_executor == NULL);
tstate->previous_executor = Py_None;
Py_INCREF(executor);
GOTO_TIER_TWO(executor);
}
replaced op(_POP_JUMP_IF_FALSE, (cond -- )) {
@ -3997,26 +3999,26 @@ dummy_func(
inst(CACHE, (--)) {
TIER_ONE_ONLY
assert(0 && "Executing a cache.");
Py_UNREACHABLE();
Py_FatalError("Executing a cache.");
}
inst(RESERVED, (--)) {
TIER_ONE_ONLY
assert(0 && "Executing RESERVED instruction.");
Py_UNREACHABLE();
Py_FatalError("Executing RESERVED instruction.");
}
///////// Tier-2 only opcodes /////////
op (_GUARD_IS_TRUE_POP, (flag -- )) {
SYNC_SP();
DEOPT_IF(!Py_IsTrue(flag));
EXIT_IF(!Py_IsTrue(flag));
assert(Py_IsTrue(flag));
}
op (_GUARD_IS_FALSE_POP, (flag -- )) {
SYNC_SP();
DEOPT_IF(!Py_IsFalse(flag));
EXIT_IF(!Py_IsFalse(flag));
assert(Py_IsFalse(flag));
}
@ -4024,18 +4026,20 @@ dummy_func(
SYNC_SP();
if (!Py_IsNone(val)) {
Py_DECREF(val);
DEOPT_IF(1);
EXIT_IF(1);
}
}
op (_GUARD_IS_NOT_NONE_POP, (val -- )) {
SYNC_SP();
DEOPT_IF(Py_IsNone(val));
EXIT_IF(Py_IsNone(val));
Py_DECREF(val);
}
op(_JUMP_TO_TOP, (--)) {
next_uop = current_executor->trace;
#ifndef _Py_JIT
next_uop = &current_executor->trace[1];
#endif
CHECK_EVAL_BREAKER();
}
@ -4055,7 +4059,7 @@ dummy_func(
op(_EXIT_TRACE, (--)) {
TIER_TWO_ONLY
DEOPT_IF(1);
EXIT_IF(1);
}
op(_CHECK_VALIDITY, (--)) {
@ -4101,6 +4105,58 @@ dummy_func(
exe->count++;
}
/* Only used for handling cold side exits, should never appear in
* a normal trace or as part of an instruction.
*/
op(_COLD_EXIT, (--)) {
TIER_TWO_ONLY
_PyExecutorObject *previous = (_PyExecutorObject *)tstate->previous_executor;
_PyExitData *exit = &previous->exits[oparg];
exit->temperature++;
PyCodeObject *code = _PyFrame_GetCode(frame);
_Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target;
if (exit->temperature < (int32_t)tstate->interp->optimizer_side_threshold) {
GOTO_TIER_ONE(target);
}
_PyExecutorObject *executor;
if (target->op.code == ENTER_EXECUTOR) {
executor = code->co_executors->executors[target->op.arg];
Py_INCREF(executor);
} else {
int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor);
if (optimized <= 0) {
int32_t new_temp = -1 * tstate->interp->optimizer_side_threshold;
exit->temperature = (new_temp < INT16_MIN) ? INT16_MIN : new_temp;
if (optimized < 0) {
Py_DECREF(previous);
tstate->previous_executor = Py_None;
ERROR_IF(1, error);
}
GOTO_TIER_ONE(target);
}
}
/* We need two references. One to store in exit->executor and
* one to keep the executor alive when executing. */
Py_INCREF(executor);
exit->executor = executor;
GOTO_TIER_TWO(executor);
}
op(_START_EXECUTOR, (executor/4 --)) {
TIER_TWO_ONLY
Py_DECREF(tstate->previous_executor);
tstate->previous_executor = NULL;
#ifndef _Py_JIT
current_executor = (_PyExecutorObject*)executor;
#endif
}
op(_FATAL_ERROR, (--)) {
TIER_TWO_ONLY
assert(0);
Py_FatalError("Fatal error uop executed.");
}
op(_CHECK_VALIDITY_AND_SET_IP, (instr_ptr/4 --)) {
TIER_TWO_ONLY
DEOPT_IF(!current_executor->vm_data.valid);

View file

@ -16,6 +16,7 @@
#include "pycore_moduleobject.h" // PyModuleObject
#include "pycore_object.h" // _PyObject_GC_TRACK()
#include "pycore_opcode_metadata.h" // EXTRA_CASES
#include "pycore_optimizer.h" // _PyUOpExecutor_Type
#include "pycore_opcode_utils.h" // MAKE_FUNCTION_*
#include "pycore_pyerrors.h" // _PyErr_GetRaisedException()
#include "pycore_pystate.h" // _PyInterpreterState_GET()
@ -738,15 +739,16 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
goto resume_with_error;
}
/* State shared between Tier 1 and Tier 2 interpreter */
_PyExecutorObject *current_executor = NULL;
/* Local "register" variables.
* These are cached values from the frame and code object. */
_Py_CODEUNIT *next_instr;
PyObject **stack_pointer;
#ifndef _Py_JIT
/* Tier 2 interpreter state */
_PyExecutorObject *current_executor = NULL;
const _PyUOpInstruction *next_uop = NULL;
#endif
start_frame:
if (_Py_EnterRecursivePy(tstate)) {
@ -960,18 +962,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
enter_tier_two:
#ifdef _Py_JIT
; // ;)
jit_func jitted = current_executor->jit_code;
next_instr = jitted(frame, stack_pointer, tstate);
frame = tstate->current_frame;
Py_DECREF(current_executor);
if (next_instr == NULL) {
goto resume_with_error;
}
stack_pointer = _PyFrame_GetStackPointer(frame);
DISPATCH();
assert(0);
#else
#undef LOAD_IP
@ -1007,12 +998,12 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
#endif
OPT_STAT_INC(traces_executed);
_PyUOpInstruction *next_uop = current_executor->trace;
uint16_t uopcode;
#ifdef Py_STATS
uint64_t trace_uop_execution_counter = 0;
#endif
assert(next_uop->opcode == _START_EXECUTOR || next_uop->opcode == _COLD_EXIT);
for (;;) {
uopcode = next_uop->opcode;
DPRINTF(3,
@ -1075,23 +1066,39 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
frame->return_offset = 0; // Don't leave this random
_PyFrame_SetStackPointer(frame, stack_pointer);
Py_DECREF(current_executor);
tstate->previous_executor = NULL;
goto resume_with_error;
// Jump here from DEOPT_IF()
deoptimize:
next_instr = next_uop[-1].target + _PyCode_CODE(_PyFrame_GetCode(frame));
DPRINTF(2, "DEOPT: [UOp %d (%s), oparg %d, operand %" PRIu64 ", target %d @ %d -> %s]\n",
DPRINTF(2, "DEOPT: [UOp %d (%s), oparg %d, operand %" PRIu64 ", target %d -> %s]\n",
uopcode, _PyUOpName(uopcode), next_uop[-1].oparg, next_uop[-1].operand, next_uop[-1].target,
(int)(next_uop - current_executor->trace - 1),
_PyOpcode_OpName[frame->instr_ptr->op.code]);
_PyOpcode_OpName[next_instr->op.code]);
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
UOP_STAT_INC(uopcode, miss);
Py_DECREF(current_executor);
tstate->previous_executor = NULL;
DISPATCH();
// Jump here from EXIT_IF()
side_exit:
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
UOP_STAT_INC(uopcode, miss);
uint32_t exit_index = next_uop[-1].exit_index;
assert(exit_index < current_executor->exit_count);
_PyExitData *exit = &current_executor->exits[exit_index];
DPRINTF(2, "SIDE EXIT: [UOp %d (%s), oparg %d, operand %" PRIu64 ", exit %u, temp %d, target %d -> %s]\n",
uopcode, _PyUOpName(uopcode), next_uop[-1].oparg, next_uop[-1].operand, exit_index, exit->temperature,
exit->target, _PyOpcode_OpName[_PyCode_CODE(_PyFrame_GetCode(frame))[exit->target].op.code]);
Py_INCREF(exit->executor);
tstate->previous_executor = (PyObject *)current_executor;
GOTO_TIER_TWO(exit->executor);
#endif // _Py_JIT
}
#if defined(__GNUC__)
# pragma GCC diagnostic pop
#elif defined(_MSC_VER) /* MS_WINDOWS */

View file

@ -394,7 +394,36 @@ stack_pointer = _PyFrame_GetStackPointer(frame);
/* Tier-switching macros. */
#define GOTO_TIER_TWO() goto enter_tier_two;
#ifdef _Py_JIT
#define GOTO_TIER_TWO(EXECUTOR) \
do { \
jit_func jitted = (EXECUTOR)->jit_code; \
next_instr = jitted(frame, stack_pointer, tstate); \
Py_DECREF(tstate->previous_executor); \
tstate->previous_executor = NULL; \
frame = tstate->current_frame; \
if (next_instr == NULL) { \
goto resume_with_error; \
} \
stack_pointer = _PyFrame_GetStackPointer(frame); \
DISPATCH(); \
} while (0)
#else
#define GOTO_TIER_TWO(EXECUTOR) \
do { \
next_uop = (EXECUTOR)->trace; \
assert(next_uop->opcode == _START_EXECUTOR || next_uop->opcode == _COLD_EXIT); \
goto enter_tier_two; \
} while (0)
#endif
#define GOTO_TIER_ONE(TARGET) \
do { \
Py_DECREF(tstate->previous_executor); \
tstate->previous_executor = NULL; \
next_instr = target; \
DISPATCH(); \
} while (0)
#define CURRENT_OPARG() (next_uop[-1].oparg)

View file

@ -141,7 +141,7 @@
case _TO_BOOL_BOOL: {
PyObject *value;
value = stack_pointer[-1];
if (!PyBool_Check(value)) goto deoptimize;
if (!PyBool_Check(value)) goto side_exit;
STAT_INC(TO_BOOL, hit);
break;
}
@ -150,7 +150,7 @@
PyObject *value;
PyObject *res;
value = stack_pointer[-1];
if (!PyLong_CheckExact(value)) goto deoptimize;
if (!PyLong_CheckExact(value)) goto side_exit;
STAT_INC(TO_BOOL, hit);
if (_PyLong_IsZero((PyLongObject *)value)) {
assert(_Py_IsImmortal(value));
@ -168,7 +168,7 @@
PyObject *value;
PyObject *res;
value = stack_pointer[-1];
if (!PyList_CheckExact(value)) goto deoptimize;
if (!PyList_CheckExact(value)) goto side_exit;
STAT_INC(TO_BOOL, hit);
res = Py_SIZE(value) ? Py_True : Py_False;
Py_DECREF(value);
@ -181,7 +181,7 @@
PyObject *res;
value = stack_pointer[-1];
// This one is a bit weird, because we expect *some* failures:
if (!Py_IsNone(value)) goto deoptimize;
if (!Py_IsNone(value)) goto side_exit;
STAT_INC(TO_BOOL, hit);
res = Py_False;
stack_pointer[-1] = res;
@ -192,7 +192,7 @@
PyObject *value;
PyObject *res;
value = stack_pointer[-1];
if (!PyUnicode_CheckExact(value)) goto deoptimize;
if (!PyUnicode_CheckExact(value)) goto side_exit;
STAT_INC(TO_BOOL, hit);
if (value == &_Py_STR(empty)) {
assert(_Py_IsImmortal(value));
@ -214,7 +214,7 @@
uint32_t version = (uint32_t)CURRENT_OPERAND();
// This one is a bit weird, because we expect *some* failures:
assert(version);
if (Py_TYPE(value)->tp_version_tag != version) goto deoptimize;
if (Py_TYPE(value)->tp_version_tag != version) goto side_exit;
STAT_INC(TO_BOOL, hit);
Py_DECREF(value);
res = Py_True;
@ -238,8 +238,8 @@
PyObject *left;
right = stack_pointer[-1];
left = stack_pointer[-2];
if (!PyLong_CheckExact(left)) goto deoptimize;
if (!PyLong_CheckExact(right)) goto deoptimize;
if (!PyLong_CheckExact(left)) goto side_exit;
if (!PyLong_CheckExact(right)) goto side_exit;
break;
}
@ -296,8 +296,8 @@
PyObject *left;
right = stack_pointer[-1];
left = stack_pointer[-2];
if (!PyFloat_CheckExact(left)) goto deoptimize;
if (!PyFloat_CheckExact(right)) goto deoptimize;
if (!PyFloat_CheckExact(left)) goto side_exit;
if (!PyFloat_CheckExact(right)) goto side_exit;
break;
}
@ -354,8 +354,8 @@
PyObject *left;
right = stack_pointer[-1];
left = stack_pointer[-2];
if (!PyUnicode_CheckExact(left)) goto deoptimize;
if (!PyUnicode_CheckExact(right)) goto deoptimize;
if (!PyUnicode_CheckExact(left)) goto side_exit;
if (!PyUnicode_CheckExact(right)) goto side_exit;
break;
}
@ -1623,7 +1623,7 @@
uint32_t type_version = (uint32_t)CURRENT_OPERAND();
PyTypeObject *tp = Py_TYPE(owner);
assert(type_version != 0);
if (tp->tp_version_tag != type_version) goto deoptimize;
if (tp->tp_version_tag != type_version) goto side_exit;
break;
}
@ -2013,8 +2013,6 @@
break;
}
/* _JUMP_BACKWARD is not a viable micro-op for tier 2 */
/* _POP_JUMP_IF_FALSE is not a viable micro-op for tier 2 */
/* _POP_JUMP_IF_TRUE is not a viable micro-op for tier 2 */
@ -3318,7 +3316,7 @@
PyObject *flag;
flag = stack_pointer[-1];
stack_pointer += -1;
if (!Py_IsTrue(flag)) goto deoptimize;
if (!Py_IsTrue(flag)) goto side_exit;
assert(Py_IsTrue(flag));
break;
}
@ -3327,7 +3325,7 @@
PyObject *flag;
flag = stack_pointer[-1];
stack_pointer += -1;
if (!Py_IsFalse(flag)) goto deoptimize;
if (!Py_IsFalse(flag)) goto side_exit;
assert(Py_IsFalse(flag));
break;
}
@ -3338,7 +3336,7 @@
stack_pointer += -1;
if (!Py_IsNone(val)) {
Py_DECREF(val);
if (1) goto deoptimize;
if (1) goto side_exit;
}
break;
}
@ -3347,13 +3345,15 @@
PyObject *val;
val = stack_pointer[-1];
stack_pointer += -1;
if (Py_IsNone(val)) goto deoptimize;
if (Py_IsNone(val)) goto side_exit;
Py_DECREF(val);
break;
}
case _JUMP_TO_TOP: {
next_uop = current_executor->trace;
#ifndef _Py_JIT
next_uop = &current_executor->trace[1];
#endif
CHECK_EVAL_BREAKER();
break;
}
@ -3378,7 +3378,7 @@
case _EXIT_TRACE: {
TIER_TWO_ONLY
if (1) goto deoptimize;
if (1) goto side_exit;
break;
}
@ -3457,6 +3457,60 @@
break;
}
case _COLD_EXIT: {
oparg = CURRENT_OPARG();
TIER_TWO_ONLY
_PyExecutorObject *previous = (_PyExecutorObject *)tstate->previous_executor;
_PyExitData *exit = &previous->exits[oparg];
exit->temperature++;
PyCodeObject *code = _PyFrame_GetCode(frame);
_Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target;
if (exit->temperature < (int32_t)tstate->interp->optimizer_side_threshold) {
GOTO_TIER_ONE(target);
}
_PyExecutorObject *executor;
if (target->op.code == ENTER_EXECUTOR) {
executor = code->co_executors->executors[target->op.arg];
Py_INCREF(executor);
} else {
int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor);
if (optimized <= 0) {
int32_t new_temp = -1 * tstate->interp->optimizer_side_threshold;
exit->temperature = (new_temp < INT16_MIN) ? INT16_MIN : new_temp;
if (optimized < 0) {
Py_DECREF(previous);
tstate->previous_executor = Py_None;
if (1) goto error_tier_two;
}
GOTO_TIER_ONE(target);
}
}
/* We need two references. One to store in exit->executor and
* one to keep the executor alive when executing. */
Py_INCREF(executor);
exit->executor = executor;
GOTO_TIER_TWO(executor);
break;
}
case _START_EXECUTOR: {
PyObject *executor = (PyObject *)CURRENT_OPERAND();
TIER_TWO_ONLY
Py_DECREF(tstate->previous_executor);
tstate->previous_executor = NULL;
#ifndef _Py_JIT
current_executor = (_PyExecutorObject*)executor;
#endif
break;
}
case _FATAL_ERROR: {
TIER_TWO_ONLY
assert(0);
Py_FatalError("Fatal error uop executed.");
break;
}
case _CHECK_VALIDITY_AND_SET_IP: {
PyObject *instr_ptr = (PyObject *)CURRENT_OPERAND();
TIER_TWO_ONLY

View file

@ -741,7 +741,8 @@
INSTRUCTION_STATS(CACHE);
TIER_ONE_ONLY
assert(0 && "Executing a cache.");
Py_UNREACHABLE();
Py_FatalError("Executing a cache.");
DISPATCH();
}
TARGET(CALL) {
@ -2369,12 +2370,14 @@
TIER_ONE_ONLY
CHECK_EVAL_BREAKER();
PyCodeObject *code = _PyFrame_GetCode(frame);
current_executor = code->co_executors->executors[oparg & 255];
assert(current_executor->vm_data.index == INSTR_OFFSET() - 1);
assert(current_executor->vm_data.code == code);
assert(current_executor->vm_data.valid);
Py_INCREF(current_executor);
GOTO_TIER_TWO();
_PyExecutorObject *executor = code->co_executors->executors[oparg & 255];
assert(executor->vm_data.index == INSTR_OFFSET() - 1);
assert(executor->vm_data.code == code);
assert(executor->vm_data.valid);
assert(tstate->previous_executor == NULL);
tstate->previous_executor = Py_None;
Py_INCREF(executor);
GOTO_TIER_TWO(executor);
DISPATCH();
}
@ -3262,6 +3265,7 @@
next_instr += 2;
INSTRUCTION_STATS(JUMP_BACKWARD);
/* Skip 1 cache entry */
TIER_ONE_ONLY
CHECK_EVAL_BREAKER();
assert(oparg <= INSTR_OFFSET());
JUMPBY(-oparg);
@ -3283,13 +3287,13 @@
oparg >>= 8;
start--;
}
int optimized = _PyOptimizer_Optimize(frame, start, stack_pointer);
_PyExecutorObject *executor;
int optimized = _PyOptimizer_Optimize(frame, start, stack_pointer, &executor);
if (optimized < 0) goto error;
if (optimized) {
// Rewind and enter the executor:
assert(start->op.code == ENTER_EXECUTOR);
next_instr = start;
this_instr[1].cache &= OPTIMIZER_BITS_MASK;
assert(tstate->previous_executor == NULL);
tstate->previous_executor = Py_None;
GOTO_TIER_TWO(executor);
}
else {
int backoff = this_instr[1].cache & OPTIMIZER_BITS_MASK;
@ -4778,7 +4782,8 @@
INSTRUCTION_STATS(RESERVED);
TIER_ONE_ONLY
assert(0 && "Executing RESERVED instruction.");
Py_UNREACHABLE();
Py_FatalError("Executing RESERVED instruction.");
DISPATCH();
}
TARGET(RESUME) {

View file

@ -300,13 +300,13 @@ emit(const StencilGroup *group, uint64_t patches[])
// Compiles executor in-place. Don't forget to call _PyJIT_Free later!
int
_PyJIT_Compile(_PyExecutorObject *executor, _PyUOpInstruction *trace, size_t length)
_PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction *trace, size_t length)
{
// Loop once to find the total compiled size:
size_t code_size = 0;
size_t data_size = 0;
for (size_t i = 0; i < length; i++) {
_PyUOpInstruction *instruction = &trace[i];
_PyUOpInstruction *instruction = (_PyUOpInstruction *)&trace[i];
const StencilGroup *group = &stencil_groups[instruction->opcode];
code_size += group->code.body_size;
data_size += group->data.body_size;
@ -323,8 +323,13 @@ _PyJIT_Compile(_PyExecutorObject *executor, _PyUOpInstruction *trace, size_t len
// Loop again to emit the code:
char *code = memory;
char *data = memory + code_size;
char *top = code;
if (trace[0].opcode == _START_EXECUTOR) {
// Don't want to execute this more than once:
top += stencil_groups[_START_EXECUTOR].code.body_size;
}
for (size_t i = 0; i < length; i++) {
_PyUOpInstruction *instruction = &trace[i];
_PyUOpInstruction *instruction = (_PyUOpInstruction *)&trace[i];
const StencilGroup *group = &stencil_groups[instruction->opcode];
// Think of patches as a dictionary mapping HoleValue to uint64_t:
uint64_t patches[] = GET_PATCHES();
@ -335,7 +340,7 @@ _PyJIT_Compile(_PyExecutorObject *executor, _PyUOpInstruction *trace, size_t len
patches[HoleValue_OPARG] = instruction->oparg;
patches[HoleValue_OPERAND] = instruction->operand;
patches[HoleValue_TARGET] = instruction->target;
patches[HoleValue_TOP] = (uint64_t)memory;
patches[HoleValue_TOP] = (uint64_t)top;
patches[HoleValue_ZERO] = 0;
emit(group, patches);
code += group->code.body_size;

View file

@ -2,6 +2,7 @@
#include "opcode.h"
#include "pycore_interp.h"
#include "pycore_bitutils.h" // _Py_popcount32()
#include "pycore_object.h" // _PyObject_GC_UNTRACK()
#include "pycore_opcode_metadata.h" // _PyOpcode_OpName[]
#include "pycore_opcode_utils.h" // MAX_REAL_OPCODE
#include "pycore_optimizer.h" // _Py_uop_analyze_and_optimize()
@ -128,10 +129,11 @@ static _PyOptimizerObject _PyOptimizer_Default = {
.optimize = never_optimize,
.resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD,
.backedge_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD,
.side_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD,
};
static uint32_t
shift_and_offset_threshold(uint16_t threshold)
shift_and_offset_threshold(uint32_t threshold)
{
return (threshold << OPTIMIZER_BITS_IN_COUNTER) + (1 << 15);
}
@ -140,41 +142,74 @@ _PyOptimizerObject *
PyUnstable_GetOptimizer(void)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
if (interp->optimizer == &_PyOptimizer_Default) {
return NULL;
}
assert(interp->optimizer_backedge_threshold ==
shift_and_offset_threshold(interp->optimizer->backedge_threshold));
assert(interp->optimizer_resume_threshold ==
shift_and_offset_threshold(interp->optimizer->resume_threshold));
if (interp->optimizer == &_PyOptimizer_Default) {
return NULL;
}
Py_INCREF(interp->optimizer);
return interp->optimizer;
}
static _PyExecutorObject *
make_executor_from_uops(_PyUOpInstruction *buffer, const _PyBloomFilter *dependencies);
static int
init_cold_exit_executor(_PyExecutorObject *executor, int oparg);
static int cold_exits_initialized = 0;
static _PyExecutorObject COLD_EXITS[UOP_MAX_TRACE_LENGTH] = { 0 };
static const _PyBloomFilter EMPTY_FILTER = { 0 };
_PyOptimizerObject *
_Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject *optimizer)
{
if (optimizer == NULL) {
optimizer = &_PyOptimizer_Default;
}
else if (cold_exits_initialized == 0) {
cold_exits_initialized = 1;
for (int i = 0; i < UOP_MAX_TRACE_LENGTH; i++) {
if (init_cold_exit_executor(&COLD_EXITS[i], i)) {
return NULL;
}
}
}
_PyOptimizerObject *old = interp->optimizer;
if (old == NULL) {
old = &_PyOptimizer_Default;
}
Py_INCREF(optimizer);
interp->optimizer = optimizer;
interp->optimizer_backedge_threshold = shift_and_offset_threshold(optimizer->backedge_threshold);
interp->optimizer_resume_threshold = shift_and_offset_threshold(optimizer->resume_threshold);
interp->optimizer_side_threshold = optimizer->side_threshold;
if (optimizer == &_PyOptimizer_Default) {
assert(interp->optimizer_backedge_threshold > (1 << 16));
assert(interp->optimizer_resume_threshold > (1 << 16));
}
return old;
}
void
int
PyUnstable_SetOptimizer(_PyOptimizerObject *optimizer)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
_PyOptimizerObject *old = _Py_SetOptimizer(interp, optimizer);
Py_DECREF(old);
Py_XDECREF(old);
return old == NULL ? -1 : 0;
}
/* Returns 1 if optimized, 0 if not optimized, and -1 for an error.
* If optimized, *executor_ptr contains a new reference to the executor
*/
int
_PyOptimizer_Optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer)
_PyOptimizer_Optimize(
_PyInterpreterFrame *frame, _Py_CODEUNIT *start,
PyObject **stack_pointer, _PyExecutorObject **executor_ptr)
{
PyCodeObject *code = (PyCodeObject *)frame->f_executable;
assert(PyCode_Check(code));
@ -183,12 +218,11 @@ _PyOptimizer_Optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject
return 0;
}
_PyOptimizerObject *opt = interp->optimizer;
_PyExecutorObject *executor = NULL;
int err = opt->optimize(opt, frame, start, &executor, (int)(stack_pointer - _PyFrame_Stackbase(frame)));
int err = opt->optimize(opt, frame, start, executor_ptr, (int)(stack_pointer - _PyFrame_Stackbase(frame)));
if (err <= 0) {
assert(executor == NULL);
return err;
}
assert(*executor_ptr != NULL);
int index = get_index_for_executor(code, start);
if (index < 0) {
/* Out of memory. Don't raise and assume that the
@ -197,11 +231,11 @@ _PyOptimizer_Optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject
* If an optimizer has already produced an executor,
* it might get confused by the executor disappearing,
* but there is not much we can do about that here. */
Py_DECREF(executor);
Py_DECREF(*executor_ptr);
return 0;
}
insert_executor(code, start, index, executor);
Py_DECREF(executor);
insert_executor(code, start, index, *executor_ptr);
assert((*executor_ptr)->vm_data.valid);
return 1;
}
@ -237,11 +271,12 @@ static PyMethodDef executor_methods[] = {
static void
uop_dealloc(_PyExecutorObject *self) {
_PyObject_GC_UNTRACK(self);
_Py_ExecutorClear(self);
#ifdef _Py_JIT
_PyJIT_Free(self);
#endif
PyObject_Free(self);
PyObject_GC_Del(self);
}
const char *
@ -253,7 +288,7 @@ _PyUOpName(int index)
static Py_ssize_t
uop_len(_PyExecutorObject *self)
{
return Py_SIZE(self);
return self->code_size;
}
static PyObject *
@ -292,15 +327,34 @@ PySequenceMethods uop_as_sequence = {
.sq_item = (ssizeargfunc)uop_item,
};
static int
executor_clear(PyObject *o)
{
_Py_ExecutorClear((_PyExecutorObject *)o);
return 0;
}
static int
executor_traverse(PyObject *o, visitproc visit, void *arg)
{
_PyExecutorObject *executor = (_PyExecutorObject *)o;
for (uint32_t i = 0; i < executor->exit_count; i++) {
Py_VISIT(executor->exits[i].executor);
}
return 0;
}
PyTypeObject _PyUOpExecutor_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
.tp_name = "uop_executor",
.tp_basicsize = offsetof(_PyExecutorObject, trace),
.tp_itemsize = sizeof(_PyUOpInstruction),
.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
.tp_basicsize = offsetof(_PyExecutorObject, exits),
.tp_itemsize = 1,
.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC,
.tp_dealloc = (destructor)uop_dealloc,
.tp_as_sequence = &uop_as_sequence,
.tp_methods = executor_methods,
.tp_traverse = executor_traverse,
.tp_clear = executor_clear,
};
/* TO DO -- Generate these tables */
@ -324,6 +378,7 @@ BRANCH_TO_GUARD[4][2] = {
[POP_JUMP_IF_NOT_NONE - POP_JUMP_IF_FALSE][1] = _GUARD_IS_NOT_NONE_POP,
};
#define CONFIDENCE_RANGE 1000
#define CONFIDENCE_CUTOFF 333
@ -726,9 +781,10 @@ translate_bytecode_to_trace(
* NOPs are excluded from the count.
*/
static int
compute_used(_PyUOpInstruction *buffer, uint32_t *used)
compute_used(_PyUOpInstruction *buffer, uint32_t *used, int *exit_count_ptr)
{
int count = 0;
int exit_count = 0;
SET_BIT(used, 0);
for (int i = 0; i < UOP_MAX_TRACE_LENGTH; i++) {
if (!BIT_IS_SET(used, i)) {
@ -736,6 +792,9 @@ compute_used(_PyUOpInstruction *buffer, uint32_t *used)
}
count++;
int opcode = buffer[i].opcode;
if (_PyUop_Flags[opcode] & HAS_EXIT_FLAG) {
exit_count++;
}
if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE) {
continue;
}
@ -751,44 +810,76 @@ compute_used(_PyUOpInstruction *buffer, uint32_t *used)
UNSET_BIT(used, i);
}
}
*exit_count_ptr = exit_count;
return count;
}
/* Executor side exits */
static _PyExecutorObject *
allocate_executor(int exit_count, int length)
{
int size = exit_count*sizeof(_PyExitData) + length*sizeof(_PyUOpInstruction);
_PyExecutorObject *res = PyObject_GC_NewVar(_PyExecutorObject, &_PyUOpExecutor_Type, size);
if (res == NULL) {
return NULL;
}
res->trace = (_PyUOpInstruction *)(res->exits + exit_count);
res->code_size = length;
res->exit_count = exit_count;
return res;
}
/* Makes an executor from a buffer of uops.
* Account for the buffer having gaps and NOPs by computing a "used"
* bit vector and only copying the used uops. Here "used" means reachable
* and not a NOP.
*/
static _PyExecutorObject *
make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies)
make_executor_from_uops(_PyUOpInstruction *buffer, const _PyBloomFilter *dependencies)
{
uint32_t used[(UOP_MAX_TRACE_LENGTH + 31)/32] = { 0 };
int length = compute_used(buffer, used);
_PyExecutorObject *executor = PyObject_NewVar(_PyExecutorObject, &_PyUOpExecutor_Type, length);
int exit_count;
int length = compute_used(buffer, used, &exit_count);
_PyExecutorObject *executor = allocate_executor(exit_count, length+1);
if (executor == NULL) {
return NULL;
}
int dest = length - 1;
/* Initialize exits */
for (int i = 0; i < exit_count; i++) {
executor->exits[i].executor = &COLD_EXITS[i];
executor->exits[i].temperature = 0;
}
int next_exit = exit_count-1;
_PyUOpInstruction *dest = (_PyUOpInstruction *)&executor->trace[length];
/* Scan backwards, so that we see the destinations of jumps before the jumps themselves. */
for (int i = UOP_MAX_TRACE_LENGTH-1; i >= 0; i--) {
if (!BIT_IS_SET(used, i)) {
continue;
}
executor->trace[dest] = buffer[i];
*dest = buffer[i];
int opcode = buffer[i].opcode;
if (opcode == _POP_JUMP_IF_FALSE ||
opcode == _POP_JUMP_IF_TRUE)
{
/* The oparg of the target will already have been set to its new offset */
int oparg = executor->trace[dest].oparg;
executor->trace[dest].oparg = buffer[oparg].oparg;
int oparg = dest->oparg;
dest->oparg = buffer[oparg].oparg;
}
if (_PyUop_Flags[opcode] & HAS_EXIT_FLAG) {
executor->exits[next_exit].target = buffer[i].target;
dest->exit_index = next_exit;
next_exit--;
}
/* Set the oparg to be the destination offset,
* so that we can set the oparg of earlier jumps correctly. */
buffer[i].oparg = dest;
buffer[i].oparg = (uint16_t)(dest - executor->trace);
dest--;
}
assert(dest == -1);
assert(next_exit == -1);
assert(dest == executor->trace);
dest->opcode = _START_EXECUTOR;
dest->operand = (uintptr_t)executor;
_Py_ExecutorInit(executor, dependencies);
#ifdef Py_DEBUG
char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
@ -811,14 +902,40 @@ make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies)
#ifdef _Py_JIT
executor->jit_code = NULL;
executor->jit_size = 0;
if (_PyJIT_Compile(executor, executor->trace, Py_SIZE(executor))) {
if (_PyJIT_Compile(executor, executor->trace, length+1)) {
Py_DECREF(executor);
return NULL;
}
#endif
_PyObject_GC_TRACK(executor);
return executor;
}
static int
init_cold_exit_executor(_PyExecutorObject *executor, int oparg)
{
_Py_SetImmortal(executor);
Py_SET_TYPE(executor, &_PyUOpExecutor_Type);
executor->trace = (_PyUOpInstruction *)executor->exits;
executor->code_size = 1;
executor->exit_count = 0;
_PyUOpInstruction *inst = (_PyUOpInstruction *)&executor->trace[0];
inst->opcode = _COLD_EXIT;
inst->oparg = oparg;
executor->vm_data.valid = true;
for (int i = 0; i < BLOOM_FILTER_WORDS; i++) {
assert(executor->vm_data.bloom.bits[i] == 0);
}
#ifdef _Py_JIT
executor->jit_code = NULL;
executor->jit_size = 0;
if (_PyJIT_Compile(executor, executor->trace, 1)) {
return -1;
}
#endif
return 0;
}
static int
uop_optimize(
_PyOptimizerObject *self,
@ -880,13 +997,15 @@ PyUnstable_Optimizer_NewUOpOptimizer(void)
opt->resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD;
// Need a few iterations to settle specializations,
// and to ammortize the cost of optimization.
opt->side_threshold = 16;
opt->backedge_threshold = 16;
return (PyObject *)opt;
}
static void
counter_dealloc(_PyExecutorObject *self) {
PyObject *opt = (PyObject *)self->trace[0].operand;
/* The optimizer is the operand of the second uop. */
PyObject *opt = (PyObject *)self->trace[1].operand;
Py_DECREF(opt);
uop_dealloc(self);
}
@ -894,11 +1013,13 @@ counter_dealloc(_PyExecutorObject *self) {
PyTypeObject _PyCounterExecutor_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
.tp_name = "counting_executor",
.tp_basicsize = offsetof(_PyExecutorObject, trace),
.tp_itemsize = sizeof(_PyUOpInstruction),
.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
.tp_basicsize = offsetof(_PyExecutorObject, exits),
.tp_itemsize = 1,
.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC,
.tp_dealloc = (destructor)counter_dealloc,
.tp_methods = executor_methods,
.tp_traverse = executor_traverse,
.tp_clear = executor_clear,
};
static int
@ -926,9 +1047,7 @@ counter_optimize(
{ .opcode = _INTERNAL_INCREMENT_OPT_COUNTER },
{ .opcode = _EXIT_TRACE, .target = (uint32_t)(target - _PyCode_CODE(code)) }
};
_PyBloomFilter empty;
_Py_BloomFilter_Init(&empty);
_PyExecutorObject *executor = make_executor_from_uops(buffer, &empty);
_PyExecutorObject *executor = make_executor_from_uops(buffer, &EMPTY_FILTER);
if (executor == NULL) {
return -1;
}
@ -968,6 +1087,7 @@ PyUnstable_Optimizer_NewCounter(void)
}
opt->base.optimize = counter_optimize;
opt->base.resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD;
opt->base.side_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD;
opt->base.backedge_threshold = 0;
opt->count = 0;
return (PyObject *)opt;
@ -1091,9 +1211,6 @@ link_executor(_PyExecutorObject *executor)
static void
unlink_executor(_PyExecutorObject *executor)
{
if (!executor->vm_data.valid) {
return;
}
_PyExecutorLinkListNode *links = &executor->vm_data.links;
_PyExecutorObject *next = links->next;
_PyExecutorObject *prev = links->previous;
@ -1114,7 +1231,7 @@ unlink_executor(_PyExecutorObject *executor)
/* This must be called by optimizers before using the executor */
void
_Py_ExecutorInit(_PyExecutorObject *executor, _PyBloomFilter *dependency_set)
_Py_ExecutorInit(_PyExecutorObject *executor, const _PyBloomFilter *dependency_set)
{
executor->vm_data.valid = true;
for (int i = 0; i < BLOOM_FILTER_WORDS; i++) {
@ -1127,11 +1244,19 @@ _Py_ExecutorInit(_PyExecutorObject *executor, _PyBloomFilter *dependency_set)
void
_Py_ExecutorClear(_PyExecutorObject *executor)
{
if (!executor->vm_data.valid) {
return;
}
unlink_executor(executor);
PyCodeObject *code = executor->vm_data.code;
if (code == NULL) {
return;
}
for (uint32_t i = 0; i < executor->exit_count; i++) {
Py_DECREF(executor->exits[i].executor);
executor->exits[i].executor = &COLD_EXITS[i];
executor->exits[i].temperature = INT16_MIN;
}
_Py_CODEUNIT *instruction = &_PyCode_CODE(code)[executor->vm_data.index];
assert(instruction->op.code == ENTER_EXECUTOR);
int index = instruction->op.arg;

View file

@ -1261,7 +1261,9 @@ init_interp_main(PyThreadState *tstate)
if (opt == NULL) {
return _PyStatus_ERR("can't initialize optimizer");
}
PyUnstable_SetOptimizer((_PyOptimizerObject *)opt);
if (PyUnstable_SetOptimizer((_PyOptimizerObject *)opt)) {
return _PyStatus_ERR("can't initialize optimizer");
}
Py_DECREF(opt);
}
}

View file

@ -782,6 +782,7 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
}
_PyOptimizerObject *old = _Py_SetOptimizer(interp, NULL);
assert(old != NULL);
Py_DECREF(old);
/* It is possible that any of the objects below have a finalizer
@ -1346,6 +1347,7 @@ init_threadstate(_PyThreadStateImpl *_tstate,
tstate->datastack_top = NULL;
tstate->datastack_limit = NULL;
tstate->what_event = -1;
tstate->previous_executor = NULL;
#ifdef Py_GIL_DISABLED
// Initialize biased reference counting inter-thread queue

150
Python/tier2_engine.md Normal file
View file

@ -0,0 +1,150 @@
# The tier 2 execution engine
## General idea
When execution in tier 1 becomes "hot", that is the counter for that point in
the code reaches some threshold, we create an executor and execute that
instead of the tier 1 bytecode.
Since each executor must exit, we also track the "hotness" of those
exits and attach new executors to those exits.
As the program executes, and the hot parts of the program get optimized,
a graph of executors forms.
## Superblocks and Executors
Once a point in the code has become hot enough, we want to optimize it.
Starting from that point we project the likely path of execution,
using information gathered by tier 1 to guide that projection to
form a "superblock", a mostly linear sequence of micro-ops.
Although mostly linear, it may include a single loop.
We then optimize this superblock to form an optimized superblock,
which is equivalent but more efficient.
A superblock is a representation of the code we want to execute,
but it is not in executable form.
The executable form is known as an executor.
Executors are semantically equivalent to the superblock they are
created from, but are in a form that can be efficiently executable.
There are two execution engines for executors, and two types of executors:
* The hardware which runs machine code executors created by the JIT compiler.
* The tier 2 interpreter runs bytecode executors.
It would be very wasteful to support both a tier 2 interpreter and
JIT compiler in the same process.
For now, we will make the choice of engine a configuration option,
but we could make it a command line option in the future if that would prove useful.
### Tier 2 Interpreter
For platforms without a JIT and for testing, we need an interpreter
for executors. It is similar in design to the tier 1 interpreter, but has a
different instruction set, and does not adapt.
### JIT compiler
The JIT compiler converts superblocks into machine code executors.
These have identical behavior to interpreted executors, except that
they consume more memory for the generated machine code and are a lot faster.
## Transfering control
There are three types of control transfer that we need to consider:
* Tier 1 to tier 2
* Tier 2 to tier 1
* One executor to another within tier 2
Since we expect the graph of executors to span most of the hot
part of the program, transfers from one executor to another should
be the most common.
Therefore, we want to make those transfers fast.
### Tier 2 to tier 2
#### Cold exits
All side exits start cold and most stay cold, but a few become
hot. We want to keep the memory consumption small for the many
cold exits, but those that become hot need to be fast.
However we cannot know in advance, which will be which.
So that tier 2 to tier 2 transfers are fast for hot exits,
exits must be implemented as executors. In order to patch
executor exits when they get hot, a pointer to the current
executor must be passed to the exit executor.
#### Handling reference counts
There must be an implicit reference to the currently executing
executor, otherwise it might be freed.
Consequently, we must increment the reference count of an
executor just before executing it, and decrement it just after
executing it.
We want to minimize the amount of data that is passed from
one executor to the next. In the JIT, this reduces the number
of arguments in the tailcall, freeing up registers for other uses.
It is less important in the interpreter, but following the same
design as the JIT simplifies debugging and is good for performance.
Provided that we incref the new executor before executing it, we
can jump directly to the code of the executor, without needing
to pass a reference to that executor object.
However, we do need a reference to the previous executor,
so that it can be decref'd and for handling of cold exits.
To avoid messing up the JIT's register allocation, we pass a
reference to the previous executor in the thread state's
`previous_executor` field.
#### The interpreter
The tier 2 interpreter has a variable `current_executor` which
points to the currently live executor. When transfering from executor
`A` to executor `B` we do the following:
(Initially `current_executor` points to `A`, and the refcount of
`A` is elevated by one)
1. Set the instruction pointer to start at the beginning of `B`
2. Increment the reference count of `B`
3. Start executing `B`
We also make the first instruction in `B` do the following:
1. Set `current_executor` to point to `B`
2. Decrement the reference count of `A` (`A` is referenced by `tstate->previous_executor`)
The net effect of the above is to safely decrement the refcount of `A`,
increment the refcount of `B` and set `current_executor` to point to `B`.
#### In the JIT
Transfering control from one executor to another is done via tailcalls.
The compiled executor should do the same, except that there is no local
variable `current_executor`.
### Tier 1 to tier 2
Since the executor doesn't know if the previous code was tier 1 or tier 2,
we need to make a transfer from tier 1 to tier 2 look like a tier 2 to tier 2
transfer to the executor.
We can then perform a tier 1 to tier 2 transfer by setting `current_executor`
to `None`, and then performing a tier 2 to tier 2 transfer as above.
### Tier 2 to tier 1
Each micro-op that might exit to tier 1 contains a `target` value,
which is the offset of the tier 1 instruction to exit to in the
current code object.
## Counters
TO DO.
The implementation will change soon, so there is no point in
documenting it until then.

View file

@ -1053,8 +1053,6 @@
break;
}
/* _JUMP_BACKWARD is not a viable micro-op for tier 2 */
/* _POP_JUMP_IF_FALSE is not a viable micro-op for tier 2 */
/* _POP_JUMP_IF_TRUE is not a viable micro-op for tier 2 */
@ -1739,6 +1737,18 @@
break;
}
case _COLD_EXIT: {
break;
}
case _START_EXECUTOR: {
break;
}
case _FATAL_ERROR: {
break;
}
case _CHECK_VALIDITY_AND_SET_IP: {
break;
}

View file

@ -321,6 +321,7 @@ def clean_lines(text):
_abs('Objects/stringlib/unicode_format.h'): (10_000, 400),
_abs('Objects/typeobject.c'): (35_000, 200),
_abs('Python/compile.c'): (20_000, 500),
_abs('Python/optimizer.c'): (100_000, 5_000),
_abs('Python/parking_lot.c'): (40_000, 1000),
_abs('Python/pylifecycle.c'): (500_000, 5000),
_abs('Python/pystate.c'): (500_000, 5000),

View file

@ -382,6 +382,11 @@ Python/optimizer.c - _PyCounterOptimizer_Type -
Python/optimizer.c - _PyUOpExecutor_Type -
Python/optimizer.c - _PyUOpOptimizer_Type -
Python/optimizer.c - _PyOptimizer_Default -
Python/optimizer.c - _ColdExit_Type -
Python/optimizer.c - COLD_EXITS -
Python/optimizer.c - Py_FatalErrorExecutor -
Python/optimizer.c - EMPTY_FILTER -
Python/optimizer.c - cold_exits_initialized -
##-----------------------
## test code

Can't render this file because it has a wrong number of fields in line 4.

View file

@ -21,6 +21,7 @@ class Properties:
uses_co_names: bool
uses_locals: bool
has_free: bool
side_exit: bool
pure: bool
passthrough: bool
@ -48,6 +49,7 @@ def from_list(properties: list["Properties"]) -> "Properties":
uses_co_names=any(p.uses_co_names for p in properties),
uses_locals=any(p.uses_locals for p in properties),
has_free=any(p.has_free for p in properties),
side_exit=any(p.side_exit for p in properties),
pure=all(p.pure for p in properties),
passthrough=all(p.passthrough for p in properties),
)
@ -69,6 +71,7 @@ def from_list(properties: list["Properties"]) -> "Properties":
uses_co_names=False,
uses_locals=False,
has_free=False,
side_exit=False,
pure=False,
passthrough=False,
)
@ -269,9 +272,7 @@ def override_error(
def convert_stack_item(item: parser.StackEffect) -> StackItem:
return StackItem(
item.name, item.type, item.cond, (item.size or "1")
)
return StackItem(item.name, item.type, item.cond, (item.size or "1"))
def analyze_stack(op: parser.InstDef) -> StackEffect:
@ -448,13 +449,24 @@ def compute_properties(op: parser.InstDef) -> Properties:
or variable_used(op, "PyCell_GET")
or variable_used(op, "PyCell_SET")
)
deopts_if = variable_used(op, "DEOPT_IF")
exits_if = variable_used(op, "EXIT_IF")
if deopts_if and exits_if:
tkn = op.tokens[0]
raise lexer.make_syntax_error(
"Op cannot contain both EXIT_IF and DEOPT_IF",
tkn.filename,
tkn.line,
tkn.column,
op.name,
)
infallible = is_infallible(op)
deopts = variable_used(op, "DEOPT_IF")
passthrough = stack_effect_only_peeks(op) and infallible
return Properties(
escapes=makes_escaping_api_call(op),
infallible=infallible,
deopts=deopts,
deopts=deopts_if or exits_if,
side_exit=exits_if,
oparg=variable_used(op, "oparg"),
jumps=variable_used(op, "JUMPBY"),
eval_breaker=variable_used(op, "CHECK_EVAL_BREAKER"),

View file

@ -154,6 +154,7 @@ def replace_check_eval_breaker(
REPLACEMENT_FUNCTIONS = {
"EXIT_IF": replace_deopt,
"DEOPT_IF": replace_deopt,
"ERROR_IF": replace_error,
"DECREF_INPUTS": replace_decrefs,
@ -205,6 +206,8 @@ def cflags(p: Properties) -> str:
flags.append("HAS_EVAL_BREAK_FLAG")
if p.deopts:
flags.append("HAS_DEOPT_FLAG")
if p.side_exit:
flags.append("HAS_EXIT_FLAG")
if not p.infallible:
flags.append("HAS_ERROR_FLAG")
if p.escapes:

View file

@ -50,6 +50,7 @@
"DEOPT",
"ERROR",
"ESCAPES",
"EXIT",
"PURE",
"PASSTHROUGH",
]

View file

@ -98,9 +98,25 @@ def tier2_replace_deopt(
out.emit(") goto deoptimize;\n")
def tier2_replace_exit_if(
out: CWriter,
tkn: Token,
tkn_iter: Iterator[Token],
uop: Uop,
unused: Stack,
inst: Instruction | None,
) -> None:
out.emit_at("if ", tkn)
out.emit(next(tkn_iter))
emit_to(out, tkn_iter, "RPAREN")
next(tkn_iter) # Semi colon
out.emit(") goto side_exit;\n")
TIER2_REPLACEMENT_FUNCTIONS = REPLACEMENT_FUNCTIONS.copy()
TIER2_REPLACEMENT_FUNCTIONS["ERROR_IF"] = tier2_replace_error
TIER2_REPLACEMENT_FUNCTIONS["DEOPT_IF"] = tier2_replace_deopt
TIER2_REPLACEMENT_FUNCTIONS["EXIT_IF"] = tier2_replace_exit_if
def write_uop(uop: Uop, out: CWriter, stack: Stack) -> None:

View file

@ -38,6 +38,20 @@
goto LABEL ## _tier_two; \
} while (0)
#undef GOTO_TIER_TWO
#define GOTO_TIER_TWO(EXECUTOR) \
do { \
__attribute__((musttail)) \
return ((jit_func)((EXECUTOR)->jit_code))(frame, stack_pointer, tstate); \
} while (0)
#undef GOTO_TIER_ONE
#define GOTO_TIER_ONE(TARGET) \
do { \
_PyFrame_SetStackPointer(frame, stack_pointer); \
return TARGET; \
} while (0)
#undef LOAD_IP
#define LOAD_IP(UNUSED) \
do { \
@ -59,7 +73,6 @@ _JIT_ENTRY(_PyInterpreterFrame *frame, PyObject **stack_pointer, PyThreadState *
PATCH_VALUE(_PyExecutorObject *, current_executor, _JIT_EXECUTOR)
int oparg;
int opcode = _JIT_OPCODE;
_PyUOpInstruction *next_uop;
// Other stuff we need handy:
PATCH_VALUE(uint16_t, _oparg, _JIT_OPARG)
PATCH_VALUE(uint64_t, _operand, _JIT_OPERAND)
@ -90,9 +103,16 @@ _JIT_ENTRY(_PyInterpreterFrame *frame, PyObject **stack_pointer, PyThreadState *
pop_1_error_tier_two:
STACK_SHRINK(1);
error_tier_two:
_PyFrame_SetStackPointer(frame, stack_pointer);
return NULL;
tstate->previous_executor = (PyObject *)current_executor;
GOTO_TIER_ONE(NULL);
deoptimize:
_PyFrame_SetStackPointer(frame, stack_pointer);
return _PyCode_CODE(_PyFrame_GetCode(frame)) + _target;
tstate->previous_executor = (PyObject *)current_executor;
GOTO_TIER_ONE(_PyCode_CODE(_PyFrame_GetCode(frame)) + _target);
side_exit:
{
_PyExitData *exit = &current_executor->exits[_target];
Py_INCREF(exit->executor);
tstate->previous_executor = (PyObject *)current_executor;
GOTO_TIER_TWO(exit->executor);
}
}