From 34a03e951b027902d993c7066ba8e6b7e92cb2a9 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 9 Nov 2023 13:49:51 +0000 Subject: [PATCH] GH-111843: Tier 2 exponential backoff (GH-111850) --- Include/cpython/optimizer.h | 4 ++++ ...-11-05-06-40-35.gh-issue-111843.c045cB.rst | 2 ++ Python/bytecodes.c | 23 +++++++++++++++---- Python/generated_cases.c.h | 23 +++++++++++++++---- Python/optimizer.c | 9 ++++---- 5 files changed, 47 insertions(+), 14 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-11-05-06-40-35.gh-issue-111843.c045cB.rst diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h index 2a5251b3ecb..adc2c1fc442 100644 --- a/Include/cpython/optimizer.h +++ b/Include/cpython/optimizer.h @@ -45,6 +45,8 @@ typedef int (*optimize_func)(_PyOptimizerObject* self, PyCodeObject *code, _Py_C typedef struct _PyOptimizerObject { PyObject_HEAD optimize_func optimize; + /* These thresholds are treated as signed so do not exceed INT16_MAX + * Use INT16_MAX to indicate that the optimizer should never be called */ uint16_t resume_threshold; uint16_t backedge_threshold; /* Data needed by the optimizer goes here, but is opaque to the VM */ @@ -76,6 +78,8 @@ PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewCounter(void); PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewUOpOptimizer(void); #define OPTIMIZER_BITS_IN_COUNTER 4 +/* Minimum of 16 additional executions before retry */ +#define MINIMUM_TIER2_BACKOFF 4 #ifdef __cplusplus } diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-11-05-06-40-35.gh-issue-111843.c045cB.rst b/Misc/NEWS.d/next/Core and Builtins/2023-11-05-06-40-35.gh-issue-111843.c045cB.rst new file mode 100644 index 00000000000..280f8f9bf0b --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-11-05-06-40-35.gh-issue-111843.c045cB.rst @@ -0,0 +1,2 @@ +Use exponential backoff to reduce the number of failed tier 2 optimization +attempts by over 99%. diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 701004201e5..d914f27ba9d 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2335,10 +2335,12 @@ dummy_func( JUMPBY(-oparg); #if ENABLE_SPECIALIZATION this_instr[1].cache += (1 << OPTIMIZER_BITS_IN_COUNTER); - if (this_instr[1].cache > tstate->interp->optimizer_backedge_threshold && - // Double-check that the opcode isn't instrumented or something: - this_instr->op.code == JUMP_BACKWARD) - { + /* We are using unsigned values, but we really want signed values, so + * do the 2s complement comparison manually */ + uint16_t ucounter = this_instr[1].cache + (1 << 15); + uint16_t threshold = tstate->interp->optimizer_backedge_threshold + (1 << 15); + // Double-check that the opcode isn't instrumented or something: + if (ucounter > threshold && this_instr->op.code == JUMP_BACKWARD) { OPT_STAT_INC(attempts); int optimized = _PyOptimizer_BackEdge(frame, this_instr, next_instr, stack_pointer); ERROR_IF(optimized < 0, error); @@ -2346,8 +2348,19 @@ dummy_func( // Rewind and enter the executor: assert(this_instr->op.code == ENTER_EXECUTOR); next_instr = this_instr; + this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1); + } + else { + int backoff = this_instr[1].cache & ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1); + if (backoff < MINIMUM_TIER2_BACKOFF) { + backoff = MINIMUM_TIER2_BACKOFF; + } + else if (backoff < 15 - OPTIMIZER_BITS_IN_COUNTER) { + backoff++; + } + assert(backoff <= 15 - OPTIMIZER_BITS_IN_COUNTER); + this_instr[1].cache = ((1 << 16) - ((1 << OPTIMIZER_BITS_IN_COUNTER) << backoff)) | backoff; } - this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1); } #endif /* ENABLE_SPECIALIZATION */ } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 1c853047a26..b9a2b227549 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3409,10 +3409,12 @@ JUMPBY(-oparg); #if ENABLE_SPECIALIZATION this_instr[1].cache += (1 << OPTIMIZER_BITS_IN_COUNTER); - if (this_instr[1].cache > tstate->interp->optimizer_backedge_threshold && - // Double-check that the opcode isn't instrumented or something: - this_instr->op.code == JUMP_BACKWARD) - { + /* We are using unsigned values, but we really want signed values, so + * do the 2s complement comparison manually */ + uint16_t ucounter = this_instr[1].cache + (1 << 15); + uint16_t threshold = tstate->interp->optimizer_backedge_threshold + (1 << 15); + // Double-check that the opcode isn't instrumented or something: + if (ucounter > threshold && this_instr->op.code == JUMP_BACKWARD) { OPT_STAT_INC(attempts); int optimized = _PyOptimizer_BackEdge(frame, this_instr, next_instr, stack_pointer); if (optimized < 0) goto error; @@ -3420,8 +3422,19 @@ // Rewind and enter the executor: assert(this_instr->op.code == ENTER_EXECUTOR); next_instr = this_instr; + this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1); + } + else { + int backoff = this_instr[1].cache & ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1); + if (backoff < MINIMUM_TIER2_BACKOFF) { + backoff = MINIMUM_TIER2_BACKOFF; + } + else if (backoff < 15 - OPTIMIZER_BITS_IN_COUNTER) { + backoff++; + } + assert(backoff <= 15 - OPTIMIZER_BITS_IN_COUNTER); + this_instr[1].cache = ((1 << 16) - ((1 << OPTIMIZER_BITS_IN_COUNTER) << backoff)) | backoff; } - this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1); } #endif /* ENABLE_SPECIALIZATION */ DISPATCH(); diff --git a/Python/optimizer.c b/Python/optimizer.c index 42279be3c1f..e142bd0488a 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -107,6 +107,7 @@ error_optimize( _PyExecutorObject **exec, int Py_UNUSED(stack_entries)) { + assert(0); PyErr_Format(PyExc_SystemError, "Should never call error_optimize"); return -1; } @@ -122,8 +123,8 @@ PyTypeObject _PyDefaultOptimizer_Type = { _PyOptimizerObject _PyOptimizer_Default = { PyObject_HEAD_INIT(&_PyDefaultOptimizer_Type) .optimize = error_optimize, - .resume_threshold = UINT16_MAX, - .backedge_threshold = UINT16_MAX, + .resume_threshold = INT16_MAX, + .backedge_threshold = INT16_MAX, }; _PyOptimizerObject * @@ -309,7 +310,7 @@ PyUnstable_Optimizer_NewCounter(void) return NULL; } opt->base.optimize = counter_optimize; - opt->base.resume_threshold = UINT16_MAX; + opt->base.resume_threshold = INT16_MAX; opt->base.backedge_threshold = 0; opt->count = 0; return (PyObject *)opt; @@ -915,7 +916,7 @@ PyUnstable_Optimizer_NewUOpOptimizer(void) return NULL; } opt->optimize = uop_optimize; - opt->resume_threshold = UINT16_MAX; + opt->resume_threshold = INT16_MAX; // Need at least 3 iterations to settle specializations. // A few lower bits of the counter are reserved for other flags. opt->backedge_threshold = 16 << OPTIMIZER_BITS_IN_COUNTER;