GH-93354: Use exponential backoff to avoid excessive specialization attempts. (GH-93355)

This commit is contained in:
Mark Shannon 2022-05-31 11:58:26 +01:00 committed by GitHub
parent a565ab0fd5
commit eb618d5ff0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 93 additions and 46 deletions

View file

@ -227,9 +227,6 @@ extern void _PyLineTable_InitAddressRange(
extern int _PyLineTable_NextAddressRange(PyCodeAddressRange *range);
extern int _PyLineTable_PreviousAddressRange(PyCodeAddressRange *range);
#define ADAPTIVE_CACHE_BACKOFF 64
/* Specialization functions */
extern int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr,
@ -423,6 +420,50 @@ write_location_entry_start(uint8_t *ptr, int code, int length)
}
/** Counters
* The first 16-bit value in each inline cache is a counter.
* When counting misses, the counter is treated as a simple unsigned value.
*
* When counting executions until the next specialization attempt,
* exponential backoff is used to reduce the number of specialization failures.
* The high 12 bits store the counter, the low 4 bits store the backoff exponent.
* On a specialization failure, the backoff exponent is incremented and the
* counter set to (2**backoff - 1).
* Backoff == 6 -> starting counter == 63, backoff == 10 -> starting counter == 1023.
*/
/* With a 16-bit counter, we have 12 bits for the counter value, and 4 bits for the backoff */
#define ADAPTIVE_BACKOFF_BITS 4
/* The initial counter value is 31 == 2**ADAPTIVE_BACKOFF_START - 1 */
#define ADAPTIVE_BACKOFF_START 5
#define MAX_BACKOFF_VALUE (16 - ADAPTIVE_BACKOFF_BITS)
static inline uint16_t
adaptive_counter_bits(int value, int backoff) {
return (value << ADAPTIVE_BACKOFF_BITS) |
(backoff & ((1<<ADAPTIVE_BACKOFF_BITS)-1));
}
static inline uint16_t
adaptive_counter_start(void) {
unsigned int value = (1 << ADAPTIVE_BACKOFF_START) - 1;
return adaptive_counter_bits(value, ADAPTIVE_BACKOFF_START);
}
static inline uint16_t
adaptive_counter_backoff(uint16_t counter) {
unsigned int backoff = counter & ((1<<ADAPTIVE_BACKOFF_BITS)-1);
backoff++;
if (backoff > MAX_BACKOFF_VALUE) {
backoff = MAX_BACKOFF_VALUE;
}
unsigned int value = (1 << backoff) - 1;
return adaptive_counter_bits(value, backoff);
}
#ifdef __cplusplus
}
#endif

View file

@ -0,0 +1,3 @@
Use exponential backoff for specialization counters in the interpreter. Can
reduce the number of failed specializations significantly and avoid slowdown
for those parts of a program that are not suitable for specialization.

View file

@ -1561,7 +1561,11 @@ eval_frame_handle_pending(PyThreadState *tstate)
dtrace_function_entry(frame); \
}
#define ADAPTIVE_COUNTER_IS_ZERO(cache) \
(cache)->counter < (1<<ADAPTIVE_BACKOFF_BITS)
#define DECREMENT_ADAPTIVE_COUNTER(cache) \
(cache)->counter -= (1<<ADAPTIVE_BACKOFF_BITS)
static int
trace_function_entry(PyThreadState *tstate, _PyInterpreterFrame *frame)
@ -2156,7 +2160,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
TARGET(BINARY_SUBSCR_ADAPTIVE) {
_PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr;
if (cache->counter == 0) {
if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
PyObject *sub = TOP();
PyObject *container = SECOND();
next_instr--;
@ -2167,7 +2171,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}
else {
STAT_INC(BINARY_SUBSCR, deferred);
cache->counter--;
DECREMENT_ADAPTIVE_COUNTER(cache);
JUMP_TO_INSTRUCTION(BINARY_SUBSCR);
}
}
@ -2321,7 +2325,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
TARGET(STORE_SUBSCR_ADAPTIVE) {
_PyStoreSubscrCache *cache = (_PyStoreSubscrCache *)next_instr;
if (cache->counter == 0) {
if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
PyObject *sub = TOP();
PyObject *container = SECOND();
next_instr--;
@ -2332,7 +2336,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}
else {
STAT_INC(STORE_SUBSCR, deferred);
cache->counter--;
DECREMENT_ADAPTIVE_COUNTER(cache);
JUMP_TO_INSTRUCTION(STORE_SUBSCR);
}
}
@ -2815,7 +2819,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
TARGET(UNPACK_SEQUENCE_ADAPTIVE) {
assert(cframe.use_tracing == 0);
_PyUnpackSequenceCache *cache = (_PyUnpackSequenceCache *)next_instr;
if (cache->counter == 0) {
if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
PyObject *seq = TOP();
next_instr--;
_Py_Specialize_UnpackSequence(seq, next_instr, oparg);
@ -2823,7 +2827,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}
else {
STAT_INC(UNPACK_SEQUENCE, deferred);
cache->counter--;
DECREMENT_ADAPTIVE_COUNTER(cache);
JUMP_TO_INSTRUCTION(UNPACK_SEQUENCE);
}
}
@ -3056,7 +3060,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
TARGET(LOAD_GLOBAL_ADAPTIVE) {
assert(cframe.use_tracing == 0);
_PyLoadGlobalCache *cache = (_PyLoadGlobalCache *)next_instr;
if (cache->counter == 0) {
if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
PyObject *name = GETITEM(names, oparg>>1);
next_instr--;
if (_Py_Specialize_LoadGlobal(GLOBALS(), BUILTINS(), next_instr, name) < 0) {
@ -3066,7 +3070,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}
else {
STAT_INC(LOAD_GLOBAL, deferred);
cache->counter--;
DECREMENT_ADAPTIVE_COUNTER(cache);
JUMP_TO_INSTRUCTION(LOAD_GLOBAL);
}
}
@ -3480,7 +3484,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
TARGET(LOAD_ATTR_ADAPTIVE) {
assert(cframe.use_tracing == 0);
_PyAttrCache *cache = (_PyAttrCache *)next_instr;
if (cache->counter == 0) {
if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
PyObject *owner = TOP();
PyObject *name = GETITEM(names, oparg);
next_instr--;
@ -3491,7 +3495,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}
else {
STAT_INC(LOAD_ATTR, deferred);
cache->counter--;
DECREMENT_ADAPTIVE_COUNTER(cache);
JUMP_TO_INSTRUCTION(LOAD_ATTR);
}
}
@ -3589,7 +3593,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
TARGET(STORE_ATTR_ADAPTIVE) {
assert(cframe.use_tracing == 0);
_PyAttrCache *cache = (_PyAttrCache *)next_instr;
if (cache->counter == 0) {
if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
PyObject *owner = TOP();
PyObject *name = GETITEM(names, oparg);
next_instr--;
@ -3600,7 +3604,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}
else {
STAT_INC(STORE_ATTR, deferred);
cache->counter--;
DECREMENT_ADAPTIVE_COUNTER(cache);
JUMP_TO_INSTRUCTION(STORE_ATTR);
}
}
@ -3719,7 +3723,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
TARGET(COMPARE_OP_ADAPTIVE) {
assert(cframe.use_tracing == 0);
_PyCompareOpCache *cache = (_PyCompareOpCache *)next_instr;
if (cache->counter == 0) {
if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
PyObject *right = TOP();
PyObject *left = SECOND();
next_instr--;
@ -3728,7 +3732,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}
else {
STAT_INC(COMPARE_OP, deferred);
cache->counter--;
DECREMENT_ADAPTIVE_COUNTER(cache);
JUMP_TO_INSTRUCTION(COMPARE_OP);
}
}
@ -4526,7 +4530,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
TARGET(LOAD_METHOD_ADAPTIVE) {
assert(cframe.use_tracing == 0);
_PyLoadMethodCache *cache = (_PyLoadMethodCache *)next_instr;
if (cache->counter == 0) {
if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
PyObject *owner = TOP();
PyObject *name = GETITEM(names, oparg);
next_instr--;
@ -4537,7 +4541,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}
else {
STAT_INC(LOAD_METHOD, deferred);
cache->counter--;
DECREMENT_ADAPTIVE_COUNTER(cache);
JUMP_TO_INSTRUCTION(LOAD_METHOD);
}
}
@ -4775,7 +4779,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
TARGET(CALL_ADAPTIVE) {
_PyCallCache *cache = (_PyCallCache *)next_instr;
if (cache->counter == 0) {
if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
next_instr--;
int is_meth = is_method(stack_pointer, oparg);
int nargs = oparg + is_meth;
@ -4789,7 +4793,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}
else {
STAT_INC(CALL, deferred);
cache->counter--;
DECREMENT_ADAPTIVE_COUNTER(cache);
goto call_function;
}
}
@ -5521,7 +5525,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
TARGET(BINARY_OP_ADAPTIVE) {
assert(cframe.use_tracing == 0);
_PyBinaryOpCache *cache = (_PyBinaryOpCache *)next_instr;
if (cache->counter == 0) {
if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
PyObject *lhs = SECOND();
PyObject *rhs = TOP();
next_instr--;
@ -5530,7 +5534,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}
else {
STAT_INC(BINARY_OP, deferred);
cache->counter--;
DECREMENT_ADAPTIVE_COUNTER(cache);
JUMP_TO_INSTRUCTION(BINARY_OP);
}
}
@ -5658,7 +5662,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
assert(adaptive_opcode);
_Py_SET_OPCODE(next_instr[-1], adaptive_opcode);
STAT_INC(opcode, deopt);
*counter = ADAPTIVE_CACHE_BACKOFF;
*counter = adaptive_counter_start();
}
next_instr--;
DISPATCH_GOTO();

View file

@ -321,7 +321,7 @@ _PyCode_Quicken(PyCodeObject *code)
}
static inline int
initial_counter_value(void) {
miss_counter_start(void) {
/* Starting value for the counter.
* This value needs to be not too low, otherwise
* it would cause excessive de-optimization.
@ -743,12 +743,12 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name)
fail:
STAT_INC(LOAD_ATTR, failure);
assert(!PyErr_Occurred());
cache->counter = ADAPTIVE_CACHE_BACKOFF;
cache->counter = adaptive_counter_backoff(cache->counter);
return 0;
success:
STAT_INC(LOAD_ATTR, success);
assert(!PyErr_Occurred());
cache->counter = initial_counter_value();
cache->counter = miss_counter_start();
return 0;
}
@ -825,12 +825,12 @@ _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name)
fail:
STAT_INC(STORE_ATTR, failure);
assert(!PyErr_Occurred());
cache->counter = ADAPTIVE_CACHE_BACKOFF;
cache->counter = adaptive_counter_backoff(cache->counter);
return 0;
success:
STAT_INC(STORE_ATTR, success);
assert(!PyErr_Occurred());
cache->counter = initial_counter_value();
cache->counter = miss_counter_start();
return 0;
}
@ -1041,14 +1041,13 @@ _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name)
success:
STAT_INC(LOAD_METHOD, success);
assert(!PyErr_Occurred());
cache->counter = initial_counter_value();
cache->counter = miss_counter_start();
return 0;
fail:
STAT_INC(LOAD_METHOD, failure);
assert(!PyErr_Occurred());
cache->counter = ADAPTIVE_CACHE_BACKOFF;
cache->counter = adaptive_counter_backoff(cache->counter);
return 0;
}
int
@ -1124,12 +1123,12 @@ _Py_Specialize_LoadGlobal(
fail:
STAT_INC(LOAD_GLOBAL, failure);
assert(!PyErr_Occurred());
cache->counter = ADAPTIVE_CACHE_BACKOFF;
cache->counter = adaptive_counter_backoff(cache->counter);
return 0;
success:
STAT_INC(LOAD_GLOBAL, success);
assert(!PyErr_Occurred());
cache->counter = initial_counter_value();
cache->counter = miss_counter_start();
return 0;
}
@ -1253,12 +1252,12 @@ _Py_Specialize_BinarySubscr(
fail:
STAT_INC(BINARY_SUBSCR, failure);
assert(!PyErr_Occurred());
cache->counter = ADAPTIVE_CACHE_BACKOFF;
cache->counter = adaptive_counter_backoff(cache->counter);
return 0;
success:
STAT_INC(BINARY_SUBSCR, success);
assert(!PyErr_Occurred());
cache->counter = initial_counter_value();
cache->counter = miss_counter_start();
return 0;
}
@ -1357,12 +1356,12 @@ _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT *ins
fail:
STAT_INC(STORE_SUBSCR, failure);
assert(!PyErr_Occurred());
cache->counter = ADAPTIVE_CACHE_BACKOFF;
cache->counter = adaptive_counter_backoff(cache->counter);
return 0;
success:
STAT_INC(STORE_SUBSCR, success);
assert(!PyErr_Occurred());
cache->counter = initial_counter_value();
cache->counter = miss_counter_start();
return 0;
}
@ -1674,12 +1673,12 @@ _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
if (fail) {
STAT_INC(CALL, failure);
assert(!PyErr_Occurred());
cache->counter = ADAPTIVE_CACHE_BACKOFF;
cache->counter = adaptive_counter_backoff(cache->counter);
}
else {
STAT_INC(CALL, success);
assert(!PyErr_Occurred());
cache->counter = initial_counter_value();
cache->counter = miss_counter_start();
}
return 0;
}
@ -1827,11 +1826,11 @@ _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
}
SPECIALIZATION_FAIL(BINARY_OP, binary_op_fail_kind(oparg, lhs, rhs));
STAT_INC(BINARY_OP, failure);
cache->counter = ADAPTIVE_CACHE_BACKOFF;
cache->counter = adaptive_counter_backoff(cache->counter);
return;
success:
STAT_INC(BINARY_OP, success);
cache->counter = initial_counter_value();
cache->counter = miss_counter_start();
}
@ -1953,11 +1952,11 @@ _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
SPECIALIZATION_FAIL(COMPARE_OP, compare_op_fail_kind(lhs, rhs));
failure:
STAT_INC(COMPARE_OP, failure);
cache->counter = ADAPTIVE_CACHE_BACKOFF;
cache->counter = adaptive_counter_backoff(cache->counter);
return;
success:
STAT_INC(COMPARE_OP, success);
cache->counter = initial_counter_value();
cache->counter = miss_counter_start();
}
#ifdef Py_STATS
@ -2003,11 +2002,11 @@ _Py_Specialize_UnpackSequence(PyObject *seq, _Py_CODEUNIT *instr, int oparg)
SPECIALIZATION_FAIL(UNPACK_SEQUENCE, unpack_sequence_fail_kind(seq));
failure:
STAT_INC(UNPACK_SEQUENCE, failure);
cache->counter = ADAPTIVE_CACHE_BACKOFF;
cache->counter = adaptive_counter_backoff(cache->counter);
return;
success:
STAT_INC(UNPACK_SEQUENCE, success);
cache->counter = initial_counter_value();
cache->counter = miss_counter_start();
}
#ifdef Py_STATS