From a9da085015db8cbb81f660158864ac94fe6c67a2 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Wed, 16 Feb 2022 08:48:16 -0800 Subject: [PATCH] bpo-46702: Specialize UNPACK_SEQUENCE (GH-31240) --- Include/internal/pycore_code.h | 2 + Include/opcode.h | 14 ++- Lib/opcode.py | 4 + .../2022-02-09-16-36-11.bpo-46702.LcaEuC.rst | 2 + Python/ceval.c | 99 ++++++++++++------- Python/opcode_targets.h | 18 ++-- Python/specialize.c | 92 ++++++++++------- 7 files changed, 148 insertions(+), 83 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-02-09-16-36-11.bpo-46702.LcaEuC.rst diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 2d8fe20e1a6..ead9541c298 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -276,6 +276,8 @@ int _Py_Specialize_CallNoKw(PyObject *callable, _Py_CODEUNIT *instr, int nargs, void _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache); void _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache); +void _Py_Specialize_UnpackSequence(PyObject *seq, _Py_CODEUNIT *instr, + SpecializedCacheEntry *cache); /* Deallocator function for static codeobjects used in deepfreeze.py */ void _PyStaticCode_Dealloc(PyCodeObject *co); diff --git a/Include/opcode.h b/Include/opcode.h index 58fc6280893..df45e7b5e5a 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -169,11 +169,15 @@ extern "C" { #define STORE_ATTR_INSTANCE_VALUE 81 #define STORE_ATTR_SLOT 131 #define STORE_ATTR_WITH_HINT 140 -#define LOAD_FAST__LOAD_FAST 141 -#define STORE_FAST__LOAD_FAST 143 -#define LOAD_FAST__LOAD_CONST 150 -#define LOAD_CONST__LOAD_FAST 153 -#define STORE_FAST__STORE_FAST 154 +#define UNPACK_SEQUENCE_ADAPTIVE 141 +#define UNPACK_SEQUENCE_LIST 143 +#define UNPACK_SEQUENCE_TUPLE 150 +#define UNPACK_SEQUENCE_TWO_TUPLE 153 +#define LOAD_FAST__LOAD_FAST 154 +#define STORE_FAST__LOAD_FAST 158 +#define LOAD_FAST__LOAD_CONST 159 +#define LOAD_CONST__LOAD_FAST 161 +#define STORE_FAST__STORE_FAST 166 #define DO_TRACING 255 #ifdef NEED_OPCODE_JUMP_TABLES static uint32_t _PyOpcode_RelativeJump[8] = { diff --git a/Lib/opcode.py b/Lib/opcode.py index a1f0c6e4326..8237aa7fa3d 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -283,6 +283,10 @@ def jabs_op(name, op): "STORE_ATTR_INSTANCE_VALUE", "STORE_ATTR_SLOT", "STORE_ATTR_WITH_HINT", + "UNPACK_SEQUENCE_ADAPTIVE", + "UNPACK_SEQUENCE_LIST", + "UNPACK_SEQUENCE_TUPLE", + "UNPACK_SEQUENCE_TWO_TUPLE", # Super instructions "LOAD_FAST__LOAD_FAST", "STORE_FAST__LOAD_FAST", diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-02-09-16-36-11.bpo-46702.LcaEuC.rst b/Misc/NEWS.d/next/Core and Builtins/2022-02-09-16-36-11.bpo-46702.LcaEuC.rst new file mode 100644 index 00000000000..8fe75852896 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-02-09-16-36-11.bpo-46702.LcaEuC.rst @@ -0,0 +1,2 @@ +Specialize :opcode:`UNPACK_SEQUENCE` for :class:`tuple` and :class:`list` +unpackings. diff --git a/Python/ceval.c b/Python/ceval.c index ad8b05400d5..b900de53c8c 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -2738,52 +2738,84 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr TARGET(UNPACK_SEQUENCE) { PREDICTED(UNPACK_SEQUENCE); - PyObject *seq = POP(), *item, **items; -#ifdef Py_STATS - extern int _PySpecialization_ClassifySequence(PyObject *, int); - _py_stats.opcode_stats[UNPACK_SEQUENCE].specialization.failure++; - _py_stats.opcode_stats[UNPACK_SEQUENCE].specialization. - failure_kinds[_PySpecialization_ClassifySequence(seq, oparg)]++; -#endif - if (PyTuple_CheckExact(seq) && - PyTuple_GET_SIZE(seq) == oparg) { - items = ((PyTupleObject *)seq)->ob_item; - while (oparg--) { - item = items[oparg]; - Py_INCREF(item); - PUSH(item); - } - } else if (PyList_CheckExact(seq) && - PyList_GET_SIZE(seq) == oparg) { - items = ((PyListObject *)seq)->ob_item; - while (oparg--) { - item = items[oparg]; - Py_INCREF(item); - PUSH(item); - } - } else if (unpack_iterable(tstate, seq, oparg, -1, - stack_pointer + oparg)) { - STACK_GROW(oparg); - } else { - /* unpack_iterable() raised an exception */ + PyObject *seq = POP(); + PyObject **top = stack_pointer + oparg; + if (!unpack_iterable(tstate, seq, oparg, -1, top)) { Py_DECREF(seq); goto error; } + STACK_GROW(oparg); Py_DECREF(seq); DISPATCH(); } + TARGET(UNPACK_SEQUENCE_ADAPTIVE) { + assert(cframe.use_tracing == 0); + SpecializedCacheEntry *cache = GET_CACHE(); + if (cache->adaptive.counter == 0) { + PyObject *seq = TOP(); + next_instr--; + _Py_Specialize_UnpackSequence(seq, next_instr, cache); + DISPATCH(); + } + else { + STAT_INC(UNPACK_SEQUENCE, deferred); + cache->adaptive.counter--; + oparg = cache->adaptive.original_oparg; + JUMP_TO_INSTRUCTION(UNPACK_SEQUENCE); + } + } + + TARGET(UNPACK_SEQUENCE_TWO_TUPLE) { + PyObject *seq = TOP(); + DEOPT_IF(!PyTuple_CheckExact(seq), UNPACK_SEQUENCE); + DEOPT_IF(PyTuple_GET_SIZE(seq) != 2, UNPACK_SEQUENCE); + STAT_INC(UNPACK_SEQUENCE, hit); + SET_TOP(Py_NewRef(PyTuple_GET_ITEM(seq, 1))); + PUSH(Py_NewRef(PyTuple_GET_ITEM(seq, 0))); + Py_DECREF(seq); + NOTRACE_DISPATCH(); + } + + TARGET(UNPACK_SEQUENCE_TUPLE) { + PyObject *seq = TOP(); + int len = GET_CACHE()->adaptive.original_oparg; + DEOPT_IF(!PyTuple_CheckExact(seq), UNPACK_SEQUENCE); + DEOPT_IF(PyTuple_GET_SIZE(seq) != len, UNPACK_SEQUENCE); + STAT_INC(UNPACK_SEQUENCE, hit); + STACK_SHRINK(1); + PyObject **items = _PyTuple_ITEMS(seq); + while (len--) { + PUSH(Py_NewRef(items[len])); + } + Py_DECREF(seq); + NOTRACE_DISPATCH(); + } + + TARGET(UNPACK_SEQUENCE_LIST) { + PyObject *seq = TOP(); + int len = GET_CACHE()->adaptive.original_oparg; + DEOPT_IF(!PyList_CheckExact(seq), UNPACK_SEQUENCE); + DEOPT_IF(PyList_GET_SIZE(seq) != len, UNPACK_SEQUENCE); + STAT_INC(UNPACK_SEQUENCE, hit); + STACK_SHRINK(1); + PyObject **items = _PyList_ITEMS(seq); + while (len--) { + PUSH(Py_NewRef(items[len])); + } + Py_DECREF(seq); + NOTRACE_DISPATCH(); + } + TARGET(UNPACK_EX) { int totalargs = 1 + (oparg & 0xFF) + (oparg >> 8); PyObject *seq = POP(); - - if (unpack_iterable(tstate, seq, oparg & 0xFF, oparg >> 8, - stack_pointer + totalargs)) { - stack_pointer += totalargs; - } else { + PyObject **top = stack_pointer + totalargs; + if (!unpack_iterable(tstate, seq, oparg & 0xFF, oparg >> 8, top)) { Py_DECREF(seq); goto error; } + STACK_GROW(totalargs); Py_DECREF(seq); DISPATCH(); } @@ -5396,6 +5428,7 @@ MISS_WITH_CACHE(CALL) MISS_WITH_CACHE(BINARY_OP) MISS_WITH_CACHE(COMPARE_OP) MISS_WITH_CACHE(BINARY_SUBSCR) +MISS_WITH_CACHE(UNPACK_SEQUENCE) MISS_WITH_OPARG_COUNTER(STORE_SUBSCR) binary_subscr_dict_error: diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index f47da2bbb1e..1e137f93c72 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -140,32 +140,32 @@ static void *opcode_targets[256] = { &&TARGET_STORE_DEREF, &&TARGET_DELETE_DEREF, &&TARGET_STORE_ATTR_WITH_HINT, - &&TARGET_LOAD_FAST__LOAD_FAST, + &&TARGET_UNPACK_SEQUENCE_ADAPTIVE, &&TARGET_CALL_FUNCTION_EX, - &&TARGET_STORE_FAST__LOAD_FAST, + &&TARGET_UNPACK_SEQUENCE_LIST, &&TARGET_EXTENDED_ARG, &&TARGET_LIST_APPEND, &&TARGET_SET_ADD, &&TARGET_MAP_ADD, &&TARGET_LOAD_CLASSDEREF, &&TARGET_COPY_FREE_VARS, - &&TARGET_LOAD_FAST__LOAD_CONST, + &&TARGET_UNPACK_SEQUENCE_TUPLE, &&TARGET_RESUME, &&TARGET_MATCH_CLASS, - &&TARGET_LOAD_CONST__LOAD_FAST, - &&TARGET_STORE_FAST__STORE_FAST, + &&TARGET_UNPACK_SEQUENCE_TWO_TUPLE, + &&TARGET_LOAD_FAST__LOAD_FAST, &&TARGET_FORMAT_VALUE, &&TARGET_BUILD_CONST_KEY_MAP, &&TARGET_BUILD_STRING, - &&_unknown_opcode, - &&_unknown_opcode, + &&TARGET_STORE_FAST__LOAD_FAST, + &&TARGET_LOAD_FAST__LOAD_CONST, &&TARGET_LOAD_METHOD, - &&_unknown_opcode, + &&TARGET_LOAD_CONST__LOAD_FAST, &&TARGET_LIST_EXTEND, &&TARGET_SET_UPDATE, &&TARGET_DICT_MERGE, &&TARGET_DICT_UPDATE, - &&_unknown_opcode, + &&TARGET_STORE_FAST__STORE_FAST, &&TARGET_PRECALL_FUNCTION, &&TARGET_PRECALL_METHOD, &&_unknown_opcode, diff --git a/Python/specialize.c b/Python/specialize.c index 0e1ffad1b91..ab2363cab9f 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -51,6 +51,7 @@ static uint8_t adaptive_opcodes[256] = { [STORE_ATTR] = STORE_ATTR_ADAPTIVE, [BINARY_OP] = BINARY_OP_ADAPTIVE, [COMPARE_OP] = COMPARE_OP_ADAPTIVE, + [UNPACK_SEQUENCE] = UNPACK_SEQUENCE_ADAPTIVE, }; /* The number of cache entries required for a "family" of instructions. */ @@ -64,6 +65,7 @@ static uint8_t cache_requirements[256] = { [STORE_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */ [BINARY_OP] = 1, // _PyAdaptiveEntry [COMPARE_OP] = 1, /* _PyAdaptiveEntry */ + [UNPACK_SEQUENCE] = 1, // _PyAdaptiveEntry }; Py_ssize_t _Py_QuickenedCount = 0; @@ -155,6 +157,7 @@ _Py_GetSpecializationStats(void) { err += add_stat_dict(stats, CALL, "call"); err += add_stat_dict(stats, BINARY_OP, "binary_op"); err += add_stat_dict(stats, COMPARE_OP, "compare_op"); + err += add_stat_dict(stats, UNPACK_SEQUENCE, "unpack_sequence"); if (err < 0) { Py_DECREF(stats); return NULL; @@ -607,27 +610,10 @@ initial_counter_value(void) { #define SPEC_FAIL_FOR_ITER_DICT_VALUES 22 #define SPEC_FAIL_FOR_ITER_ENUMERATE 23 -/* UNPACK_SEQUENCE */ -#define SPEC_FAIL_UNPACK_SEQUENCE_TUPLE_0 9 -#define SPEC_FAIL_UNPACK_SEQUENCE_TUPLE_1 10 -#define SPEC_FAIL_UNPACK_SEQUENCE_TUPLE_2 11 -#define SPEC_FAIL_UNPACK_SEQUENCE_TUPLE_3 12 -#define SPEC_FAIL_UNPACK_SEQUENCE_TUPLE_4 13 -#define SPEC_FAIL_UNPACK_SEQUENCE_TUPLE_N 14 +// UNPACK_SEQUENCE -#define SPEC_FAIL_UNPACK_SEQUENCE_LIST_0 15 -#define SPEC_FAIL_UNPACK_SEQUENCE_LIST_1 16 -#define SPEC_FAIL_UNPACK_SEQUENCE_LIST_2 17 -#define SPEC_FAIL_UNPACK_SEQUENCE_LIST_3 18 -#define SPEC_FAIL_UNPACK_SEQUENCE_LIST_4 19 -#define SPEC_FAIL_UNPACK_SEQUENCE_LIST_N 20 - -#define SPEC_FAIL_UNPACK_SEQUENCE_OTHER_0 21 -#define SPEC_FAIL_UNPACK_SEQUENCE_OTHER_1 22 -#define SPEC_FAIL_UNPACK_SEQUENCE_OTHER_2 23 -#define SPEC_FAIL_UNPACK_SEQUENCE_OTHER_3 24 -#define SPEC_FAIL_UNPACK_SEQUENCE_OTHER_4 25 -#define SPEC_FAIL_UNPACK_SEQUENCE_OTHER_N 26 +#define SPEC_FAIL_UNPACK_SEQUENCE_ITERATOR 8 +#define SPEC_FAIL_UNPACK_SEQUENCE_SEQUENCE 9 static int @@ -1949,6 +1935,56 @@ _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, adaptive->counter = initial_counter_value(); } +#ifdef Py_STATS +static int +unpack_sequence_fail_kind(PyObject *seq) +{ + if (PySequence_Check(seq)) { + return SPEC_FAIL_UNPACK_SEQUENCE_SEQUENCE; + } + if (PyIter_Check(seq)) { + return SPEC_FAIL_UNPACK_SEQUENCE_ITERATOR; + } + return SPEC_FAIL_OTHER; +} +#endif + +void +_Py_Specialize_UnpackSequence(PyObject *seq, _Py_CODEUNIT *instr, + SpecializedCacheEntry *cache) +{ + _PyAdaptiveEntry *adaptive = &cache->adaptive; + if (PyTuple_CheckExact(seq)) { + if (PyTuple_GET_SIZE(seq) != adaptive->original_oparg) { + SPECIALIZATION_FAIL(UNPACK_SEQUENCE, SPEC_FAIL_EXPECTED_ERROR); + goto failure; + } + if (PyTuple_GET_SIZE(seq) == 2) { + *instr = _Py_MAKECODEUNIT(UNPACK_SEQUENCE_TWO_TUPLE, + _Py_OPARG(*instr)); + goto success; + } + *instr = _Py_MAKECODEUNIT(UNPACK_SEQUENCE_TUPLE, _Py_OPARG(*instr)); + goto success; + } + if (PyList_CheckExact(seq)) { + if (PyList_GET_SIZE(seq) != adaptive->original_oparg) { + SPECIALIZATION_FAIL(UNPACK_SEQUENCE, SPEC_FAIL_EXPECTED_ERROR); + goto failure; + } + *instr = _Py_MAKECODEUNIT(UNPACK_SEQUENCE_LIST, _Py_OPARG(*instr)); + goto success; + } + SPECIALIZATION_FAIL(UNPACK_SEQUENCE, unpack_sequence_fail_kind(seq)); +failure: + STAT_INC(UNPACK_SEQUENCE, failure); + cache_backoff(adaptive); + return; +success: + STAT_INC(UNPACK_SEQUENCE, success); + adaptive->counter = initial_counter_value(); +} + #ifdef Py_STATS int @@ -2001,22 +2037,6 @@ int return SPEC_FAIL_OTHER; } -int -_PySpecialization_ClassifySequence(PyObject *seq, int n) -{ - assert(n >= 0); - if (n > 4) { - n = 5; - } - if (PyTuple_CheckExact(seq)) { - return SPEC_FAIL_UNPACK_SEQUENCE_TUPLE_0 + n; - } - if (PyList_CheckExact(seq)) { - return SPEC_FAIL_UNPACK_SEQUENCE_LIST_0 + n; - } - return SPEC_FAIL_UNPACK_SEQUENCE_OTHER_0 + n; -} - int _PySpecialization_ClassifyCallable(PyObject *callable) {