[vm] Introduce cachable idempotent calls

Adds a `CachableIdempotentCallInstr` that can be invoked via
`@pragma('vm:cachable-idempotent')` if the call site is force
optimized.

The object pool is not visited by the scavenger, so we store the
results as unboxed integers. Consequently, only Dart functions that
return integers can be cached.

Cachable idempotent calls should never be inlined: after the first
call, the function will not be called again.

The call itself is on a slow path to avoid register spilling on the
fast path.
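
Sketch of the intended usage (a minimal example with illustrative
names, not code from this CL):

  @pragma('vm:never-inline')
  @pragma('vm:cachable-idempotent')
  int expensiveInit(int seed) {
    // Runs at most once per call site; the unboxed integer result is
    // then stored in the object pool.
    return seed * 2;
  }

  @pragma('vm:force-optimize')
  int caller() {
    // The first execution takes the slow path and calls expensiveInit;
    // subsequent executions load the cached 42 from the object pool.
    return expensiveInit(/*must be const*/ 21);
  }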

TEST=vm/cc/IRTest_CachableIdempotentCall
TEST=runtime/tests/vm/dart/cachable_idempotent_test.dart

Bug: https://github.com/dart-lang/sdk/issues/51618
Change-Id: I612e896f27add76f57796c060157e14cc687a0fd
Cq-Include-Trybots: luci.dart.try:vm-aot-android-release-arm64c-try,vm-aot-android-release-arm_x64-try,vm-aot-asan-linux-release-x64-try,vm-aot-linux-debug-simarm_x64-try,vm-aot-linux-debug-simriscv64-try,vm-aot-mac-release-arm64-try,vm-aot-mac-release-x64-try,vm-aot-msan-linux-release-x64-try,vm-aot-obfuscate-linux-release-x64-try,vm-aot-tsan-linux-release-x64-try,vm-aot-ubsan-linux-release-x64-try,vm-aot-win-debug-arm64-try,vm-aot-win-debug-x64c-try,vm-aot-win-release-x64-try,vm-appjit-linux-debug-x64-try,vm-asan-linux-release-x64-try,vm-checked-mac-release-arm64-try,vm-eager-optimization-linux-release-ia32-try,vm-eager-optimization-linux-release-x64-try,vm-kernel-linux-debug-x64-try,vm-kernel-precomp-linux-release-x64-try,vm-linux-debug-ia32-try,vm-linux-debug-simriscv64-try,vm-linux-debug-x64-try,vm-mac-debug-arm64-try,vm-mac-debug-x64-try,vm-msan-linux-release-x64-try,vm-reload-linux-debug-x64-try,vm-reload-rollback-linux-debug-x64-try,vm-tsan-linux-release-x64-try,vm-ubsan-linux-release-x64-try,vm-win-debug-arm64-try,vm-win-debug-x64-try,vm-win-debug-x64c-try,vm-win-release-ia32-try
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/301601
Reviewed-by: Ryan Macnak <rmacnak@google.com>
Reviewed-by: Martin Kustermann <kustermann@google.com>
Daco Harkes 2023-10-27 07:55:47 +00:00
parent db3fdddd07
commit 0cd55a18c1
31 changed files with 779 additions and 38 deletions


@@ -38,6 +38,8 @@ These pragmas can cause unsound behavior if used incorrectly and therefore are o
| `vm:exact-result-type` | [Declaring an exact result type of a method](compiler/pragmas_recognized_by_compiler.md#providing-an-exact-result-type) |
| `vm:recognized` | [Marking this as a recognized method](compiler/pragmas_recognized_by_compiler.md#marking-recognized-methods) |
| `vm:idempotent` | Method marked with this pragma can be repeated or restarted multiple times without change to its effect. Loading, storing of memory values are examples of this, while reads and writes from file are examples of non-idempotent methods. At present, use of this pragma is limited to driving inlining of force-optimized functions. |
| `vm:cachable-idempotent` | Functions marked with this pragma will have their call sites cache the return value. Not supported on ia32. The calling function must be marked with the pragma `vm:force-optimize`. |
| `vm:force-optimize` | Functions marked with this pragma will be compiled with the optimized pipeline and may not deoptimize. |
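
For illustration, a minimal sketch of the caching semantics (every call site caches its own result, even when the arguments are identical):

```dart
int _n = 0;

@pragma('vm:never-inline')
@pragma('vm:cachable-idempotent')
int increment() => ++_n;

@pragma('vm:force-optimize')
int twice() => increment() + increment(); // Two distinct call sites.

// The first invocation of twice() executes both calls and caches 1 and 2,
// returning 3. Every later invocation returns the cached 3 without
// calling increment() again.
```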
## Pragmas ignored in user code


@@ -0,0 +1,236 @@
// Copyright (c) 2023, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
import 'dart:ffi';
import 'package:expect/expect.dart';
void main() {
testMultipleIncrement();
reset();
testMultipleCallSites();
reset();
testManyArguments();
reset();
testNonIntArguments();
reset();
testLargeInt();
reset();
testIntArguments();
reset();
testDoubleArguments();
print('done');
}
@pragma('vm:force-optimize')
void testMultipleIncrement() {
int result = 0;
final counter = makeCounter(100000);
while (counter()) {
// The compiler replaces this call with a cacheable call,
// which will lead to the counter no longer being incremented.
// Make sure to return the value, so we can see that the boxing and
// unboxing works as expected.
result = cachedIncrement(/*must be const*/ 3);
}
// Since this call site is force optimized, we should never recompile and thus
// we only ever increment the global counter once.
Expect.equals(3, result);
}
/// A global counter increment, except that the call sites are cached.
///
/// Arguments passed to this function must be const.
/// Call sites should be rewritten to cache using the pool.
@pragma('vm:never-inline')
@pragma('vm:cachable-idempotent')
int cachedIncrement(int amount) {
return _globalCounter += amount;
}
int _globalCounter = 0;
void reset() {
print('reset');
_globalCounter = 0;
}
/// Helper for writing loops in vm:force-optimize functions without
/// instance calls.
///
/// A regular for loop uses `operator+` on int, which is an instance call.
bool Function() makeCounter(int count) {
return () => count-- >= 0;
}
@pragma('vm:force-optimize')
void testMultipleCallSites() {
int result = 0;
final counter = makeCounter(10);
result = cachedIncrement(1);
while (counter()) {
result = cachedIncrement(10);
result = cachedIncrement(10);
}
result = cachedIncrement(100);
// All call sites are cached individually.
// Even if the arguments are identical.
Expect.equals(121, result);
}
@pragma('vm:force-optimize')
void testManyArguments() {
final result = manyArguments(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
Expect.equals(55, result);
}
@pragma('vm:never-inline')
@pragma('vm:cachable-idempotent')
int manyArguments(int i1, int i2, int i3, int i4, int i5, int i6, int i7,
int i8, int i9, int i10) {
return i1 + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + i10;
}
@pragma('vm:force-optimize')
void testNonIntArguments() {
final result = lotsOfConstArguments(
"foo",
3.0,
3,
const _MyClass(_MyClass(42)),
);
Expect.equals(37, result);
}
@pragma('vm:never-inline')
@pragma('vm:cachable-idempotent')
int lotsOfConstArguments(String s, double d, int i, _MyClass m) {
return [s, d, i, m].toString().length;
}
final class _MyClass {
final Object i;
const _MyClass(this.i);
@override
String toString() => '_MyClass($i)';
}
@pragma('vm:force-optimize')
void testLargeInt() {
final counter = makeCounter(10);
while (counter()) {
if (is64bitsArch()) {
final result1 = cachedIncrement(0x7FFFFFFFFFFFFFFF);
Expect.equals(0x7FFFFFFFFFFFFFFF, result1);
_globalCounter = 0;
final result2 = cachedIncrement(0x8000000000000000);
Expect.equals(0x8000000000000000, result2);
_globalCounter = 0;
final result3 = cachedIncrement(0xFFFFFFFFFFFFFFFF);
Expect.equals(0xFFFFFFFFFFFFFFFF, result3);
} else {
final result1 = cachedIncrement(0x7FFFFFFF);
Expect.equals(0x7FFFFFFF, result1);
_globalCounter = 0;
final result2 = cachedIncrement(0x80000000);
Expect.equals(0x80000000, result2);
_globalCounter = 0;
final result3 = cachedIncrement(0xFFFFFFFF);
Expect.equals(0xFFFFFFFF, result3);
}
}
}
bool is64bitsArch() => sizeOf<Pointer>() == 8;
@pragma('vm:force-optimize')
void testIntArguments() {
final result = lotsOfIntArguments(
1,
2,
3,
4,
5,
6,
7,
8,
);
Expect.equals(36, result);
// Do a second call with different values to prevent the argument values
// propagating to the function body in TFA.
final result2 = lotsOfIntArguments(
101,
102,
103,
104,
105,
106,
107,
108,
);
Expect.equals(836, result2);
}
@pragma('vm:never-inline')
@pragma('vm:cachable-idempotent')
int lotsOfIntArguments(
int d1,
int d2,
int d3,
int d4,
int d5,
int d6,
int d7,
int d8,
) {
print([d1, d2, d3, d4, d5, d6, d7, d8]);
return (d1 + d2 + d3 + d4 + d5 + d6 + d7 + d8).floor();
}
@pragma('vm:force-optimize')
void testDoubleArguments() {
final result = lotsOfDoubleArguments(
1.0,
2.0,
3.0,
4.0,
5.0,
6.0,
7.0,
8.0,
);
Expect.equals(36, result);
// Do a second call with different values to prevent the argument values
// propagating to the function body in TFA.
final result2 = lotsOfDoubleArguments(
101.0,
102.0,
103.0,
104.0,
105.0,
106.0,
107.0,
108.0,
);
Expect.equals(836, result2);
}
@pragma('vm:never-inline')
@pragma('vm:cachable-idempotent')
int lotsOfDoubleArguments(
double d1,
double d2,
double d3,
double d4,
double d5,
double d6,
double d7,
double d8,
) {
print([d1, d2, d3, d4, d5, d6, d7, d8]);
return (d1 + d2 + d3 + d4 + d5 + d6 + d7 + d8).floor();
}


@@ -32,6 +32,7 @@ dart/snapshot_version_test: Skip # This test is a Dart1 test (script snapshot)
dart/stack_overflow_shared_test: Pass, Slow # Uses --shared-slow-path-triggers-gc flag.
[ $arch == ia32 ]
dart/cachable_idempotent_test: Skip # CachableIdempotent calls are not supported in ia32 because it has no object pool.
dart/disassemble_aot_test: SkipByDesign # IA32 does not support AOT.
dart/regress32597_2_test: Pass, Slow # Uses --optimization-counter-threshold=10 without a kernel service snapshot.
dart/regress38467_test: Pass, Slow # Uses --optimization-counter-threshold=10 without a kernel service snapshot.


@@ -3284,6 +3284,9 @@ class ObjectPoolDeserializationCluster : public DeserializationCluster {
static_cast<intptr_t>(switchable_call_miss_entry_point);
continue;
#endif // defined(DART_PRECOMPILED_RUNTIME)
case ObjectPool::SnapshotBehavior::kSetToZero:
entry.raw_value_ = 0;
continue;
default:
FATAL("Unexpected snapshot behavior: %d\n", snapshot_behavior);
}


@@ -1569,6 +1569,33 @@ void Assembler::LoadWordFromPoolIndex(Register rd,
}
}
void Assembler::StoreWordToPoolIndex(Register value,
intptr_t index,
Register pp,
Condition cond) {
ASSERT((pp != PP) || constant_pool_allowed());
ASSERT(value != pp);
// PP is tagged on ARM.
const int32_t offset =
target::ObjectPool::element_offset(index) - kHeapObjectTag;
int32_t offset_mask = 0;
if (Address::CanHoldLoadOffset(kFourBytes, offset, &offset_mask)) {
str(value, Address(pp, offset), cond);
} else {
int32_t offset_hi = offset & ~offset_mask; // signed
uint32_t offset_lo = offset & offset_mask; // unsigned
// Inline a simplified version of AddImmediate(rd, pp, offset_hi).
Operand o;
if (Operand::CanHold(offset_hi, &o)) {
add(TMP, pp, o, cond);
} else {
LoadImmediate(TMP, offset_hi, cond);
add(TMP, pp, Operand(TMP), cond);
}
str(value, Address(TMP, offset_lo), cond);
}
}
void Assembler::CheckCodePointer() {
#ifdef DEBUG
if (!FLAG_check_code_pointer) {


@@ -983,6 +983,13 @@ class Assembler : public AssemblerBase {
intptr_t index,
Register pp = PP,
Condition cond = AL);
// Store word to pool at the given offset.
//
// Note: clobbers TMP.
void StoreWordToPoolIndex(Register value,
intptr_t index,
Register pp = PP,
Condition cond = AL);
void LoadObject(Register rd, const Object& object, Condition cond = AL);
void LoadUniqueObject(


@@ -434,6 +434,34 @@ void Assembler::LoadWordFromPoolIndex(Register dst,
}
}
void Assembler::StoreWordToPoolIndex(Register src,
intptr_t index,
Register pp) {
ASSERT((pp != PP) || constant_pool_allowed());
ASSERT(src != pp);
Operand op;
// PP is _un_tagged on ARM64.
const uint32_t offset = target::ObjectPool::element_offset(index);
const uint32_t upper20 = offset & 0xfffff000;
if (Address::CanHoldOffset(offset)) {
str(src, Address(pp, offset));
} else if (Operand::CanHold(upper20, kXRegSizeInBits, &op) ==
Operand::Immediate) {
const uint32_t lower12 = offset & 0x00000fff;
ASSERT(Address::CanHoldOffset(lower12));
add(TMP, pp, op);
str(src, Address(TMP, lower12));
} else {
const uint16_t offset_low = Utils::Low16Bits(offset);
const uint16_t offset_high = Utils::High16Bits(offset);
movz(TMP, Immediate(offset_low), 0);
if (offset_high != 0) {
movk(TMP, Immediate(offset_high), 1);
}
str(src, Address(pp, TMP));
}
}
void Assembler::LoadDoubleWordFromPoolIndex(Register lower,
Register upper,
intptr_t index) {


@@ -2173,6 +2173,11 @@ class Assembler : public AssemblerBase {
// Note: the function never clobbers TMP, TMP2 scratch registers.
void LoadWordFromPoolIndex(Register dst, intptr_t index, Register pp = PP);
// Store word to pool at the given offset.
//
// Note: clobbers TMP.
void StoreWordToPoolIndex(Register src, intptr_t index, Register pp = PP);
void LoadDoubleWordFromPoolIndex(Register lower,
Register upper,
intptr_t index);


@@ -381,11 +381,12 @@ intptr_t ObjectPoolBuilder::AddObject(
return AddObject(ObjectPoolBuilderEntry(&obj, patchable, snapshot_behavior));
}
intptr_t ObjectPoolBuilder::AddImmediate(uword imm) {
return AddObject(
ObjectPoolBuilderEntry(imm, ObjectPoolBuilderEntry::kImmediate,
ObjectPoolBuilderEntry::kNotPatchable,
ObjectPoolBuilderEntry::kSnapshotable));
intptr_t ObjectPoolBuilder::AddImmediate(
uword imm,
ObjectPoolBuilderEntry::Patchability patchable,
ObjectPoolBuilderEntry::SnapshotBehavior snapshotability) {
return AddObject(ObjectPoolBuilderEntry(
imm, ObjectPoolBuilderEntry::kImmediate, patchable, snapshotability));
}
intptr_t ObjectPoolBuilder::AddImmediate64(uint64_t imm) {


@@ -3713,6 +3713,24 @@ void Assembler::LoadWordFromPoolIndex(Register dst,
}
}
void Assembler::StoreWordToPoolIndex(Register src,
intptr_t index,
Register pp) {
ASSERT((pp != PP) || constant_pool_allowed());
ASSERT(src != pp);
const uint32_t offset = target::ObjectPool::element_offset(index);
// PP is untagged.
intx_t lo = ImmLo(offset);
intx_t hi = ImmHi(offset);
if (hi == 0) {
sx(src, Address(pp, lo));
} else {
lui(TMP, hi);
add(TMP, TMP, pp);
sx(src, Address(TMP, lo));
}
}
void Assembler::CompareObject(Register reg, const Object& object) {
ASSERT(IsOriginalObject(object));
if (IsSameObject(compiler::NullObject(), object)) {


@@ -1370,6 +1370,11 @@ class Assembler : public MicroAssembler {
// Note: the function never clobbers TMP, TMP2 scratch registers.
void LoadWordFromPoolIndex(Register dst, intptr_t index, Register pp = PP);
// Store word to pool at the given offset.
//
// Note: clobbers TMP, does not clobber TMP2.
void StoreWordToPoolIndex(Register src, intptr_t index, Register pp = PP);
void PushObject(const Object& object) {
if (IsSameObject(compiler::NullObject(), object)) {
PushRegister(NULL_REG);


@@ -1325,10 +1325,14 @@ void Assembler::LoadWordFromPoolIndex(Register dst, intptr_t idx) {
ASSERT(constant_pool_allowed());
ASSERT(dst != PP);
// PP is tagged on X64.
const int32_t offset =
target::ObjectPool::element_offset(idx) - kHeapObjectTag;
// This sequence must be decodable by code_patcher_x64.cc.
movq(dst, Address(PP, offset));
movq(dst, FieldAddress(PP, target::ObjectPool::element_offset(idx)));
}
void Assembler::StoreWordToPoolIndex(Register src, intptr_t idx) {
ASSERT(constant_pool_allowed());
ASSERT(src != PP);
// PP is tagged on X64.
movq(FieldAddress(PP, target::ObjectPool::element_offset(idx)), src);
}
void Assembler::LoadInt64FromBoxOrSmi(Register result, Register value) {


@@ -1477,6 +1477,9 @@ class Assembler : public AssemblerBase {
static bool IsSafe(const Object& object) { return true; }
static bool IsSafeSmi(const Object& object) { return target::IsSmi(object); }
void LoadWordFromPoolIndex(Register dst, intptr_t index);
void StoreWordToPoolIndex(Register src, intptr_t index);
private:
bool constant_pool_allowed_;
@@ -1487,7 +1490,6 @@
bool is_unique,
ObjectPoolBuilderEntry::SnapshotBehavior snapshot_behavior =
ObjectPoolBuilderEntry::kSnapshotable);
void LoadWordFromPoolIndex(Register dst, intptr_t index);
void AluL(uint8_t modrm_opcode, Register dst, const Immediate& imm);
void AluB(uint8_t modrm_opcode, const Address& dst, const Immediate& imm);


@@ -44,6 +44,9 @@ struct ObjectPoolBuilderEntry {
// (`ic_data`, [kImmediate] `entrypoint`) in the object pool instead on
// deserialization.
kResetToSwitchableCallMissEntryPoint,
// Set the value to 0 on snapshot writing.
kSetToZero,
};
enum EntryType {
@@ -228,7 +231,12 @@ class ObjectPoolBuilder : public ValueObject {
ObjectPoolBuilderEntry::kNotPatchable,
ObjectPoolBuilderEntry::SnapshotBehavior snapshot_behavior =
ObjectPoolBuilderEntry::kSnapshotable);
intptr_t AddImmediate(uword imm);
intptr_t AddImmediate(
uword imm,
ObjectPoolBuilderEntry::Patchability patchable =
ObjectPoolBuilderEntry::kNotPatchable,
ObjectPoolBuilderEntry::SnapshotBehavior snapshotability =
ObjectPoolBuilderEntry::kSnapshotable);
intptr_t AddImmediate64(uint64_t imm);
intptr_t AddImmediate128(simd128_value_t imm);


@@ -546,6 +546,12 @@ void ConstantPropagator::VisitStaticCall(StaticCallInstr* instr) {
SetValue(instr, non_constant_);
}
void ConstantPropagator::VisitCachableIdempotentCall(
CachableIdempotentCallInstr* instr) {
// This instruction should not be inserted if its value is constant.
SetValue(instr, non_constant_);
}
void ConstantPropagator::VisitLoadLocal(LoadLocalInstr* instr) {
// Instruction is eliminated when translating to SSA.
UNREACHABLE();


@@ -2555,6 +2555,10 @@ void FlowGraph::EliminateEnvironments() {
}
for (ForwardInstructionIterator it(block); !it.Done(); it.Advance()) {
Instruction* current = it.Current();
// This check is inconsistent with the flow graph checker: the checker
// requires an env when the block is not inside a try-catch.
// See FlowGraphChecker::VisitInstruction.
if (!current->ComputeCanDeoptimize() &&
!current->ComputeCanDeoptimizeAfterCall() &&
(!current->MayThrow() || !current->GetBlock()->InsideTryBlock())) {


@@ -244,7 +244,9 @@ void FlowGraphChecker::VisitInstruction(Instruction* instruction) {
#if !defined(DART_PRECOMPILER)
// In JIT mode, any instruction which may throw must have a deopt-id, except
// tail-call because it replaces the stack frame.
ASSERT1(!instruction->MayThrow() || instruction->IsTailCall() ||
ASSERT1(!instruction->MayThrow() ||
!instruction->GetBlock()->InsideTryBlock() ||
instruction->IsTailCall() ||
instruction->deopt_id() != DeoptId::kNone,
instruction);
@@ -513,11 +515,12 @@ void FlowGraphChecker::AssertArgumentsInEnv(Definition* call) {
call);
} else {
if (env->LazyDeoptToBeforeDeoptId()) {
// The deoptimization environment attached to this [call] instruction may
// no longer target the same call in unoptimized code. It may target anything.
// The deoptimization environment attached to this [call] instruction
// may no longer target the same call in unoptimized code. It may
// target anything.
//
// As a result, we cannot assume the arguments we pass to the call will also be
// in the deopt environment.
// As a result, we cannot assume the arguments we pass to the call
// will also be in the deopt environment.
//
// This currently can happen in inlined force-optimized instructions.
ASSERT(call->inlining_id() > 0);


@@ -472,8 +472,7 @@ void FlowGraphCompiler::EmitCallsiteMetadata(const InstructionSource& source,
if ((deopt_id != DeoptId::kNone) && !FLAG_precompiled_mode) {
// Marks either the continuation point in unoptimized code or the
// deoptimization point in optimized code, after call.
if (is_optimizing()) {
ASSERT(env != nullptr);
if (env != nullptr) {
// Note that we may lazy-deopt to the same IR instruction in unoptimized
// code or to another IR instruction (e.g. if LICM hoisted an instruction
// it will lazy-deopt to a Goto).
@@ -484,7 +483,6 @@ void FlowGraphCompiler::EmitCallsiteMetadata(const InstructionSource& source,
: DeoptId::ToDeoptAfter(deopt_id);
AddDeoptIndexAtCall(dest_deopt_id, env);
} else {
ASSERT(env == nullptr);
const intptr_t deopt_id_after = DeoptId::ToDeoptAfter(deopt_id);
// Add deoptimization continuation point after the call and before the
// arguments are removed.


@@ -923,6 +923,15 @@ class FlowGraphCompiler : public ValueObject {
bool IsEmptyBlock(BlockEntryInstr* block) const;
void EmitOptimizedStaticCall(
const Function& function,
const Array& arguments_descriptor,
intptr_t size_with_type_args,
intptr_t deopt_id,
const InstructionSource& source,
LocationSummary* locs,
Code::EntryKind entry_kind = Code::EntryKind::kNormal);
private:
friend class BoxInt64Instr; // For AddPcRelativeCallStubTarget().
friend class CheckNullInstr; // For AddPcRelativeCallStubTarget().
@@ -931,7 +940,7 @@ class FlowGraphCompiler : public ValueObject {
friend class StoreIndexedInstr; // For AddPcRelativeCallStubTarget().
friend class StoreFieldInstr; // For AddPcRelativeCallStubTarget().
friend class CheckStackOverflowSlowPath; // For pending_deoptimization_env_.
friend class GraphIntrinsicCodeGenScope; // For optimizing_.
friend class GraphIntrinsicCodeGenScope; // For optimizing_.
// Architecture specific implementation of simple native moves.
void EmitNativeMoveArchitecture(const compiler::ffi::NativeLocation& dst,
@@ -956,15 +965,6 @@ class FlowGraphCompiler : public ValueObject {
// Emit code to load a Value into register 'dst'.
void LoadValue(Register dst, Value* value);
void EmitOptimizedStaticCall(
const Function& function,
const Array& arguments_descriptor,
intptr_t size_with_type_args,
intptr_t deopt_id,
const InstructionSource& source,
LocationSummary* locs,
Code::EntryKind entry_kind = Code::EntryKind::kNormal);
void EmitUnoptimizedStaticCall(
intptr_t size_with_type_args,
intptr_t deopt_id,


@@ -5,9 +5,12 @@
#include "vm/compiler/backend/il.h"
#include "platform/assert.h"
#include "platform/globals.h"
#include "vm/bit_vector.h"
#include "vm/bootstrap.h"
#include "vm/code_entry_kind.h"
#include "vm/compiler/aot/dispatch_table_generator.h"
#include "vm/compiler/assembler/object_pool_builder.h"
#include "vm/compiler/backend/code_statistics.h"
#include "vm/compiler/backend/constant_propagator.h"
#include "vm/compiler/backend/evaluator.h"
@@ -5858,6 +5861,71 @@ void StaticCallInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
}
}
Representation CachableIdempotentCallInstr::RequiredInputRepresentation(
intptr_t idx) const {
// The first input is the array of types for generic functions.
if (type_args_len() > 0 || function().IsFactory()) {
if (idx == 0) {
return kTagged;
}
idx--;
}
return FlowGraph::ParameterRepresentationAt(function(), idx);
}
intptr_t CachableIdempotentCallInstr::ArgumentsSize() const {
return FlowGraph::ParameterOffsetAt(function(),
ArgumentCountWithoutTypeArgs(),
/*last_slot=*/false) +
((type_args_len() > 0) ? 1 : 0);
}
Definition* CachableIdempotentCallInstr::Canonicalize(FlowGraph* flow_graph) {
return this;
}
LocationSummary* CachableIdempotentCallInstr::MakeLocationSummary(
Zone* zone,
bool optimizing) const {
return MakeCallSummary(zone, this);
}
void CachableIdempotentCallInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
#if !defined(TARGET_ARCH_IA32)
Zone* zone = compiler->zone();
compiler::Label done;
const intptr_t cacheable_pool_index = __ object_pool_builder().AddImmediate(
0, compiler::ObjectPoolBuilderEntry::kPatchable,
compiler::ObjectPoolBuilderEntry::kSetToZero);
const Register dst = locs()->out(0).reg();
__ Comment(
"CachableIdempotentCall pool load and check. pool_index = "
"%" Pd,
cacheable_pool_index);
__ LoadWordFromPoolIndex(dst, cacheable_pool_index);
__ CompareImmediate(dst, 0);
__ BranchIf(NOT_EQUAL, &done);
__ Comment("CachableIdempotentCall pool load and check - end");
ArgumentsInfo args_info(type_args_len(), ArgumentCount(), ArgumentsSize(),
argument_names());
const Array& arguments_descriptor =
Array::ZoneHandle(zone, args_info.ToArgumentsDescriptor());
compiler->EmitOptimizedStaticCall(function(), arguments_descriptor,
args_info.size_with_type_args, deopt_id(),
source(), locs(), CodeEntryKind::kNormal);
__ Comment("CachableIdempotentCall pool store");
if (!function().HasUnboxedReturnValue()) {
__ LoadWordFromBoxOrSmi(dst, dst);
}
__ StoreWordToPoolIndex(dst, cacheable_pool_index);
__ Comment("CachableIdempotentCall pool store - end");
__ Bind(&done);
#endif
}
intptr_t AssertAssignableInstr::statistics_tag() const {
switch (kind_) {
case kParameterCheck:


@@ -442,6 +442,7 @@ struct InstrAttrs {
M(PolymorphicInstanceCall, _) \
M(DispatchTableCall, _) \
M(StaticCall, _) \
M(CachableIdempotentCall, _) \
M(LoadLocal, kNoGC) \
M(DropTemps, kNoGC) \
M(MakeTemp, kNoGC) \
@@ -5643,6 +5644,94 @@ class StaticCallInstr : public TemplateDartCall<0> {
DISALLOW_COPY_AND_ASSIGN(StaticCallInstr);
};
// A call to a function which has no side effects and whose result can
// be cached.
//
// The arguments flowing into this call must be const.
//
// The result is cached in the pool. Hence this instruction is not supported
// on IA32.
class CachableIdempotentCallInstr : public TemplateDartCall<0> {
public:
CachableIdempotentCallInstr(const InstructionSource& source,
const Function& function,
intptr_t type_args_len,
const Array& argument_names,
InputsArray&& arguments,
intptr_t deopt_id)
: TemplateDartCall(deopt_id,
type_args_len,
argument_names,
std::move(arguments),
source),
function_(function),
identity_(AliasIdentity::Unknown()) {
DEBUG_ASSERT(function.IsNotTemporaryScopedHandle());
ASSERT(AbstractType::Handle(function.result_type()).IsIntType());
ASSERT(!function.IsNull());
#if defined(TARGET_ARCH_IA32)
// No pool to cache in on IA32.
FATAL("Not supported on IA32.");
#endif
}
DECLARE_INSTRUCTION(CachableIdempotentCall)
const Function& function() const { return function_; }
virtual CompileType ComputeType() const { return CompileType::Int(); }
virtual Definition* Canonicalize(FlowGraph* flow_graph);
virtual bool ComputeCanDeoptimize() const { return false; }
virtual bool ComputeCanDeoptimizeAfterCall() const { return false; }
virtual bool CanBecomeDeoptimizationTarget() const { return false; }
virtual bool HasUnknownSideEffects() const { return true; }
virtual bool CanCallDart() const { return true; }
virtual SpeculativeMode SpeculativeModeOfInput(intptr_t idx) const {
if (type_args_len() > 0) {
if (idx == 0) {
return kGuardInputs;
}
idx--;
}
return function_.is_unboxed_parameter_at(idx) ? kNotSpeculative
: kGuardInputs;
}
virtual intptr_t ArgumentsSize() const;
virtual Representation RequiredInputRepresentation(intptr_t idx) const;
virtual Representation representation() const {
// If other representations are supported in the future, the location
// summary needs to be updated as well to stay consistent with static calls.
return kUnboxedFfiIntPtr;
}
virtual AliasIdentity Identity() const { return identity_; }
virtual void SetIdentity(AliasIdentity identity) { identity_ = identity; }
PRINT_OPERANDS_TO_SUPPORT
#define FIELD_LIST(F) \
F(const Function&, function_) \
F(AliasIdentity, identity_)
DECLARE_INSTRUCTION_SERIALIZABLE_FIELDS(CachableIdempotentCallInstr,
TemplateDartCall,
FIELD_LIST)
#undef FIELD_LIST
private:
DISALLOW_COPY_AND_ASSIGN(CachableIdempotentCallInstr);
};
class LoadLocalInstr : public TemplateDefinition<0, NoThrow> {
public:
LoadLocalInstr(const LocalVariable& local, const InstructionSource& source)


@@ -46,6 +46,8 @@ LocationSummary* Instruction::MakeCallSummary(Zone* zone,
const auto representation = instr->representation();
switch (representation) {
case kTagged:
case kUnboxedUint32:
case kUnboxedInt32:
result->set_out(
0, Location::RegisterLocation(CallingConventions::kReturnReg));
break;


@@ -904,6 +904,15 @@ void StaticCallInstr::PrintOperandsTo(BaseTextBuffer* f) const {
}
}
void CachableIdempotentCallInstr::PrintOperandsTo(BaseTextBuffer* f) const {
f->Printf(" %s<%" Pd "> ", String::Handle(function().name()).ToCString(),
type_args_len());
for (intptr_t i = 0; i < ArgumentCount(); ++i) {
if (i > 0) f->AddString(", ");
ArgumentValueAt(i)->PrintTo(f);
}
}
void LoadLocalInstr::PrintOperandsTo(BaseTextBuffer* f) const {
f->Printf("%s @%d", local().name().ToCString(), local().index().value());
}


@@ -966,6 +966,105 @@ ISOLATE_UNIT_TEST_CASE(IRTest_LoadThread) {
EXPECT_EQ(reinterpret_cast<intptr_t>(thread), result_int);
}
#if !defined(TARGET_ARCH_IA32)
ISOLATE_UNIT_TEST_CASE(IRTest_CachableIdempotentCall) {
// clang-format off
auto kScript = Utils::CStringUniquePtr(OS::SCreate(nullptr, R"(
int globalCounter = 0;
int increment() => ++globalCounter;
int cachedIncrement() {
// We will replace this call with a cacheable call,
// which will lead to the counter no longer being incremented.
// Make sure to return the value, so we can see that the boxing and
// unboxing works as expected.
return increment();
}
int multipleIncrement() {
int returnValue = 0;
for(int i = 0; i < 10; i++) {
// Save the last returned value.
returnValue = cachedIncrement();
}
return returnValue;
}
)"), std::free);
// clang-format on
const auto& root_library = Library::Handle(LoadTestScript(kScript.get()));
const auto& first_result =
Object::Handle(Invoke(root_library, "multipleIncrement"));
EXPECT(first_result.IsSmi());
if (first_result.IsSmi()) {
const intptr_t int_value = Smi::Cast(first_result).Value();
EXPECT_EQ(10, int_value);
}
const auto& cached_increment_function =
Function::Handle(GetFunction(root_library, "cachedIncrement"));
const auto& increment_function =
Function::ZoneHandle(GetFunction(root_library, "increment"));
TestPipeline pipeline(cached_increment_function, CompilerPass::kJIT);
FlowGraph* flow_graph = pipeline.RunPasses({
CompilerPass::kComputeSSA,
});
StaticCallInstr* static_call = nullptr;
{
ILMatcher cursor(flow_graph, flow_graph->graph_entry()->normal_entry());
EXPECT(cursor.TryMatch({
kMoveGlob,
{kMatchAndMoveStaticCall, &static_call},
kMoveGlob,
kMatchReturn,
}));
}
InputsArray args;
CachableIdempotentCallInstr* call = new CachableIdempotentCallInstr(
InstructionSource(), increment_function, static_call->type_args_len(),
Array::empty_array(), std::move(args), DeoptId::kNone);
static_call->ReplaceWith(call, nullptr);
pipeline.RunForcedOptimizedAfterSSAPasses();
{
ILMatcher cursor(flow_graph, flow_graph->graph_entry()->normal_entry());
EXPECT(cursor.TryMatch({
kMoveGlob,
kMatchAndMoveCachableIdempotentCall,
kMoveGlob,
// The cacheable call returns unboxed, so select representations
// adds boxing.
kMatchBox,
kMoveGlob,
kMatchReturn,
}));
}
{
#if !defined(PRODUCT)
SetFlagScope<bool> sfs(&FLAG_disassemble_optimized, true);
#endif
pipeline.CompileGraphAndAttachFunction();
}
const auto& second_result =
Object::Handle(Invoke(root_library, "multipleIncrement"));
EXPECT(second_result.IsSmi());
if (second_result.IsSmi()) {
const intptr_t int_value = Smi::Cast(second_result).Value();
EXPECT_EQ(11, int_value);
}
}
#endif
// Helper to set up an inlined FfiCall by replacing a StaticCall.
FlowGraph* SetupFfiFlowgraph(TestPipeline* pipeline,
Zone* zone,


@@ -10,6 +10,7 @@
#include "vm/compiler/frontend/flow_graph_builder.h" // For dart::FlowGraphBuilder::SimpleInstanceOfType.
#include "vm/compiler/frontend/prologue_builder.h"
#include "vm/compiler/jit/compiler.h"
#include "vm/kernel_binary.h"
#include "vm/object_store.h"
#include "vm/resolver.h"
#include "vm/stack_frame.h"
@@ -3333,6 +3334,10 @@ Fragment StreamingFlowGraphBuilder::BuildStaticInvocation(TokenPosition* p) {
++argument_count;
}
if (target.IsCachableIdempotent()) {
return BuildCachableIdempotentCall(position, target);
}
const auto recognized_kind = target.recognized_kind();
switch (recognized_kind) {
case MethodRecognizer::kNativeEffect:
@@ -6234,6 +6239,66 @@ Fragment StreamingFlowGraphBuilder::BuildFfiAsFunctionInternal() {
return code;
}
Fragment StreamingFlowGraphBuilder::BuildArgumentsCachableIdempotentCall(
intptr_t* argument_count) {
*argument_count = ReadUInt(); // read arguments count.
// List of types.
const intptr_t types_list_length = ReadListLength();
if (types_list_length != 0) {
FATAL("Type arguments for vm:cachable-idempotent not (yet) supported.");
}
Fragment code;
// List of positional.
intptr_t positional_list_length = ReadListLength();
for (intptr_t i = 0; i < positional_list_length; ++i) {
code += BuildExpression();
Definition* target_def = B->Peek();
if (!target_def->IsConstant()) {
FATAL(
"Arguments for vm:cachable-idempotent must be const, argument on "
"index %" Pd " is not.",
i);
}
}
// List of named.
const intptr_t named_args_len = ReadListLength();
if (named_args_len != 0) {
FATAL("Named arguments for vm:cachable-idempotent not (yet) supported.");
}
return code;
}
Fragment StreamingFlowGraphBuilder::BuildCachableIdempotentCall(
TokenPosition position,
const Function& target) {
// The call site must be force optimized because the cache is untagged.
if (!parsed_function()->function().ForceOptimize()) {
FATAL(
"vm:cachable-idempotent functions can only be called from "
"vm:force-optimize functions.");
}
const auto& target_result_type = AbstractType::Handle(target.result_type());
if (!target_result_type.IsIntType()) {
FATAL("The return type vm:cachable-idempotent functions must be int.")
}
Fragment code;
Array& argument_names = Array::ZoneHandle(Z);
intptr_t argument_count;
code += BuildArgumentsCachableIdempotentCall(&argument_count);
code += flow_graph_builder_->CachableIdempotentCall(
position, target, argument_count, argument_names,
/*type_args_len=*/0);
code += flow_graph_builder_->Box(kUnboxedFfiIntPtr);
return code;
}
Fragment StreamingFlowGraphBuilder::BuildFfiNativeCallbackFunction(
FfiFunctionKind kind) {
// The call-site must look like this (guaranteed by the FE which inserts it):


@@ -395,6 +395,10 @@ class StreamingFlowGraphBuilder : public KernelReaderHelper {
// Kernel buffer and pushes the resulting Function object.
Fragment BuildFfiNativeCallbackFunction(FfiFunctionKind kind);
Fragment BuildArgumentsCachableIdempotentCall(intptr_t* argument_count);
Fragment BuildCachableIdempotentCall(TokenPosition position,
const Function& target);
// Piece of a StringConcatenation.
// Represents either a StringLiteral, or a Reader offset to the expression.
struct ConcatPiece {


@@ -658,6 +658,20 @@ Fragment FlowGraphBuilder::StaticCall(TokenPosition position,
return Fragment(call);
}
Fragment FlowGraphBuilder::CachableIdempotentCall(TokenPosition position,
const Function& target,
intptr_t argument_count,
const Array& argument_names,
intptr_t type_args_count) {
const intptr_t total_count = argument_count + (type_args_count > 0 ? 1 : 0);
InputsArray arguments = GetArguments(total_count);
CachableIdempotentCallInstr* call = new (Z) CachableIdempotentCallInstr(
InstructionSource(position), target, type_args_count, argument_names,
std::move(arguments), GetNextDeoptId());
Push(call);
return Fragment(call);
}
Fragment FlowGraphBuilder::StringInterpolateSingle(TokenPosition position) {
Fragment instructions;
instructions += StaticCall(


@@ -222,6 +222,11 @@ class FlowGraphBuilder : public BaseFlowGraphBuilder {
const InferredTypeMetadata* result_type = nullptr,
intptr_t type_args_len = 0,
bool use_unchecked_entry = false);
Fragment CachableIdempotentCall(TokenPosition position,
const Function& target,
intptr_t argument_count,
const Array& argument_names,
intptr_t type_args_len = 0);
Fragment StringInterpolateSingle(TokenPosition position);
Fragment StringInterpolate(TokenPosition position);
Fragment ThrowTypeError();


@@ -9066,22 +9066,35 @@ bool Function::IsUnmodifiableTypedDataViewFactory() const {
}
}
static bool InVmTests(const Function& function) {
#if defined(TESTING)
return true;
#else
auto* zone = Thread::Current()->zone();
const auto& cls = Class::Handle(zone, function.Owner());
const auto& lib = Library::Handle(zone, cls.library());
const auto& url = String::Handle(zone, lib.url());
const bool in_vm_tests =
strstr(url.ToCString(), "runtime/tests/vm/") != nullptr;
return in_vm_tests;
#endif
}
bool Function::ForceOptimize() const {
if (RecognizedKindForceOptimize() || IsFfiTrampoline() ||
IsTypedDataViewFactory() || IsUnmodifiableTypedDataViewFactory()) {
return true;
}
#if defined(TESTING)
// For run_vm_tests we allow marking arbitrary functions as force-optimize
// via `@pragma('vm:force-optimize')`.
if (has_pragma()) {
return Library::FindPragma(Thread::Current(), false, *this,
Symbols::vm_force_optimize());
}
#endif // defined(TESTING)
if (!has_pragma()) return false;
return false;
const bool has_vm_pragma = Library::FindPragma(
Thread::Current(), false, *this, Symbols::vm_force_optimize());
if (!has_vm_pragma) return false;
// For run_vm_tests and runtime/tests/vm allow marking arbitrary functions as
// force-optimize via `@pragma('vm:force-optimize')`.
return InVmTests(*this);
}
bool Function::IsIdempotent() const {
@@ -9097,6 +9110,18 @@ bool Function::IsIdempotent() const {
*this, Symbols::vm_idempotent());
}
bool Function::IsCachableIdempotent() const {
if (!has_pragma()) return false;
const bool has_vm_pragma =
Library::FindPragma(Thread::Current(), /*only_core=*/false, *this,
Symbols::vm_cachable_idempotent());
if (!has_vm_pragma) return false;
// For run_vm_tests and runtime/tests/vm allow marking arbitrary functions.
return InVmTests(*this);
}
bool Function::RecognizedKindForceOptimize() const {
switch (recognized_kind()) {
// Uses unboxed/untagged data not supported in unoptimized.


@@ -3577,6 +3577,8 @@ class Function : public Object {
// and retry it again.
bool IsIdempotent() const;
bool IsCachableIdempotent() const;
// Whether this function's |recognized_kind| requires optimization.
bool RecognizedKindForceOptimize() const;


@@ -531,6 +531,7 @@ class ObjectPointerVisitor;
V(vm_idempotent, "vm:idempotent") \
V(vm_invisible, "vm:invisible") \
V(vm_isolate_unsendable, "vm:isolate-unsendable") \
V(vm_cachable_idempotent, "vm:cachable-idempotent") \
V(vm_never_inline, "vm:never-inline") \
V(vm_non_nullable_result_type, "vm:non-nullable-result-type") \
V(vm_notify_debugger_on_exception, "vm:notify-debugger-on-exception") \