[vm/inliner] Tweak inlining heuristic, improve List.of inlining.

Builds on top of https://dart-review.googlesource.com/c/sdk/+/228660, introduces vm:vm:always-consider-inlining that tell inliner not to give up inlining of a function.

HashMap constructor got prefer-inline pragma to accommodate shift in inlining away from where it used to be inlined due to large size of inlined HashMap, greater than FLAG_inline_getters_setters_smaller_than heuristic.

Notable performance changes:

JIT (x64)
===
ListCopy.List.of.fixed.100 27.37%
ListCopy.List.of.fixed.2 16.31%

AOT (x64)
===
Empty  -9.978%
InstantiateTypeArgs.Instantiate1  -8.262%
...
ListCopy.List.of.fixed.2 13.73%
ListCopy.spread.num.2 15.51%
ListCopy.List.of.2 15.08%
MapCopy.Map.String.of.Map.100 55.62%
MapCopy.Map.Thing.of.Map.100 56.06%

flutter release
===
flutter_gallery_apk_size (Pixel 2)
-0.0074% (1.0 noise)41809568.00 41812676.00


Addresses https://github.com/dart-lang/sdk/issues/49408

TEST=Inliner_always_consider_inlining, Inliner_List_of_inlined
Change-Id: I7f8fc7cb0ac4a69310c108cf519518c384dc0164
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/253740
Reviewed-by: Alexander Markov <alexmarkov@google.com>
Commit-Queue: Alexander Aprelev <aam@google.com>
This commit is contained in:
Alexander Aprelev 2022-08-15 21:14:36 +00:00 committed by Commit Bot
parent eff8c305d9
commit 0914835773
8 changed files with 208 additions and 23 deletions

View file

@ -25,6 +25,14 @@ factors can prevent inlining and thus this pragma may not always be respected.
Here, the VM will not inline the annotated function. In this case, the pragma
is always respected.
####
```dart
@pragma("vm:always-consider-inlining")
```
VM will keep trying to inline the function in new contexts, not giving up after encountered contexts where inlining wasn't effective. With this some compile time is traded for expectation that the function has signficant type specialization, resulting in highly efficient inlined results in contexts where arguments types are known to the compiler. Example of this is `_List.of` constructor in dart core library.
## Annotations for return types and field types
The VM is not able to see across method calls (apart from inlining) and

View file

@ -12,6 +12,7 @@ These pragmas are part of the VM's API and are safe for use in external code.
| `vm:notify-debugger-on-exception` | Marks a function that catches exceptions, making the VM treat any caught exception as if they were uncaught. This can be used to notify an attached debugger during debugging, without pausing the app during regular execution. |
| `vm:external-name` | Allows to specify an external (native) name for an `external` function. This name is used to lookup native implementation via native resolver associated with the current library through embedding APIs. This is a replacement for legacy VM specific `native "name"` syntax. |
| `vm:invisible` | Allows to mark a function as invisible so it will not appear on stack traces. |
| `vm:always-consider-inlining` | Marks a function which particularly benefits from inlining and specialization in context of the caller (for example, when concrete types of arguments are known). Inliner will not give up after one failed inlining attempt and will continue trying to inline this function.
## Unsafe pragmas for general use

View file

@ -524,38 +524,51 @@ class CallSites : public ValueObject {
DISALLOW_COPY_AND_ASSIGN(CallSites);
};
// Determines if inlining this graph yields a small leaf node.
static bool IsSmallLeaf(FlowGraph* graph) {
// Determines if inlining this graph yields a small leaf node, or a sequence of
// static calls that is no larger than the call site it will replace.
static bool IsSmallLeafOrReduction(int inlining_depth,
intptr_t call_site_instructions,
FlowGraph* graph) {
intptr_t instruction_count = 0;
intptr_t call_count = 0;
for (BlockIterator block_it = graph->postorder_iterator(); !block_it.Done();
block_it.Advance()) {
BlockEntryInstr* entry = block_it.Current();
for (ForwardInstructionIterator it(entry); !it.Done(); it.Advance()) {
Instruction* current = it.Current();
if (current->IsReturn()) continue;
++instruction_count;
if (current->IsInstanceCall() || current->IsPolymorphicInstanceCall() ||
current->IsClosureCall()) {
return false;
} else if (current->IsStaticCall()) {
}
if (current->IsStaticCall()) {
const Function& function = current->AsStaticCall()->function();
const intptr_t inl_size = function.optimized_instruction_count();
const bool always_inline =
FlowGraphInliner::FunctionHasPreferInlinePragma(function);
// Accept a static call is always inlined in some way and add the
// cached size to the total instruction count. A reasonable guess
// is made if the count has not been collected yet (listed methods
// are never very large).
if (!always_inline && !function.IsRecognized()) {
return false;
}
if (!always_inline) {
static constexpr intptr_t kAvgListedMethodSize = 20;
instruction_count +=
(inl_size == 0 ? kAvgListedMethodSize : inl_size);
// Accept a static call that is always inlined in some way and add the
// cached size to the total instruction count. A reasonable guess is
// made if the count has not been collected yet (listed methods are
// never very large).
if (always_inline || function.IsRecognized()) {
if (!always_inline) {
static constexpr intptr_t kAvgListedMethodSize = 20;
instruction_count +=
(inl_size == 0 ? kAvgListedMethodSize : inl_size);
}
} else {
++call_count;
instruction_count += current->AsStaticCall()->ArgumentCount();
instruction_count += 1; // pop the call frame.
}
continue;
}
}
}
if (call_count > 0) {
return instruction_count <= call_site_instructions;
}
return instruction_count <= FLAG_inlining_small_leaf_size_threshold;
}
@ -1036,6 +1049,8 @@ class CallSiteInliner : public ValueObject {
if (FlowGraphInliner::FunctionHasNeverInlinePragma(function)) {
TRACE_INLINING(THR_Print(" Bailout: vm:never-inline pragma\n"));
PRINT_INLINING_TREE("vm:never-inline", &call_data->caller, &function,
call_data->call);
return false;
}
@ -1156,6 +1171,8 @@ class CallSiteInliner : public ValueObject {
TRACE_INLINING(
THR_Print(" Bailout: not inlinable due to "
"!function.CanBeInlined()\n"));
PRINT_INLINING_TREE("Not inlinable", &call_data->caller, &function,
call_data->call);
return false;
}
}
@ -1352,9 +1369,19 @@ class CallSiteInliner : public ValueObject {
ShouldWeInline(function, instruction_count, call_site_count);
if (!decision.value) {
// If size is larger than all thresholds, don't consider it again.
// TODO(dartbug.com/49665): Make compiler smart enough so it itself
// can identify highly-specialized functions that should always
// be considered for inlining, without relying on a pragma.
if ((instruction_count > FLAG_inlining_size_threshold) &&
(call_site_count > FLAG_inlining_callee_call_sites_threshold)) {
function.set_is_inlinable(false);
// Will keep trying to inline the function if it can be
// specialized based on argument types.
if (!FlowGraphInliner::FunctionHasAlwaysConsiderInliningPragma(
function)) {
function.set_is_inlinable(false);
TRACE_INLINING(THR_Print(" Mark not inlinable\n"));
}
}
TRACE_INLINING(
THR_Print(" Bailout: heuristics (%s) with "
@ -1376,7 +1403,13 @@ class CallSiteInliner : public ValueObject {
// TODO(ajcbik): with the now better bookkeeping, explore removing
// this
if (stricter_heuristic) {
if (!IsSmallLeaf(callee_graph)) {
intptr_t call_site_instructions = 0;
if (auto static_call = call->AsStaticCall()) {
// Push all the arguments, do the call, drop arguments.
call_site_instructions = static_call->ArgumentCount() + 1 + 1;
}
if (!IsSmallLeafOrReduction(inlining_depth_, call_site_instructions,
callee_graph)) {
TRACE_INLINING(
THR_Print(" Bailout: heuristics (no small leaf)\n"));
PRINT_INLINING_TREE("Heuristic fail (no small leaf)",
@ -2440,6 +2473,19 @@ bool FlowGraphInliner::FunctionHasNeverInlinePragma(const Function& function) {
/*multiple=*/false, &options);
}
bool FlowGraphInliner::FunctionHasAlwaysConsiderInliningPragma(
const Function& function) {
if (!function.has_pragma()) {
return false;
}
Thread* thread = dart::Thread::Current();
COMPILER_TIMINGS_TIMER_SCOPE(thread, CheckForPragma);
Object& options = Object::Handle();
return Library::FindPragma(thread, /*only_core=*/false, function,
Symbols::vm_always_consider_inlining(),
/*multiple=*/false, &options);
}
bool FlowGraphInliner::AlwaysInline(const Function& function) {
if (FunctionHasPreferInlinePragma(function)) {
TRACE_INLINING(

View file

@ -123,6 +123,7 @@ class FlowGraphInliner : ValueObject {
static bool FunctionHasPreferInlinePragma(const Function& function);
static bool FunctionHasNeverInlinePragma(const Function& function);
static bool FunctionHasAlwaysConsiderInliningPragma(const Function& function);
FlowGraph* flow_graph() const { return flow_graph_; }
intptr_t NextInlineId(const Function& function,

View file

@ -246,6 +246,132 @@ ISOLATE_UNIT_TEST_CASE(Inliner_List_generate) {
}));
}
// Verifies that pragma-decorated call gets inlined.
ISOLATE_UNIT_TEST_CASE(Inliner_always_consider_inlining) {
const char* kScript = R"(
choice() {
dynamic x;
return x == 123;
}
@pragma("vm:always-consider-inlining")
bar(baz) {
if (baz is String) {
return 1;
}
if (baz is num) {
return 2;
}
if (baz is bool) {
dynamic j = 0;
for (int i = 0; i < 1024; i++) {
j += "i: $i".length;
}
return j;
}
return 4;
}
bbar(bbaz, something) {
if (bbaz == null) {
return "null";
}
return bar(bbaz);
}
main(args) {
print(bbar(42, "something"));
print(bbar(choice() ? "abc": 42, "something"));
print(bbar("abc", "something"));
}
)";
const auto& root_library = Library::Handle(LoadTestScript(kScript));
const auto& function = Function::Handle(GetFunction(root_library, "main"));
TestPipeline pipeline(function, CompilerPass::kAOT);
FlowGraph* flow_graph = pipeline.RunPasses({});
auto entry = flow_graph->graph_entry()->normal_entry();
ILMatcher cursor(flow_graph, entry, /*trace=*/true);
StaticCallInstr* call_print1;
StaticCallInstr* call_print2;
StaticCallInstr* call_print3;
StaticCallInstr* call_bar;
RELEASE_ASSERT(cursor.TryMatch({
kMoveGlob,
{kMatchAndMoveStaticCall, &call_print1},
kMoveGlob,
{kMatchAndMoveStaticCall, &call_bar},
kMoveGlob,
{kMatchAndMoveStaticCall, &call_print2},
kMoveGlob,
{kMatchAndMoveStaticCall, &call_print3},
kMoveGlob,
kMatchReturn,
}));
EXPECT(strcmp(call_print1->function().UserVisibleNameCString(), "print") ==
0);
EXPECT(strcmp(call_print2->function().UserVisibleNameCString(), "print") ==
0);
EXPECT(strcmp(call_print3->function().UserVisibleNameCString(), "print") ==
0);
EXPECT(strcmp(call_bar->function().UserVisibleNameCString(), "bar") == 0);
}
// Verifies that List.of gets inlined.
ISOLATE_UNIT_TEST_CASE(Inliner_List_of_inlined) {
const char* kScript = R"(
main() {
final foo = List<String>.filled(100, "bar");
final the_copy1 = List.of(foo, growable: false);
print('${the_copy1.length}');
}
)";
const auto& root_library = Library::Handle(LoadTestScript(kScript));
const auto& function = Function::Handle(GetFunction(root_library, "main"));
TestPipeline pipeline(function, CompilerPass::kAOT);
FlowGraph* flow_graph = pipeline.RunPasses({});
auto entry = flow_graph->graph_entry()->normal_entry();
ILMatcher cursor(flow_graph, entry, /*trace=*/true);
StaticCallInstr* call_get_length;
StaticCallInstr* call_interpolate;
StaticCallInstr* call_print;
RELEASE_ASSERT(cursor.TryMatch({
kMoveGlob,
kMatchAndMoveJoinEntry,
kMoveGlob,
kMatchAndMoveBranchTrue,
kMoveGlob,
kMatchAndMoveJoinEntry,
kMatchAndMoveCheckStackOverflow,
kMatchAndMoveBranchFalse,
kMatchAndMoveTargetEntry,
kMoveGlob,
kMatchAndMoveJoinEntry,
kMoveGlob,
kMatchAndMoveBranchFalse,
kMoveGlob,
{kMatchAndMoveStaticCall, &call_get_length},
kMoveGlob,
{kMatchAndMoveStaticCall, &call_interpolate},
kMoveGlob,
{kMatchAndMoveStaticCall, &call_print},
kMoveGlob,
kMatchReturn,
}));
EXPECT(strcmp(call_get_length->function().UserVisibleNameCString(),
"length") == 0);
EXPECT(strcmp(call_interpolate->function().UserVisibleNameCString(),
"_interpolateSingle") == 0);
EXPECT(strcmp(call_print->function().UserVisibleNameCString(), "print") == 0);
}
#endif // defined(DART_PRECOMPILER)
} // namespace dart

View file

@ -496,20 +496,21 @@ class ObjectPointerVisitor;
V(state, "state") \
V(string_param, ":string_param") \
V(string_param_length, ":string_param_length") \
V(vm_prefer_inline, "vm:prefer-inline") \
V(vm_always_consider_inlining, "vm:always-consider-inlining") \
V(vm_entry_point, "vm:entry-point") \
V(vm_exact_result_type, "vm:exact-result-type") \
V(vm_external_name, "vm:external-name") \
V(vm_ffi_abi_specific_mapping, "vm:ffi:abi-specific-mapping") \
V(vm_ffi_struct_fields, "vm:ffi:struct-fields") \
V(vm_invisible, "vm:invisible") \
V(vm_never_inline, "vm:never-inline") \
V(vm_non_nullable_result_type, "vm:non-nullable-result-type") \
V(vm_notify_debugger_on_exception, "vm:notify-debugger-on-exception") \
V(vm_prefer_inline, "vm:prefer-inline") \
V(vm_recognized, "vm:recognized") \
V(vm_testing_print_flow_graph, "vm:testing:print-flow-graph") \
V(vm_trace_entrypoints, "vm:testing.unsafe.trace-entrypoints-fn") \
V(vm_ffi_abi_specific_mapping, "vm:ffi:abi-specific-mapping") \
V(vm_ffi_struct_fields, "vm:ffi:struct-fields") \
V(vm_unsafe_no_interrupts, "vm:unsafe:no-interrupts") \
V(vm_external_name, "vm:external-name") \
V(vm_invisible, "vm:invisible") \
V(vm_testing_print_flow_graph, "vm:testing:print-flow-graph")
V(vm_unsafe_no_interrupts, "vm:unsafe:no-interrupts")
// Contains a list of frequently used strings in a canonicalized form. This
// list is kept in the vm_isolate in order to share the copy across isolates

View file

@ -115,6 +115,7 @@ class _List<E> extends _Array<E> {
}
// Specialization of List.of constructor for growable == false.
@pragma("vm:always-consider-inlining")
factory _List.of(Iterable<E> elements) {
if (elements is _GrowableList) {
return _List._ofGrowableList(unsafeCast(elements));

View file

@ -25,6 +25,7 @@ class _TypeTest<T> {
@patch
class HashMap<K, V> {
@patch
@pragma("vm:prefer-inline")
factory HashMap(
{bool equals(K key1, K key2)?,
int hashCode(K key)?,