[vm/compiler] Add --print-precompiler-timings

This flag allows dumping timings of various precompiler passes.

TEST=precompiled dart2js with --print-precompiler-timings

Change-Id: I54d6fdf26c25a0e43ae4f2717e833e09e9321d81
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/198407
Commit-Queue: Vyacheslav Egorov <vegorov@google.com>
Reviewed-by: Alexander Markov <alexmarkov@google.com>
This commit is contained in:
Vyacheslav Egorov 2021-05-11 11:15:53 +00:00 committed by commit-bot@chromium.org
parent 53448def73
commit 34add230f6
18 changed files with 609 additions and 112 deletions

View file

@ -48,7 +48,7 @@ BENCHMARK(CorelibCompileAll) {
TransitionNativeToVM transition(thread);
StackZone zone(thread);
HANDLESCOPE(thread);
Timer timer(true, "Compile all of Core lib benchmark");
Timer timer;
timer.Start();
const Error& error =
Error::Handle(Library::CompileAll(/*ignore_error=*/true));
@ -92,7 +92,7 @@ static char* ComputeKernelServicePath(const char* arg) {
//
BENCHMARK(CorelibIsolateStartup) {
const int kNumIterations = 1000;
Timer timer(true, "CorelibIsolateStartup");
Timer timer;
Isolate* isolate = thread->isolate();
Dart_ExitIsolate();
for (int i = 0; i < kNumIterations; i++) {
@ -192,7 +192,7 @@ BENCHMARK(UseDartApi) {
result = Dart_Invoke(lib, NewString("benchmark"), 1, args);
EXPECT_VALID(result);
Timer timer(true, "UseDartApi benchmark");
Timer timer;
timer.Start();
result = Dart_Invoke(lib, NewString("benchmark"), 1, args);
EXPECT_VALID(result);
@ -208,7 +208,7 @@ static void NoopFinalizer(void* isolate_callback_data, void* peer) {}
//
BENCHMARK(DartStringAccess) {
const int kNumIterations = 10000000;
Timer timer(true, "DartStringAccess benchmark");
Timer timer;
timer.Start();
Dart_EnterScope();
@ -281,7 +281,7 @@ BENCHMARK(KernelServiceCompileAll) {
result = Dart_FinalizeLoading(false);
EXPECT_VALID(result);
Timer timer(true, "Compile all of kernel service benchmark");
Timer timer;
timer.Start();
#if !defined(PRODUCT)
const bool old_flag = FLAG_background_compilation;
@ -303,7 +303,7 @@ BENCHMARK(KernelServiceCompileAll) {
// Measure frame lookup during stack traversal.
//
static void StackFrame_accessFrame(Dart_NativeArguments args) {
Timer timer(true, "LookupDartCode benchmark");
Timer timer;
timer.Start();
{
Thread* thread = Thread::Current();
@ -474,7 +474,7 @@ BENCHMARK(CreateMirrorSystem) {
Dart_Handle lib = TestCase::LoadTestScript(kScriptChars, NULL);
Timer timer(true, "currentMirrorSystem() benchmark");
Timer timer;
timer.Start();
Dart_Handle result = Dart_Invoke(lib, NewString("benchmark"), 0, NULL);
EXPECT_VALID(result);
@ -496,7 +496,7 @@ BENCHMARK(EnterExitIsolate) {
Api::CheckAndFinalizePendingClasses(thread);
}
Dart_Isolate isolate = Dart_CurrentIsolate();
Timer timer(true, "Enter and Exit isolate");
Timer timer;
timer.Start();
for (intptr_t i = 0; i < kLoopCount; i++) {
Dart_ExitIsolate();
@ -513,7 +513,7 @@ BENCHMARK(SerializeNull) {
HANDLESCOPE(thread);
const Object& null_object = Object::Handle();
const intptr_t kLoopCount = 1000000;
Timer timer(true, "Serialize Null");
Timer timer;
timer.Start();
for (intptr_t i = 0; i < kLoopCount; i++) {
StackZone zone(thread);
@ -536,7 +536,7 @@ BENCHMARK(SerializeSmi) {
HANDLESCOPE(thread);
const Integer& smi_object = Integer::Handle(Smi::New(42));
const intptr_t kLoopCount = 1000000;
Timer timer(true, "Serialize Smi");
Timer timer;
timer.Start();
for (intptr_t i = 0; i < kLoopCount; i++) {
StackZone zone(thread);
@ -561,7 +561,7 @@ BENCHMARK(SimpleMessage) {
array_object.SetAt(0, Integer::Handle(Smi::New(42)));
array_object.SetAt(1, Object::Handle());
const intptr_t kLoopCount = 1000000;
Timer timer(true, "Simple Message");
Timer timer;
timer.Start();
for (intptr_t i = 0; i < kLoopCount; i++) {
StackZone zone(thread);
@ -595,7 +595,7 @@ BENCHMARK(LargeMap) {
Instance& map = Instance::Handle();
map ^= Api::UnwrapHandle(h_result);
const intptr_t kLoopCount = 100;
Timer timer(true, "Large Map");
Timer timer;
timer.Start();
for (intptr_t i = 0; i < kLoopCount; i++) {
StackZone zone(thread);

View file

@ -27,6 +27,7 @@
#include "vm/compiler/cha.h"
#include "vm/compiler/compiler_pass.h"
#include "vm/compiler/compiler_state.h"
#include "vm/compiler/compiler_timings.h"
#include "vm/compiler/frontend/flow_graph_builder.h"
#include "vm/compiler/frontend/kernel_to_il.h"
#include "vm/compiler/jit/compiler.h"
@ -60,6 +61,10 @@ namespace dart {
#define IG (isolate_group())
#define Z (zone())
DEFINE_FLAG(bool,
print_precompiler_timings,
false,
"Print per-phase breakdown of time spent precompiling");
DEFINE_FLAG(bool, print_unique_targets, false, "Print unique dynamic targets");
DEFINE_FLAG(bool, print_gop, false, "Print global object pool");
DEFINE_FLAG(bool, trace_precompiler, false, "Trace precompiler.");
@ -331,12 +336,21 @@ ErrorPtr Precompiler::CompileAll() {
if (setjmp(*jump.Set()) == 0) {
Precompiler precompiler(Thread::Current());
precompiler.DoCompileAll();
precompiler.ReportStats();
return Error::null();
} else {
return Thread::Current()->StealStickyError();
}
}
void Precompiler::ReportStats() {
if (!FLAG_print_precompiler_timings) {
return;
}
thread()->compiler_timings()->Print();
}
Precompiler::Precompiler(Thread* thread)
: thread_(thread),
zone_(NULL),
@ -378,6 +392,10 @@ Precompiler::Precompiler(Thread* thread)
get_runtime_type_is_unique_(false) {
ASSERT(Precompiler::singleton_ == NULL);
Precompiler::singleton_ = this;
if (FLAG_print_precompiler_timings) {
thread->set_compiler_timings(new CompilerTimings());
}
}
Precompiler::~Precompiler() {
@ -389,9 +407,13 @@ Precompiler::~Precompiler() {
ASSERT(Precompiler::singleton_ == this);
Precompiler::singleton_ = NULL;
delete thread()->compiler_timings();
thread()->set_compiler_timings(nullptr);
}
void Precompiler::DoCompileAll() {
PRECOMPILER_TIMER_SCOPE(this, CompileAll);
{
StackZone stack_zone(T);
zone_ = stack_zone.GetZone();
@ -530,44 +552,60 @@ void Precompiler::DoCompileAll() {
tracer_ = nullptr;
}
TraceForRetainedFunctions();
{
PRECOMPILER_TIMER_SCOPE(this, TraceForRetainedFunctions);
TraceForRetainedFunctions();
}
FinalizeDispatchTable();
ReplaceFunctionStaticCallEntries();
DropFunctions();
DropFields();
TraceTypesFromRetainedClasses();
DropTypes();
DropFunctionTypes();
DropTypeParameters();
DropTypeArguments();
{
PRECOMPILER_TIMER_SCOPE(this, Drop);
// Clear these before dropping classes as they may hold onto otherwise
// dead instances of classes we will remove or otherwise unused symbols.
IG->object_store()->set_unique_dynamic_targets(Array::null_array());
Class& null_class = Class::Handle(Z);
Function& null_function = Function::Handle(Z);
Field& null_field = Field::Handle(Z);
IG->object_store()->set_pragma_class(null_class);
IG->object_store()->set_pragma_name(null_field);
IG->object_store()->set_pragma_options(null_field);
IG->object_store()->set_completer_class(null_class);
IG->object_store()->set_symbol_class(null_class);
IG->object_store()->set_compiletime_error_class(null_class);
IG->object_store()->set_growable_list_factory(null_function);
IG->object_store()->set_simple_instance_of_function(null_function);
IG->object_store()->set_simple_instance_of_true_function(null_function);
IG->object_store()->set_simple_instance_of_false_function(null_function);
IG->object_store()->set_async_star_move_next_helper(null_function);
IG->object_store()->set_complete_on_async_return(null_function);
IG->object_store()->set_async_star_stream_controller(null_class);
DropMetadata();
DropLibraryEntries();
DropFunctions();
DropFields();
TraceTypesFromRetainedClasses();
DropTypes();
DropFunctionTypes();
DropTypeParameters();
DropTypeArguments();
// Clear these before dropping classes as they may hold onto otherwise
// dead instances of classes we will remove or otherwise unused symbols.
IG->object_store()->set_unique_dynamic_targets(Array::null_array());
Class& null_class = Class::Handle(Z);
Function& null_function = Function::Handle(Z);
Field& null_field = Field::Handle(Z);
IG->object_store()->set_pragma_class(null_class);
IG->object_store()->set_pragma_name(null_field);
IG->object_store()->set_pragma_options(null_field);
IG->object_store()->set_completer_class(null_class);
IG->object_store()->set_symbol_class(null_class);
IG->object_store()->set_compiletime_error_class(null_class);
IG->object_store()->set_growable_list_factory(null_function);
IG->object_store()->set_simple_instance_of_function(null_function);
IG->object_store()->set_simple_instance_of_true_function(null_function);
IG->object_store()->set_simple_instance_of_false_function(
null_function);
IG->object_store()->set_async_star_move_next_helper(null_function);
IG->object_store()->set_complete_on_async_return(null_function);
IG->object_store()->set_async_star_stream_controller(null_class);
DropMetadata();
DropLibraryEntries();
}
}
DropClasses();
DropLibraries();
Obfuscate();
{
PRECOMPILER_TIMER_SCOPE(this, Drop);
DropClasses();
DropLibraries();
}
{
PRECOMPILER_TIMER_SCOPE(this, Obfuscate);
Obfuscate();
}
#if defined(DEBUG)
const auto& non_visited =
@ -578,7 +616,11 @@ void Precompiler::DoCompileAll() {
}
#endif
DiscardCodeObjects();
ProgramVisitor::Dedup(T);
{
PRECOMPILER_TIMER_SCOPE(this, Dedup);
ProgramVisitor::Dedup(T);
}
if (FLAG_write_retained_reasons_to != nullptr) {
reasons_writer.Write();
@ -614,6 +656,7 @@ void Precompiler::DoCompileAll() {
}
void Precompiler::PrecompileConstructors() {
PRECOMPILER_TIMER_SCOPE(this, PrecompileConstructors);
class ConstructorVisitor : public FunctionVisitor {
public:
explicit ConstructorVisitor(Precompiler* precompiler, Zone* zone)
@ -683,6 +726,8 @@ void Precompiler::AddRoots() {
}
void Precompiler::Iterate() {
PRECOMPILER_TIMER_SCOPE(this, Iterate);
Function& function = Function::Handle(Z);
phase_ = Phase::kFixpointCodeGeneration;
@ -792,6 +837,7 @@ void Precompiler::ProcessFunction(const Function& function) {
if (!error_.IsNull()) {
Jump(error_);
}
// Used in the JIT to save type-feedback across compilations.
function.ClearICDataArray();
AddCalleesOf(function, gop_offset);
@ -1886,6 +1932,7 @@ void Precompiler::TraceForRetainedFunctions() {
}
void Precompiler::FinalizeDispatchTable() {
PRECOMPILER_TIMER_SCOPE(this, FinalizeDispatchTable);
if (!FLAG_use_bare_instructions || !FLAG_use_table_dispatch) return;
// Build the entries used to serialize the dispatch table before
// dropping functions, as we may clear references to Code objects.
@ -1918,6 +1965,7 @@ void Precompiler::FinalizeDispatchTable() {
}
void Precompiler::ReplaceFunctionStaticCallEntries() {
PRECOMPILER_TIMER_SCOPE(this, ReplaceFunctionStaticCallEntries);
class StaticCallTableEntryFixer : public CodeVisitor {
public:
explicit StaticCallTableEntryFixer(Zone* zone)
@ -2162,6 +2210,7 @@ void Precompiler::DropFields() {
}
void Precompiler::AttachOptimizedTypeTestingStub() {
PRECOMPILER_TIMER_SCOPE(this, AttachOptimizedTypeTestingStub);
IsolateGroup::Current()->heap()->CollectAllGarbage();
GrowableHandlePtrArray<const AbstractType> types(Z, 200);
{
@ -3083,6 +3132,7 @@ bool PrecompileParsedFunctionHelper::Compile(CompilationPipeline* pipeline) {
ic_data_array = new (zone) ZoneGrowableArray<const ICData*>();
TIMELINE_DURATION(thread(), CompilerVerbose, "BuildFlowGraph");
COMPILER_TIMINGS_TIMER_SCOPE(thread(), BuildGraph);
flow_graph =
pipeline->BuildFlowGraph(zone, parsed_function(), ic_data_array,
Compiler::kNoOSRDeoptId, optimized());
@ -3169,10 +3219,12 @@ bool PrecompileParsedFunctionHelper::Compile(CompilationPipeline* pipeline) {
pass_state.inline_id_to_token_pos, pass_state.caller_inline_id,
ic_data_array, function_stats);
{
COMPILER_TIMINGS_TIMER_SCOPE(thread(), EmitCode);
TIMELINE_DURATION(thread(), CompilerVerbose, "CompileGraph");
graph_compiler.CompileGraph();
}
{
COMPILER_TIMINGS_TIMER_SCOPE(thread(), FinalizeCode);
TIMELINE_DURATION(thread(), CompilerVerbose, "FinalizeCompilation");
ASSERT(thread()->IsMutatorThread());
FinalizeCompilation(&assembler, &graph_compiler, flow_graph,
@ -3282,7 +3334,7 @@ static ErrorPtr PrecompileFunctionHelper(Precompiler* precompiler,
Zone* const zone = stack_zone.GetZone();
const bool trace_compiler =
FLAG_trace_compiler || (FLAG_trace_optimizing_compiler && optimized);
Timer per_compile_timer(trace_compiler, "Compilation time");
Timer per_compile_timer;
per_compile_timer.Start();
ParsedFunction* parsed_function = new (zone)
@ -3346,6 +3398,7 @@ ErrorPtr Precompiler::CompileFunction(Precompiler* precompiler,
Thread* thread,
Zone* zone,
const Function& function) {
PRECOMPILER_TIMER_SCOPE(precompiler, CompileFunction);
NoActiveIsolateScope no_isolate_scope;
VMTagScope tagScope(thread, VMTag::kCompileUnoptimizedTagId);

View file

@ -16,6 +16,7 @@
#include "vm/hash_table.h"
#include "vm/object.h"
#include "vm/symbols.h"
#include "vm/timer.h"
namespace dart {
@ -267,6 +268,10 @@ class Precompiler : public ValueObject {
bool is_tracing() const { return is_tracing_; }
Thread* thread() const { return thread_; }
Zone* zone() const { return zone_; }
Isolate* isolate() const { return isolate_; }
private:
static Precompiler* singleton_;
@ -289,6 +294,8 @@ class Precompiler : public ValueObject {
explicit Precompiler(Thread* thread);
~Precompiler();
void ReportStats();
void DoCompileAll();
void AddRoots();
void AddAnnotatedRoots();
@ -348,9 +355,6 @@ class Precompiler : public ValueObject {
void FinalizeAllClasses();
Thread* thread() const { return thread_; }
Zone* zone() const { return zone_; }
Isolate* isolate() const { return isolate_; }
IsolateGroup* isolate_group() const { return thread_->isolate_group(); }
Thread* thread_;

View file

@ -12,6 +12,7 @@
#include "vm/compiler/backend/range_analysis.h"
#include "vm/compiler/cha.h"
#include "vm/compiler/compiler_state.h"
#include "vm/compiler/compiler_timings.h"
#include "vm/compiler/frontend/flow_graph_builder.h"
#include "vm/growable_array.h"
#include "vm/object_store.h"
@ -336,6 +337,8 @@ class BlockTraversalState {
};
void FlowGraph::DiscoverBlocks() {
COMPILER_TIMINGS_TIMER_SCOPE(thread(), DiscoverBlocks);
StackZone zone(thread());
// Initialize state.

View file

@ -3952,7 +3952,6 @@ class TemplateDartCall : public Definition {
ArgumentsSizeWithoutTypeArgs(), argument_names());
}
private:
virtual void RawSetInputAt(intptr_t i, Value* value) {
(*inputs_)[i] = value;

View file

@ -13,6 +13,7 @@
#include "vm/compiler/backend/il_printer.h"
#include "vm/compiler/backend/type_propagator.h"
#include "vm/compiler/compiler_pass.h"
#include "vm/compiler/compiler_timings.h"
#include "vm/compiler/frontend/flow_graph_builder.h"
#include "vm/compiler/frontend/kernel_to_il.h"
#include "vm/compiler/jit/compiler.h"
@ -440,6 +441,7 @@ class CallSites : public ValueObject {
void FindCallSites(FlowGraph* graph,
intptr_t depth,
GrowableArray<InlinedInfo>* inlined_info) {
COMPILER_TIMINGS_TIMER_SCOPE(graph->thread(), FindCallSites);
ASSERT(graph != NULL);
if (depth > inlining_depth_threshold_) {
if (FLAG_print_inlining_tree) {
@ -891,6 +893,24 @@ class CallSiteInliner : public ValueObject {
const Array& argument_names,
InlinedCallData* call_data,
bool stricter_heuristic) {
Timer timer;
if (thread()->compiler_timings() != nullptr) {
timer.Start();
}
const bool success = TryInliningImpl(function, argument_names, call_data,
stricter_heuristic);
if (thread()->compiler_timings() != nullptr) {
timer.Stop();
thread()->compiler_timings()->RecordInliningStatsByOutcome(success,
timer);
}
return success;
}
bool TryInliningImpl(const Function& function,
const Array& argument_names,
InlinedCallData* call_data,
bool stricter_heuristic) {
if (trace_inlining()) {
String& name = String::Handle(function.QualifiedUserVisibleName());
THR_Print(" => %s (deopt count %d)\n", name.ToCString(),
@ -1040,6 +1060,7 @@ class CallSiteInliner : public ValueObject {
caller_graph_->max_block_id() + 1,
entry_kind == Code::EntryKind::kUnchecked);
{
COMPILER_TIMINGS_TIMER_SCOPE(thread(), BuildGraph);
callee_graph = builder.BuildGraph();
#if defined(DEBUG)
// The inlining IDs of instructions in the callee graph are unset
@ -1051,6 +1072,10 @@ class CallSiteInliner : public ValueObject {
#endif
CalleeGraphValidator::Validate(callee_graph);
}
{
COMPILER_TIMINGS_TIMER_SCOPE(thread(), PopulateWithICData);
#if defined(DART_PRECOMPILER) && !defined(TARGET_ARCH_IA32)
if (CompilerState::Current().is_aot()) {
callee_graph->PopulateWithICData(parsed_function->function());
@ -1063,6 +1088,7 @@ class CallSiteInliner : public ValueObject {
if (!CompilerState::Current().is_aot() && function.is_intrinsic()) {
callee_graph->PopulateWithICData(parsed_function->function());
}
}
// The parameter stubs are a copy of the actual arguments providing
// concrete information about the values, for example constant values,
@ -1127,6 +1153,7 @@ class CallSiteInliner : public ValueObject {
{
// Compute SSA on the callee graph, catching bailouts.
COMPILER_TIMINGS_TIMER_SCOPE(thread(), ComputeSSA);
callee_graph->ComputeSSA(caller_graph_->max_virtual_register_number(),
param_stubs);
#if defined(DEBUG)
@ -1196,46 +1223,54 @@ class CallSiteInliner : public ValueObject {
&call_site_count);
// Use heuristics do decide if this call should be inlined.
InliningDecision decision =
ShouldWeInline(function, instruction_count, call_site_count);
if (!decision.value) {
// If size is larger than all thresholds, don't consider it again.
if ((instruction_count > FLAG_inlining_size_threshold) &&
(call_site_count > FLAG_inlining_callee_call_sites_threshold)) {
function.set_is_inlinable(false);
}
TRACE_INLINING(
THR_Print(" Bailout: heuristics (%s) with "
"code size: %" Pd ", "
"call sites: %" Pd ", "
"inlining depth of callee: %d, "
"const args: %" Pd "\n",
decision.reason, instruction_count, call_site_count,
function.inlining_depth(), constants_count));
PRINT_INLINING_TREE("Heuristic fail", &call_data->caller, &function,
call_data->call);
return false;
}
// If requested, a stricter heuristic is applied to this inlining. This
// heuristic always scans the method (rather than possibly reusing
// cached results) to make sure all specializations are accounted for.
// TODO(ajcbik): with the now better bookkeeping, explore removing this
if (stricter_heuristic) {
if (!IsSmallLeaf(callee_graph)) {
{
COMPILER_TIMINGS_TIMER_SCOPE(thread(), MakeInliningDecision);
InliningDecision decision =
ShouldWeInline(function, instruction_count, call_site_count);
if (!decision.value) {
// If size is larger than all thresholds, don't consider it again.
if ((instruction_count > FLAG_inlining_size_threshold) &&
(call_site_count > FLAG_inlining_callee_call_sites_threshold)) {
function.set_is_inlinable(false);
}
TRACE_INLINING(
THR_Print(" Bailout: heuristics (no small leaf)\n"));
PRINT_INLINING_TREE("Heuristic fail (no small leaf)",
&call_data->caller, &function, call_data->call);
THR_Print(" Bailout: heuristics (%s) with "
"code size: %" Pd ", "
"call sites: %" Pd ", "
"inlining depth of callee: %d, "
"const args: %" Pd "\n",
decision.reason, instruction_count, call_site_count,
function.inlining_depth(), constants_count));
PRINT_INLINING_TREE("Heuristic fail", &call_data->caller, &function,
call_data->call);
return false;
}
// If requested, a stricter heuristic is applied to this inlining.
// This heuristic always scans the method (rather than possibly
// reusing cached results) to make sure all specializations are
// accounted for.
// TODO(ajcbik): with the now better bookkeeping, explore removing
// this
if (stricter_heuristic) {
if (!IsSmallLeaf(callee_graph)) {
TRACE_INLINING(
THR_Print(" Bailout: heuristics (no small leaf)\n"));
PRINT_INLINING_TREE("Heuristic fail (no small leaf)",
&call_data->caller, &function,
call_data->call);
return false;
}
}
}
// Inline dispatcher methods regardless of the current depth.
const intptr_t depth =
function.IsDispatcherOrImplicitAccessor() ? 0 : inlining_depth_;
collected_call_sites_->FindCallSites(callee_graph, depth,
&inlined_info_);
{
const intptr_t depth =
function.IsDispatcherOrImplicitAccessor() ? 0 : inlining_depth_;
collected_call_sites_->FindCallSites(callee_graph, depth,
&inlined_info_);
}
// Add the function to the cache.
if (!in_cache) {
@ -1262,9 +1297,12 @@ class CallSiteInliner : public ValueObject {
callee_guarded_fields[i]);
}
FlowGraphInliner::SetInliningId(
callee_graph, inliner_->NextInlineId(callee_graph->function(),
call_data->call->source()));
{
COMPILER_TIMINGS_TIMER_SCOPE(thread(), SetInliningId);
FlowGraphInliner::SetInliningId(
callee_graph, inliner_->NextInlineId(callee_graph->function(),
call_data->call->source()));
}
TRACE_INLINING(THR_Print(" Success\n"));
TRACE_INLINING(THR_Print(
" with reason %s, code size %" Pd ", call sites: %" Pd "\n",
@ -1365,6 +1403,7 @@ class CallSiteInliner : public ValueObject {
}
void InlineCall(InlinedCallData* call_data) {
COMPILER_TIMINGS_TIMER_SCOPE(thread(), InlineCall);
FlowGraph* callee_graph = call_data->callee_graph;
auto callee_function_entry = callee_graph->graph_entry()->normal_entry();
@ -1845,6 +1884,7 @@ bool PolymorphicInliner::TryInlineRecognizedMethod(intptr_t receiver_cid,
// If not all variants are inlined, we add a PolymorphicInstanceCall
// instruction to handle the non-inlined variants.
TargetEntryInstr* PolymorphicInliner::BuildDecisionGraph() {
COMPILER_TIMINGS_TIMER_SCOPE(owner_->thread(), BuildDecisionGraph);
const intptr_t try_idx = call_->GetBlock()->try_index();
// Start with a fresh target entry.
@ -2204,6 +2244,7 @@ void FlowGraphInliner::CollectGraphInfo(FlowGraph* flow_graph,
bool force,
intptr_t* instruction_count,
intptr_t* call_site_count) {
COMPILER_TIMINGS_TIMER_SCOPE(flow_graph->thread(), CollectGraphInfo);
const Function& function = flow_graph->function();
// For OSR, don't even bother.
if (flow_graph->IsCompiledForOsr()) {
@ -2259,16 +2300,20 @@ static bool IsInlineableOperator(const Function& function) {
}
bool FlowGraphInliner::FunctionHasPreferInlinePragma(const Function& function) {
Thread* thread = dart::Thread::Current();
COMPILER_TIMINGS_TIMER_SCOPE(thread, CheckForPragma);
Object& options = Object::Handle();
return Library::FindPragma(dart::Thread::Current(), /*only_core=*/false,
function, Symbols::vm_prefer_inline(),
return Library::FindPragma(thread, /*only_core=*/false, function,
Symbols::vm_prefer_inline(),
/*multiple=*/false, &options);
}
bool FlowGraphInliner::FunctionHasNeverInlinePragma(const Function& function) {
Thread* thread = dart::Thread::Current();
COMPILER_TIMINGS_TIMER_SCOPE(thread, CheckForPragma);
Object& options = Object::Handle();
return Library::FindPragma(dart::Thread::Current(), /*only_core=*/false,
function, Symbols::vm_never_inline(),
return Library::FindPragma(thread, /*only_core=*/false, function,
Symbols::vm_never_inline(),
/*multiple=*/false, &options);
}
@ -2279,6 +2324,7 @@ bool FlowGraphInliner::AlwaysInline(const Function& function) {
return true;
}
COMPILER_TIMINGS_TIMER_SCOPE(dart::Thread::Current(), MakeInliningDecision);
// We don't want to inline DIFs for recognized methods because we would rather
// replace them with inline FG before inlining introduces any superfluous
// AssertAssignable instructions.
@ -3619,6 +3665,8 @@ bool FlowGraphInliner::TryInlineRecognizedMethod(
Definition** result,
SpeculativeInliningPolicy* policy,
FlowGraphInliner::ExactnessInfo* exactness) {
COMPILER_TIMINGS_TIMER_SCOPE(flow_graph->thread(), InlineRecognizedMethod);
if (receiver_cid == kNeverCid) {
// Receiver was defined in dead code and was replaced by the sentinel.
// Original receiver cid is lost, so don't try to inline recognized

View file

@ -15,11 +15,13 @@
#include "vm/compiler/backend/redundancy_elimination.h"
#include "vm/compiler/backend/type_propagator.h"
#include "vm/compiler/call_specializer.h"
#include "vm/compiler/compiler_timings.h"
#include "vm/compiler/write_barrier_elimination.h"
#if defined(DART_PRECOMPILER)
#include "vm/compiler/aot/aot_call_specializer.h"
#include "vm/compiler/aot/precompiler.h"
#endif
#include "vm/thread.h"
#include "vm/timeline.h"
#define COMPILER_PASS_REPEAT(Name, Body) \
@ -207,7 +209,10 @@ void CompilerPass::Run(CompilerPassState* state) const {
PrintGraph(state, kTraceBefore, round);
{
TIMELINE_DURATION(thread, CompilerVerbose, name());
repeat = DoBody(state);
{
COMPILER_TIMINGS_PASS_TIMER_SCOPE(thread, id());
repeat = DoBody(state);
}
thread->CheckForSafepoint();
}
PrintGraph(state, kTraceAfter, round);

View file

@ -12,6 +12,7 @@
#include <initializer_list>
#include "vm/growable_array.h"
#include "vm/timer.h"
#include "vm/token_position.h"
#include "vm/zone.h"
@ -110,7 +111,7 @@ class CompilerPass {
static constexpr intptr_t kNumPasses = 0 COMPILER_PASS_LIST(ADD_ONE);
#undef ADD_ONE
CompilerPass(Id id, const char* name) : name_(name), flags_(0) {
CompilerPass(Id id, const char* name) : id_(id), name_(name), flags_(0) {
ASSERT(passes_[id] == NULL);
passes_[id] = this;
@ -133,6 +134,7 @@ class CompilerPass {
intptr_t flags() const { return flags_; }
const char* name() const { return name_; }
Id id() const { return id_; }
bool IsFlagSet(Flag flag) const { return (flags() & flag) != 0; }
@ -187,6 +189,7 @@ class CompilerPass {
static CompilerPass* passes_[];
Id id_;
const char* name_;
intptr_t flags_;
};

View file

@ -87,6 +87,8 @@ compiler_sources = [
"compiler_pass.h",
"compiler_state.cc",
"compiler_state.h",
"compiler_timings.cc",
"compiler_timings.h",
"ffi/abi.cc",
"ffi/abi.h",
"ffi/call.cc",

View file

@ -13,6 +13,7 @@
#include "vm/compiler/cha.h"
#include "vm/heap/safepoint.h"
#include "vm/thread.h"
#include "vm/timer.h"
namespace dart {

View file

@ -0,0 +1,106 @@
// Copyright (c) 2021, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
#include "vm/compiler/compiler_timings.h"
namespace dart {
namespace {
#define DEFINE_NAME(Name) #Name,
const char* timer_names[] = {COMPILER_TIMERS_LIST(DEFINE_NAME)};
#undef DEFINE_NAME
} // namespace
void CompilerTimings::PrintTimers(
Zone* zone,
const std::unique_ptr<CompilerTimings::Timers>& timers,
const Timer& total,
intptr_t level) {
const int64_t total_elapsed = total.TotalElapsedTime();
int64_t total_accounted = 0;
int64_t total_accounted_cpu = 0;
for (intptr_t i = 0; i < kNumTimers; i++) {
const auto& entry = timers->timers_[i];
total_accounted += entry.TotalElapsedTime();
total_accounted_cpu += entry.TotalElapsedTimeCpu();
}
if (level > 0) {
// Print (self: ...) timing on the previous line if our amount of time
// spent outside of nested timers is between 1% and 99%.
const double kMinInterestingSelfPercent = 1.0;
const double kMaxInterestingSelfPercent = 99.0;
const int64_t self = total_elapsed - total_accounted;
const auto self_pct =
static_cast<double>(Utils::Abs(self)) * 100 / total_elapsed;
if (kMinInterestingSelfPercent < self_pct &&
self_pct < kMaxInterestingSelfPercent) {
const int64_t self_cpu =
total.TotalElapsedTimeCpu() - total_accounted_cpu;
const auto self_timer = Timer(self, self_cpu);
OS::PrintErr(" (self: [%6.2f%%] %s)\n", self_pct,
self_timer.FormatElapsedHumanReadable(zone));
} else {
OS::PrintErr("\n");
}
}
// Sort timers by the amount of time elapsed within each one.
Timer* by_elapsed[kNumTimers];
for (intptr_t i = 0; i < kNumTimers; i++) {
by_elapsed[i] = &timers->timers_[i];
}
qsort(by_elapsed, kNumTimers, sizeof(Timer*),
[](const void* pa, const void* pb) -> int {
const auto a_elapsed =
(*static_cast<Timer* const*>(pa))->TotalElapsedTime();
const auto b_elapsed =
(*static_cast<Timer* const*>(pb))->TotalElapsedTime();
return b_elapsed < a_elapsed ? -1 : b_elapsed > a_elapsed ? 1 : 0;
});
// Print sorted in descending order.
for (intptr_t i = 0; i < kNumTimers; i++) {
auto timer = by_elapsed[i];
if (timer->TotalElapsedTime() > 0) {
const auto timer_id = static_cast<TimerId>(timer - &timers->timers_[0]);
const auto pct =
static_cast<double>(timer->TotalElapsedTime()) * 100 / total_elapsed;
// Must be int for * width specifier.
const int indent = static_cast<int>(level * 2);
// Note: don't emit EOL because PrintTimers can print self timing.
OS::PrintErr(
"%*s[%6.2f%%] %-*s %-10s", indent, "", pct, 60 - indent,
timer_names[timer_id],
Timer::FormatElapsedHumanReadable(zone, timer->TotalElapsedTime(),
timer->TotalElapsedTimeCpu()));
// Print nested timers if any or just emit EOL.
if (timers->nested_[timer_id] != nullptr) {
PrintTimers(zone, timers->nested_[timer_id], *timer, level + 1);
} else {
OS::PrintErr("\n");
}
}
}
}
void CompilerTimings::Print() {
Zone* zone = Thread::Current()->zone();
OS::PrintErr("Precompilation took: %s\n",
total_.FormatElapsedHumanReadable(zone));
PrintTimers(zone, root_, total_, 0);
OS::PrintErr("Inlining by outcome\n Success: %s\n Failure: %s\n",
try_inlining_success_.FormatElapsedHumanReadable(zone),
try_inlining_failure_.FormatElapsedHumanReadable(zone));
}
} // namespace dart

View file

@ -0,0 +1,160 @@
// Copyright (c) 2021, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
#ifndef RUNTIME_VM_COMPILER_COMPILER_TIMINGS_H_
#define RUNTIME_VM_COMPILER_COMPILER_TIMINGS_H_
#include <memory>
#include "platform/allocation.h"
#include "vm/compiler/compiler_pass.h"
#include "vm/thread.h"
#include "vm/timer.h"
#if defined(DART_PRECOMPILED_RUNTIME)
#error "AOT runtime should not use compiler sources (including header files)"
#endif // defined(DART_PRECOMPILED_RUNTIME)
#define PRECOMPILER_TIMERS_LIST(V) \
V(CompileAll) \
V(Iterate) \
V(CompileFunction) \
V(PrecompileConstructors) \
V(AttachOptimizedTypeTestingStub) \
V(TraceForRetainedFunctions) \
V(FinalizeDispatchTable) \
V(ReplaceFunctionStaticCallEntries) \
V(Drop) \
V(Obfuscate) \
V(Dedup) \
V(SymbolsCompact)
#define INLINING_TIMERS_LIST(V) \
V(CollectGraphInfo) \
V(PopulateWithICData) \
V(FindCallSites) \
V(SetInliningId) \
V(MakeInliningDecision) \
V(CheckForPragma) \
V(InlineCall) \
V(InlineRecognizedMethod) \
V(DiscoverBlocks) \
V(BuildDecisionGraph) \
V(PrepareGraphs)
// Note: COMPILER_PASS_LIST must be the first element of the list below because
// we expect that pass ids are the same as ids of corresponding timers.
#define COMPILER_TIMERS_LIST(V) \
COMPILER_PASS_LIST(V) \
PRECOMPILER_TIMERS_LIST(V) \
INLINING_TIMERS_LIST(V) \
V(BuildGraph) \
V(EmitCode) \
V(FinalizeCode)
namespace dart {
// |CompilerTimings| provides a way to track time taken by various compiler
// passes via a fixed number of timers (specified in |COMPILER_TIMERS_LIST|).
//
// It supports arbitrary nesting of timers e.g. if |DiscoverBlocks| is invoked
// within two different compiler passes like |IfConvert| and |BranchSimplify|
// then |CompilerTimings| will separate these two invocations and measure each
// separately.
class CompilerTimings : public MallocAllocated {
private:
#define INC(Name) +1
static constexpr intptr_t kNumTimers = 0 COMPILER_TIMERS_LIST(INC);
#undef INC
struct Timers : public MallocAllocated {
Timer timers_[kNumTimers];
std::unique_ptr<Timers> nested_[kNumTimers];
};
public:
enum TimerId {
#define DECLARE_TIMER_ID(Name) k##Name,
COMPILER_TIMERS_LIST(DECLARE_TIMER_ID)
#undef DECLARE_TIMER_ID
};
// Timing scope which starts and stop the timer with the given |id|.
class Scope : public StackResource {
public:
Scope(Thread* thread, TimerId id)
: StackResource(thread), stats_(thread->compiler_timings()) {
if (stats_ != nullptr) {
outer_nested_ = stats_->nested_;
if (*outer_nested_ == nullptr) {
// Created array of nested timers if we don't have one yet.
*outer_nested_ = std::make_unique<Timers>();
}
timer_ = &(*outer_nested_)->timers_[id];
stats_->nested_ = &(*outer_nested_)->nested_[id];
timer_->Start();
}
}
~Scope() {
if (stats_ != nullptr) {
timer_->Stop();
stats_->nested_ = outer_nested_;
}
}
private:
CompilerTimings* const stats_;
Timer* timer_;
std::unique_ptr<Timers>* outer_nested_;
};
CompilerTimings() { total_.Start(); }
void RecordInliningStatsByOutcome(bool success, const Timer& timer) {
if (success) {
try_inlining_success_.AddTotal(timer);
} else {
try_inlining_failure_.AddTotal(timer);
}
}
void Print();
private:
void PrintTimers(Zone* zone,
const std::unique_ptr<CompilerTimings::Timers>& timers,
const Timer& total,
intptr_t level);
Timer total_;
std::unique_ptr<Timers> root_ = std::make_unique<Timers>();
// Timers nested under the currently running timer(s).
std::unique_ptr<Timers>* nested_ = &root_;
Timer try_inlining_success_;
Timer try_inlining_failure_;
};
#define TIMER_SCOPE_NAME2(counter) timer_scope_##counter
#define TIMER_SCOPE_NAME(counter) TIMER_SCOPE_NAME2(counter)
#define COMPILER_TIMINGS_TIMER_SCOPE(thread, timer_id) \
CompilerTimings::Scope TIMER_SCOPE_NAME(__COUNTER__)( \
thread, CompilerTimings::k##timer_id)
#define COMPILER_TIMINGS_PASS_TIMER_SCOPE(thread, pass_id) \
CompilerTimings::Scope TIMER_SCOPE_NAME(__COUNTER__)( \
thread, static_cast<CompilerTimings::TimerId>(pass_id))
#define PRECOMPILER_TIMER_SCOPE(precompiler, timer_id) \
CompilerTimings::Scope TIMER_SCOPE_NAME(__COUNTER__)( \
(precompiler)->thread(), CompilerTimings::k##timer_id)
} // namespace dart
#endif // RUNTIME_VM_COMPILER_COMPILER_TIMINGS_H_

View file

@ -7,6 +7,7 @@
#include "vm/compiler/backend/branch_optimizer.h"
#include "vm/compiler/backend/flow_graph.h"
#include "vm/compiler/backend/il.h"
#include "vm/compiler/compiler_timings.h"
#include "vm/compiler/frontend/kernel_to_il.h"
#include "vm/object.h"
#include "vm/zone.h"
@ -37,6 +38,7 @@ uword FindDoubleConstant(double value) {
}
void InlineExitCollector::PrepareGraphs(FlowGraph* callee_graph) {
COMPILER_TIMINGS_TIMER_SCOPE(callee_graph->thread(), PrepareGraphs);
ASSERT(callee_graph->graph_entry()->SuccessorCount() == 1);
ASSERT(callee_graph->max_block_id() > caller_graph_->max_block_id());
ASSERT(callee_graph->max_virtual_register_number() >

View file

@ -704,7 +704,7 @@ static ObjectPtr CompileFunctionHelper(CompilationPipeline* pipeline,
Zone* const zone = stack_zone.GetZone();
const bool trace_compiler =
FLAG_trace_compiler || (FLAG_trace_optimizing_compiler && optimized);
Timer per_compile_timer(trace_compiler, "Compilation time");
Timer per_compile_timer;
per_compile_timer.Start();
ParsedFunction* parsed_function = new (zone)

View file

@ -35,7 +35,7 @@ class ScopeTimer : public ValueObject {
}
int64_t elapsed = GetElapsed();
double seconds = MicrosecondsToSeconds(elapsed);
OS::PrintErr("%s: %f seconds (%" Pd64 " \u00B5s)\n", name_, seconds,
OS::PrintErr("%s: %.2f seconds (%" Pd64 " \u00B5s)\n", name_, seconds,
elapsed);
}

View file

@ -726,7 +726,7 @@ VM_UNIT_TEST_CASE(FullSnapshot) {
uint8_t* isolate_snapshot_data_buffer;
// Start an Isolate, load a script and create a full snapshot.
Timer timer1(true, "Snapshot_test");
Timer timer1;
timer1.Start();
{
TestIsolateScope __test_isolate__;
@ -760,7 +760,7 @@ VM_UNIT_TEST_CASE(FullSnapshot) {
// Now Create another isolate using the snapshot and execute a method
// from the script.
Timer timer2(true, "Snapshot_test");
Timer timer2;
timer2.Start();
TestCase::CreateTestIsolateFromSnapshot(isolate_snapshot_data_buffer);
{

View file

@ -32,6 +32,7 @@ class AbstractType;
class ApiLocalScope;
class Array;
class CompilerState;
class CompilerTimings;
class Class;
class Code;
class Error;
@ -506,6 +507,12 @@ class Thread : public ThreadState {
type_usage_info_ = value;
}
CompilerTimings* compiler_timings() const { return compiler_timings_; }
void set_compiler_timings(CompilerTimings* stats) {
compiler_timings_ = stats;
}
int32_t no_callback_scope_depth() const { return no_callback_scope_depth_; }
void IncrementNoCallbackScopeDepth() {
@ -1117,6 +1124,8 @@ class Thread : public ThreadState {
TypeUsageInfo* type_usage_info_;
GrowableObjectArrayPtr pending_functions_;
CompilerTimings* compiler_timings_ = nullptr;
ErrorPtr sticky_error_;
Random thread_random_;

View file

@ -13,17 +13,24 @@
namespace dart {
struct MeasureMonotonic {
static inline int64_t Now() { return OS::GetCurrentMonotonicMicros(); }
};
struct MeasureCpu {
static inline int64_t Now() { return OS::GetCurrentThreadCPUMicros(); }
};
// Timer class allows timing of specific operations in the VM.
class Timer : public ValueObject {
template <typename Measure>
class TimerImpl : public ValueObject {
public:
Timer(bool report, const char* message) : report_(report), message_(message) {
Reset();
}
~Timer() {}
TimerImpl() { Reset(); }
~TimerImpl() {}
// Start timer.
void Start() {
start_ = OS::GetCurrentMonotonicMicros();
start_ = Measure::Now();
running_ = true;
}
@ -31,7 +38,7 @@ class Timer : public ValueObject {
void Stop() {
ASSERT(start_ != 0);
ASSERT(running());
stop_ = OS::GetCurrentMonotonicMicros();
stop_ = Measure::Now();
int64_t elapsed = ElapsedMicros();
max_contiguous_ = Utils::Maximum(max_contiguous_.load(), elapsed);
// Make increment atomic in case it occurs in parallel with aggregation.
@ -43,7 +50,7 @@ class Timer : public ValueObject {
int64_t TotalElapsedTime() const {
int64_t result = total_;
if (running_) {
int64_t now = OS::GetCurrentMonotonicMicros();
int64_t now = Measure::Now();
result += (now - start_);
}
return result;
@ -52,7 +59,7 @@ class Timer : public ValueObject {
int64_t MaxContiguous() const {
int64_t result = max_contiguous_;
if (running_) {
int64_t now = OS::GetCurrentMonotonicMicros();
int64_t now = Measure::Now();
result = Utils::Maximum(result, now - start_);
}
return result;
@ -71,14 +78,17 @@ class Timer : public ValueObject {
(max_contiguous_ == 0) && !running_;
}
void AddTotal(const Timer& other) { total_.fetch_add(other.total_); }
void AddTotal(const TimerImpl& other) { total_.fetch_add(other.total_); }
// Accessors.
bool report() const { return report_; }
bool running() const { return running_; }
const char* message() const { return message_; }
private:
friend class Timer;
explicit TimerImpl(int64_t elapsed)
: total_(elapsed), max_contiguous_(elapsed) {}
int64_t ElapsedMicros() const {
ASSERT(start_ != 0);
ASSERT(stop_ != 0);
@ -89,13 +99,105 @@ class Timer : public ValueObject {
RelaxedAtomic<int64_t> stop_;
RelaxedAtomic<int64_t> total_;
RelaxedAtomic<int64_t> max_contiguous_;
bool report_;
bool running_;
const char* message_;
bool running_ = false;
DISALLOW_COPY_AND_ASSIGN(TimerImpl);
};
class Timer : public ValueObject {
public:
Timer(int64_t elapsed, int64_t elapsed_cpu)
: monotonic_(elapsed), cpu_(elapsed) {}
Timer() { Reset(); }
~Timer() {}
// Start timer.
void Start() {
cpu_.Start();
monotonic_.Start();
}
// Stop timer.
void Stop() {
cpu_.Stop();
monotonic_.Stop();
}
// Get total cumulative elapsed time in micros.
int64_t TotalElapsedTime() const { return monotonic_.TotalElapsedTime(); }
int64_t TotalElapsedTimeCpu() const { return cpu_.TotalElapsedTime(); }
int64_t MaxContiguous() const { return monotonic_.MaxContiguous(); }
void Reset() {
monotonic_.Reset();
cpu_.Reset();
}
bool IsReset() const { return monotonic_.IsReset(); }
void AddTotal(const Timer& other) {
monotonic_.AddTotal(other.monotonic_);
cpu_.AddTotal(other.cpu_);
}
const char* FormatElapsedHumanReadable(Zone* zone) const {
return FormatElapsedHumanReadable(zone, TotalElapsedTime(),
TotalElapsedTimeCpu());
}
static const char* FormatTime(Zone* zone, int64_t total) {
if (total > kMicrosecondsPerSecond) {
return OS::SCreate(zone, "%6.2f s", MicrosecondsToSeconds(total));
} else if (total > kMicrosecondsPerMillisecond) {
return OS::SCreate(zone, "%6.2f ms", MicrosecondsToMilliseconds(total));
} else {
return OS::SCreate(zone, "%6" Pd64 " \u00B5s", total);
}
}
static constexpr double kCpuTimeReportingThreshold = 0.05;
// Formats the given monotonic and CPU times as a human readable string.
//
// CPU time is included into the formated string only if
// it is |kCpuTimeReportingThreshold| percent different from the monotonic
// time.
static const char* FormatElapsedHumanReadable(Zone* zone,
int64_t total_elapsed,
int64_t total_elapsed_cpu) {
if ((total_elapsed == 0) ||
static_cast<double>(Utils::Abs(total_elapsed - total_elapsed_cpu) /
total_elapsed) < kCpuTimeReportingThreshold) {
return FormatTime(zone, total_elapsed);
} else {
return OS::SCreate(zone, "%s (cpu %s)", FormatTime(zone, total_elapsed),
FormatTime(zone, total_elapsed_cpu));
}
}
private:
TimerImpl<MeasureMonotonic> monotonic_;
TimerImpl<MeasureCpu> cpu_;
DISALLOW_COPY_AND_ASSIGN(Timer);
};
class TimerScope : public StackResource {
public:
TimerScope(ThreadState* thread, Timer* timer)
: StackResource(thread), timer_(timer) {
if (timer_ != nullptr) timer_->Start();
}
~TimerScope() {
if (timer_ != nullptr) timer_->Stop();
}
private:
Timer* const timer_;
};
} // namespace dart
#endif // RUNTIME_VM_TIMER_H_