From 34add230f6eff9c511fa0f9a3a82e5e3e7b94ee1 Mon Sep 17 00:00:00 2001 From: Vyacheslav Egorov Date: Tue, 11 May 2021 11:15:53 +0000 Subject: [PATCH] [vm/compiler] Add --print-precompiler-timings This flag allows dumping timings of various precompiler passes. TEST=precompiled dart2js with --print-precompiler-timings Change-Id: I54d6fdf26c25a0e43ae4f2717e833e09e9321d81 Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/198407 Commit-Queue: Vyacheslav Egorov Reviewed-by: Alexander Markov --- runtime/vm/benchmark_test.cc | 24 +-- runtime/vm/compiler/aot/precompiler.cc | 121 +++++++++---- runtime/vm/compiler/aot/precompiler.h | 10 +- runtime/vm/compiler/backend/flow_graph.cc | 3 + runtime/vm/compiler/backend/il.h | 1 - runtime/vm/compiler/backend/inliner.cc | 130 +++++++++----- runtime/vm/compiler/compiler_pass.cc | 7 +- runtime/vm/compiler/compiler_pass.h | 5 +- runtime/vm/compiler/compiler_sources.gni | 2 + runtime/vm/compiler/compiler_state.h | 1 + runtime/vm/compiler/compiler_timings.cc | 106 ++++++++++++ runtime/vm/compiler/compiler_timings.h | 160 ++++++++++++++++++ .../compiler/frontend/flow_graph_builder.cc | 2 + runtime/vm/compiler/jit/compiler.cc | 2 +- runtime/vm/scope_timer.h | 2 +- runtime/vm/snapshot_test.cc | 4 +- runtime/vm/thread.h | 9 + runtime/vm/timer.h | 132 +++++++++++++-- 18 files changed, 609 insertions(+), 112 deletions(-) create mode 100644 runtime/vm/compiler/compiler_timings.cc create mode 100644 runtime/vm/compiler/compiler_timings.h diff --git a/runtime/vm/benchmark_test.cc b/runtime/vm/benchmark_test.cc index cf5a69a1f45..ee64cc54554 100644 --- a/runtime/vm/benchmark_test.cc +++ b/runtime/vm/benchmark_test.cc @@ -48,7 +48,7 @@ BENCHMARK(CorelibCompileAll) { TransitionNativeToVM transition(thread); StackZone zone(thread); HANDLESCOPE(thread); - Timer timer(true, "Compile all of Core lib benchmark"); + Timer timer; timer.Start(); const Error& error = Error::Handle(Library::CompileAll(/*ignore_error=*/true)); @@ -92,7 +92,7 @@ static char* ComputeKernelServicePath(const char* arg) { // BENCHMARK(CorelibIsolateStartup) { const int kNumIterations = 1000; - Timer timer(true, "CorelibIsolateStartup"); + Timer timer; Isolate* isolate = thread->isolate(); Dart_ExitIsolate(); for (int i = 0; i < kNumIterations; i++) { @@ -192,7 +192,7 @@ BENCHMARK(UseDartApi) { result = Dart_Invoke(lib, NewString("benchmark"), 1, args); EXPECT_VALID(result); - Timer timer(true, "UseDartApi benchmark"); + Timer timer; timer.Start(); result = Dart_Invoke(lib, NewString("benchmark"), 1, args); EXPECT_VALID(result); @@ -208,7 +208,7 @@ static void NoopFinalizer(void* isolate_callback_data, void* peer) {} // BENCHMARK(DartStringAccess) { const int kNumIterations = 10000000; - Timer timer(true, "DartStringAccess benchmark"); + Timer timer; timer.Start(); Dart_EnterScope(); @@ -281,7 +281,7 @@ BENCHMARK(KernelServiceCompileAll) { result = Dart_FinalizeLoading(false); EXPECT_VALID(result); - Timer timer(true, "Compile all of kernel service benchmark"); + Timer timer; timer.Start(); #if !defined(PRODUCT) const bool old_flag = FLAG_background_compilation; @@ -303,7 +303,7 @@ BENCHMARK(KernelServiceCompileAll) { // Measure frame lookup during stack traversal. // static void StackFrame_accessFrame(Dart_NativeArguments args) { - Timer timer(true, "LookupDartCode benchmark"); + Timer timer; timer.Start(); { Thread* thread = Thread::Current(); @@ -474,7 +474,7 @@ BENCHMARK(CreateMirrorSystem) { Dart_Handle lib = TestCase::LoadTestScript(kScriptChars, NULL); - Timer timer(true, "currentMirrorSystem() benchmark"); + Timer timer; timer.Start(); Dart_Handle result = Dart_Invoke(lib, NewString("benchmark"), 0, NULL); EXPECT_VALID(result); @@ -496,7 +496,7 @@ BENCHMARK(EnterExitIsolate) { Api::CheckAndFinalizePendingClasses(thread); } Dart_Isolate isolate = Dart_CurrentIsolate(); - Timer timer(true, "Enter and Exit isolate"); + Timer timer; timer.Start(); for (intptr_t i = 0; i < kLoopCount; i++) { Dart_ExitIsolate(); @@ -513,7 +513,7 @@ BENCHMARK(SerializeNull) { HANDLESCOPE(thread); const Object& null_object = Object::Handle(); const intptr_t kLoopCount = 1000000; - Timer timer(true, "Serialize Null"); + Timer timer; timer.Start(); for (intptr_t i = 0; i < kLoopCount; i++) { StackZone zone(thread); @@ -536,7 +536,7 @@ BENCHMARK(SerializeSmi) { HANDLESCOPE(thread); const Integer& smi_object = Integer::Handle(Smi::New(42)); const intptr_t kLoopCount = 1000000; - Timer timer(true, "Serialize Smi"); + Timer timer; timer.Start(); for (intptr_t i = 0; i < kLoopCount; i++) { StackZone zone(thread); @@ -561,7 +561,7 @@ BENCHMARK(SimpleMessage) { array_object.SetAt(0, Integer::Handle(Smi::New(42))); array_object.SetAt(1, Object::Handle()); const intptr_t kLoopCount = 1000000; - Timer timer(true, "Simple Message"); + Timer timer; timer.Start(); for (intptr_t i = 0; i < kLoopCount; i++) { StackZone zone(thread); @@ -595,7 +595,7 @@ BENCHMARK(LargeMap) { Instance& map = Instance::Handle(); map ^= Api::UnwrapHandle(h_result); const intptr_t kLoopCount = 100; - Timer timer(true, "Large Map"); + Timer timer; timer.Start(); for (intptr_t i = 0; i < kLoopCount; i++) { StackZone zone(thread); diff --git a/runtime/vm/compiler/aot/precompiler.cc b/runtime/vm/compiler/aot/precompiler.cc index 56c13e88cbc..016356d0fa3 100644 --- a/runtime/vm/compiler/aot/precompiler.cc +++ b/runtime/vm/compiler/aot/precompiler.cc @@ -27,6 +27,7 @@ #include "vm/compiler/cha.h" #include "vm/compiler/compiler_pass.h" #include "vm/compiler/compiler_state.h" +#include "vm/compiler/compiler_timings.h" #include "vm/compiler/frontend/flow_graph_builder.h" #include "vm/compiler/frontend/kernel_to_il.h" #include "vm/compiler/jit/compiler.h" @@ -60,6 +61,10 @@ namespace dart { #define IG (isolate_group()) #define Z (zone()) +DEFINE_FLAG(bool, + print_precompiler_timings, + false, + "Print per-phase breakdown of time spent precompiling"); DEFINE_FLAG(bool, print_unique_targets, false, "Print unique dynamic targets"); DEFINE_FLAG(bool, print_gop, false, "Print global object pool"); DEFINE_FLAG(bool, trace_precompiler, false, "Trace precompiler."); @@ -331,12 +336,21 @@ ErrorPtr Precompiler::CompileAll() { if (setjmp(*jump.Set()) == 0) { Precompiler precompiler(Thread::Current()); precompiler.DoCompileAll(); + precompiler.ReportStats(); return Error::null(); } else { return Thread::Current()->StealStickyError(); } } +void Precompiler::ReportStats() { + if (!FLAG_print_precompiler_timings) { + return; + } + + thread()->compiler_timings()->Print(); +} + Precompiler::Precompiler(Thread* thread) : thread_(thread), zone_(NULL), @@ -378,6 +392,10 @@ Precompiler::Precompiler(Thread* thread) get_runtime_type_is_unique_(false) { ASSERT(Precompiler::singleton_ == NULL); Precompiler::singleton_ = this; + + if (FLAG_print_precompiler_timings) { + thread->set_compiler_timings(new CompilerTimings()); + } } Precompiler::~Precompiler() { @@ -389,9 +407,13 @@ Precompiler::~Precompiler() { ASSERT(Precompiler::singleton_ == this); Precompiler::singleton_ = NULL; + + delete thread()->compiler_timings(); + thread()->set_compiler_timings(nullptr); } void Precompiler::DoCompileAll() { + PRECOMPILER_TIMER_SCOPE(this, CompileAll); { StackZone stack_zone(T); zone_ = stack_zone.GetZone(); @@ -530,44 +552,60 @@ void Precompiler::DoCompileAll() { tracer_ = nullptr; } - TraceForRetainedFunctions(); + { + PRECOMPILER_TIMER_SCOPE(this, TraceForRetainedFunctions); + TraceForRetainedFunctions(); + } + FinalizeDispatchTable(); ReplaceFunctionStaticCallEntries(); - DropFunctions(); - DropFields(); - TraceTypesFromRetainedClasses(); - DropTypes(); - DropFunctionTypes(); - DropTypeParameters(); - DropTypeArguments(); + { + PRECOMPILER_TIMER_SCOPE(this, Drop); - // Clear these before dropping classes as they may hold onto otherwise - // dead instances of classes we will remove or otherwise unused symbols. - IG->object_store()->set_unique_dynamic_targets(Array::null_array()); - Class& null_class = Class::Handle(Z); - Function& null_function = Function::Handle(Z); - Field& null_field = Field::Handle(Z); - IG->object_store()->set_pragma_class(null_class); - IG->object_store()->set_pragma_name(null_field); - IG->object_store()->set_pragma_options(null_field); - IG->object_store()->set_completer_class(null_class); - IG->object_store()->set_symbol_class(null_class); - IG->object_store()->set_compiletime_error_class(null_class); - IG->object_store()->set_growable_list_factory(null_function); - IG->object_store()->set_simple_instance_of_function(null_function); - IG->object_store()->set_simple_instance_of_true_function(null_function); - IG->object_store()->set_simple_instance_of_false_function(null_function); - IG->object_store()->set_async_star_move_next_helper(null_function); - IG->object_store()->set_complete_on_async_return(null_function); - IG->object_store()->set_async_star_stream_controller(null_class); - DropMetadata(); - DropLibraryEntries(); + DropFunctions(); + DropFields(); + TraceTypesFromRetainedClasses(); + DropTypes(); + DropFunctionTypes(); + DropTypeParameters(); + DropTypeArguments(); + + // Clear these before dropping classes as they may hold onto otherwise + // dead instances of classes we will remove or otherwise unused symbols. + IG->object_store()->set_unique_dynamic_targets(Array::null_array()); + Class& null_class = Class::Handle(Z); + Function& null_function = Function::Handle(Z); + Field& null_field = Field::Handle(Z); + IG->object_store()->set_pragma_class(null_class); + IG->object_store()->set_pragma_name(null_field); + IG->object_store()->set_pragma_options(null_field); + IG->object_store()->set_completer_class(null_class); + IG->object_store()->set_symbol_class(null_class); + IG->object_store()->set_compiletime_error_class(null_class); + IG->object_store()->set_growable_list_factory(null_function); + IG->object_store()->set_simple_instance_of_function(null_function); + IG->object_store()->set_simple_instance_of_true_function(null_function); + IG->object_store()->set_simple_instance_of_false_function( + null_function); + IG->object_store()->set_async_star_move_next_helper(null_function); + IG->object_store()->set_complete_on_async_return(null_function); + IG->object_store()->set_async_star_stream_controller(null_class); + DropMetadata(); + DropLibraryEntries(); + } } - DropClasses(); - DropLibraries(); - Obfuscate(); + { + PRECOMPILER_TIMER_SCOPE(this, Drop); + DropClasses(); + DropLibraries(); + } + + { + PRECOMPILER_TIMER_SCOPE(this, Obfuscate); + Obfuscate(); + } #if defined(DEBUG) const auto& non_visited = @@ -578,7 +616,11 @@ void Precompiler::DoCompileAll() { } #endif DiscardCodeObjects(); - ProgramVisitor::Dedup(T); + + { + PRECOMPILER_TIMER_SCOPE(this, Dedup); + ProgramVisitor::Dedup(T); + } if (FLAG_write_retained_reasons_to != nullptr) { reasons_writer.Write(); @@ -614,6 +656,7 @@ void Precompiler::DoCompileAll() { } void Precompiler::PrecompileConstructors() { + PRECOMPILER_TIMER_SCOPE(this, PrecompileConstructors); class ConstructorVisitor : public FunctionVisitor { public: explicit ConstructorVisitor(Precompiler* precompiler, Zone* zone) @@ -683,6 +726,8 @@ void Precompiler::AddRoots() { } void Precompiler::Iterate() { + PRECOMPILER_TIMER_SCOPE(this, Iterate); + Function& function = Function::Handle(Z); phase_ = Phase::kFixpointCodeGeneration; @@ -792,6 +837,7 @@ void Precompiler::ProcessFunction(const Function& function) { if (!error_.IsNull()) { Jump(error_); } + // Used in the JIT to save type-feedback across compilations. function.ClearICDataArray(); AddCalleesOf(function, gop_offset); @@ -1886,6 +1932,7 @@ void Precompiler::TraceForRetainedFunctions() { } void Precompiler::FinalizeDispatchTable() { + PRECOMPILER_TIMER_SCOPE(this, FinalizeDispatchTable); if (!FLAG_use_bare_instructions || !FLAG_use_table_dispatch) return; // Build the entries used to serialize the dispatch table before // dropping functions, as we may clear references to Code objects. @@ -1918,6 +1965,7 @@ void Precompiler::FinalizeDispatchTable() { } void Precompiler::ReplaceFunctionStaticCallEntries() { + PRECOMPILER_TIMER_SCOPE(this, ReplaceFunctionStaticCallEntries); class StaticCallTableEntryFixer : public CodeVisitor { public: explicit StaticCallTableEntryFixer(Zone* zone) @@ -2162,6 +2210,7 @@ void Precompiler::DropFields() { } void Precompiler::AttachOptimizedTypeTestingStub() { + PRECOMPILER_TIMER_SCOPE(this, AttachOptimizedTypeTestingStub); IsolateGroup::Current()->heap()->CollectAllGarbage(); GrowableHandlePtrArray types(Z, 200); { @@ -3083,6 +3132,7 @@ bool PrecompileParsedFunctionHelper::Compile(CompilationPipeline* pipeline) { ic_data_array = new (zone) ZoneGrowableArray(); TIMELINE_DURATION(thread(), CompilerVerbose, "BuildFlowGraph"); + COMPILER_TIMINGS_TIMER_SCOPE(thread(), BuildGraph); flow_graph = pipeline->BuildFlowGraph(zone, parsed_function(), ic_data_array, Compiler::kNoOSRDeoptId, optimized()); @@ -3169,10 +3219,12 @@ bool PrecompileParsedFunctionHelper::Compile(CompilationPipeline* pipeline) { pass_state.inline_id_to_token_pos, pass_state.caller_inline_id, ic_data_array, function_stats); { + COMPILER_TIMINGS_TIMER_SCOPE(thread(), EmitCode); TIMELINE_DURATION(thread(), CompilerVerbose, "CompileGraph"); graph_compiler.CompileGraph(); } { + COMPILER_TIMINGS_TIMER_SCOPE(thread(), FinalizeCode); TIMELINE_DURATION(thread(), CompilerVerbose, "FinalizeCompilation"); ASSERT(thread()->IsMutatorThread()); FinalizeCompilation(&assembler, &graph_compiler, flow_graph, @@ -3282,7 +3334,7 @@ static ErrorPtr PrecompileFunctionHelper(Precompiler* precompiler, Zone* const zone = stack_zone.GetZone(); const bool trace_compiler = FLAG_trace_compiler || (FLAG_trace_optimizing_compiler && optimized); - Timer per_compile_timer(trace_compiler, "Compilation time"); + Timer per_compile_timer; per_compile_timer.Start(); ParsedFunction* parsed_function = new (zone) @@ -3346,6 +3398,7 @@ ErrorPtr Precompiler::CompileFunction(Precompiler* precompiler, Thread* thread, Zone* zone, const Function& function) { + PRECOMPILER_TIMER_SCOPE(precompiler, CompileFunction); NoActiveIsolateScope no_isolate_scope; VMTagScope tagScope(thread, VMTag::kCompileUnoptimizedTagId); diff --git a/runtime/vm/compiler/aot/precompiler.h b/runtime/vm/compiler/aot/precompiler.h index fba94130a2a..c5d14fcb138 100644 --- a/runtime/vm/compiler/aot/precompiler.h +++ b/runtime/vm/compiler/aot/precompiler.h @@ -16,6 +16,7 @@ #include "vm/hash_table.h" #include "vm/object.h" #include "vm/symbols.h" +#include "vm/timer.h" namespace dart { @@ -267,6 +268,10 @@ class Precompiler : public ValueObject { bool is_tracing() const { return is_tracing_; } + Thread* thread() const { return thread_; } + Zone* zone() const { return zone_; } + Isolate* isolate() const { return isolate_; } + private: static Precompiler* singleton_; @@ -289,6 +294,8 @@ class Precompiler : public ValueObject { explicit Precompiler(Thread* thread); ~Precompiler(); + void ReportStats(); + void DoCompileAll(); void AddRoots(); void AddAnnotatedRoots(); @@ -348,9 +355,6 @@ class Precompiler : public ValueObject { void FinalizeAllClasses(); - Thread* thread() const { return thread_; } - Zone* zone() const { return zone_; } - Isolate* isolate() const { return isolate_; } IsolateGroup* isolate_group() const { return thread_->isolate_group(); } Thread* thread_; diff --git a/runtime/vm/compiler/backend/flow_graph.cc b/runtime/vm/compiler/backend/flow_graph.cc index 2d890b42539..a24af4435d8 100644 --- a/runtime/vm/compiler/backend/flow_graph.cc +++ b/runtime/vm/compiler/backend/flow_graph.cc @@ -12,6 +12,7 @@ #include "vm/compiler/backend/range_analysis.h" #include "vm/compiler/cha.h" #include "vm/compiler/compiler_state.h" +#include "vm/compiler/compiler_timings.h" #include "vm/compiler/frontend/flow_graph_builder.h" #include "vm/growable_array.h" #include "vm/object_store.h" @@ -336,6 +337,8 @@ class BlockTraversalState { }; void FlowGraph::DiscoverBlocks() { + COMPILER_TIMINGS_TIMER_SCOPE(thread(), DiscoverBlocks); + StackZone zone(thread()); // Initialize state. diff --git a/runtime/vm/compiler/backend/il.h b/runtime/vm/compiler/backend/il.h index 50300c3ee71..90449e5ceec 100644 --- a/runtime/vm/compiler/backend/il.h +++ b/runtime/vm/compiler/backend/il.h @@ -3952,7 +3952,6 @@ class TemplateDartCall : public Definition { ArgumentsSizeWithoutTypeArgs(), argument_names()); } - private: virtual void RawSetInputAt(intptr_t i, Value* value) { (*inputs_)[i] = value; diff --git a/runtime/vm/compiler/backend/inliner.cc b/runtime/vm/compiler/backend/inliner.cc index 4050f449a0c..20293d30317 100644 --- a/runtime/vm/compiler/backend/inliner.cc +++ b/runtime/vm/compiler/backend/inliner.cc @@ -13,6 +13,7 @@ #include "vm/compiler/backend/il_printer.h" #include "vm/compiler/backend/type_propagator.h" #include "vm/compiler/compiler_pass.h" +#include "vm/compiler/compiler_timings.h" #include "vm/compiler/frontend/flow_graph_builder.h" #include "vm/compiler/frontend/kernel_to_il.h" #include "vm/compiler/jit/compiler.h" @@ -440,6 +441,7 @@ class CallSites : public ValueObject { void FindCallSites(FlowGraph* graph, intptr_t depth, GrowableArray* inlined_info) { + COMPILER_TIMINGS_TIMER_SCOPE(graph->thread(), FindCallSites); ASSERT(graph != NULL); if (depth > inlining_depth_threshold_) { if (FLAG_print_inlining_tree) { @@ -891,6 +893,24 @@ class CallSiteInliner : public ValueObject { const Array& argument_names, InlinedCallData* call_data, bool stricter_heuristic) { + Timer timer; + if (thread()->compiler_timings() != nullptr) { + timer.Start(); + } + const bool success = TryInliningImpl(function, argument_names, call_data, + stricter_heuristic); + if (thread()->compiler_timings() != nullptr) { + timer.Stop(); + thread()->compiler_timings()->RecordInliningStatsByOutcome(success, + timer); + } + return success; + } + + bool TryInliningImpl(const Function& function, + const Array& argument_names, + InlinedCallData* call_data, + bool stricter_heuristic) { if (trace_inlining()) { String& name = String::Handle(function.QualifiedUserVisibleName()); THR_Print(" => %s (deopt count %d)\n", name.ToCString(), @@ -1040,6 +1060,7 @@ class CallSiteInliner : public ValueObject { caller_graph_->max_block_id() + 1, entry_kind == Code::EntryKind::kUnchecked); { + COMPILER_TIMINGS_TIMER_SCOPE(thread(), BuildGraph); callee_graph = builder.BuildGraph(); #if defined(DEBUG) // The inlining IDs of instructions in the callee graph are unset @@ -1051,6 +1072,10 @@ class CallSiteInliner : public ValueObject { #endif CalleeGraphValidator::Validate(callee_graph); } + + { + COMPILER_TIMINGS_TIMER_SCOPE(thread(), PopulateWithICData); + #if defined(DART_PRECOMPILER) && !defined(TARGET_ARCH_IA32) if (CompilerState::Current().is_aot()) { callee_graph->PopulateWithICData(parsed_function->function()); @@ -1063,6 +1088,7 @@ class CallSiteInliner : public ValueObject { if (!CompilerState::Current().is_aot() && function.is_intrinsic()) { callee_graph->PopulateWithICData(parsed_function->function()); } + } // The parameter stubs are a copy of the actual arguments providing // concrete information about the values, for example constant values, @@ -1127,6 +1153,7 @@ class CallSiteInliner : public ValueObject { { // Compute SSA on the callee graph, catching bailouts. + COMPILER_TIMINGS_TIMER_SCOPE(thread(), ComputeSSA); callee_graph->ComputeSSA(caller_graph_->max_virtual_register_number(), param_stubs); #if defined(DEBUG) @@ -1196,46 +1223,54 @@ class CallSiteInliner : public ValueObject { &call_site_count); // Use heuristics do decide if this call should be inlined. - InliningDecision decision = - ShouldWeInline(function, instruction_count, call_site_count); - if (!decision.value) { - // If size is larger than all thresholds, don't consider it again. - if ((instruction_count > FLAG_inlining_size_threshold) && - (call_site_count > FLAG_inlining_callee_call_sites_threshold)) { - function.set_is_inlinable(false); - } - TRACE_INLINING( - THR_Print(" Bailout: heuristics (%s) with " - "code size: %" Pd ", " - "call sites: %" Pd ", " - "inlining depth of callee: %d, " - "const args: %" Pd "\n", - decision.reason, instruction_count, call_site_count, - function.inlining_depth(), constants_count)); - PRINT_INLINING_TREE("Heuristic fail", &call_data->caller, &function, - call_data->call); - return false; - } - - // If requested, a stricter heuristic is applied to this inlining. This - // heuristic always scans the method (rather than possibly reusing - // cached results) to make sure all specializations are accounted for. - // TODO(ajcbik): with the now better bookkeeping, explore removing this - if (stricter_heuristic) { - if (!IsSmallLeaf(callee_graph)) { + { + COMPILER_TIMINGS_TIMER_SCOPE(thread(), MakeInliningDecision); + InliningDecision decision = + ShouldWeInline(function, instruction_count, call_site_count); + if (!decision.value) { + // If size is larger than all thresholds, don't consider it again. + if ((instruction_count > FLAG_inlining_size_threshold) && + (call_site_count > FLAG_inlining_callee_call_sites_threshold)) { + function.set_is_inlinable(false); + } TRACE_INLINING( - THR_Print(" Bailout: heuristics (no small leaf)\n")); - PRINT_INLINING_TREE("Heuristic fail (no small leaf)", - &call_data->caller, &function, call_data->call); + THR_Print(" Bailout: heuristics (%s) with " + "code size: %" Pd ", " + "call sites: %" Pd ", " + "inlining depth of callee: %d, " + "const args: %" Pd "\n", + decision.reason, instruction_count, call_site_count, + function.inlining_depth(), constants_count)); + PRINT_INLINING_TREE("Heuristic fail", &call_data->caller, &function, + call_data->call); return false; } + + // If requested, a stricter heuristic is applied to this inlining. + // This heuristic always scans the method (rather than possibly + // reusing cached results) to make sure all specializations are + // accounted for. + // TODO(ajcbik): with the now better bookkeeping, explore removing + // this + if (stricter_heuristic) { + if (!IsSmallLeaf(callee_graph)) { + TRACE_INLINING( + THR_Print(" Bailout: heuristics (no small leaf)\n")); + PRINT_INLINING_TREE("Heuristic fail (no small leaf)", + &call_data->caller, &function, + call_data->call); + return false; + } + } } // Inline dispatcher methods regardless of the current depth. - const intptr_t depth = - function.IsDispatcherOrImplicitAccessor() ? 0 : inlining_depth_; - collected_call_sites_->FindCallSites(callee_graph, depth, - &inlined_info_); + { + const intptr_t depth = + function.IsDispatcherOrImplicitAccessor() ? 0 : inlining_depth_; + collected_call_sites_->FindCallSites(callee_graph, depth, + &inlined_info_); + } // Add the function to the cache. if (!in_cache) { @@ -1262,9 +1297,12 @@ class CallSiteInliner : public ValueObject { callee_guarded_fields[i]); } - FlowGraphInliner::SetInliningId( - callee_graph, inliner_->NextInlineId(callee_graph->function(), - call_data->call->source())); + { + COMPILER_TIMINGS_TIMER_SCOPE(thread(), SetInliningId); + FlowGraphInliner::SetInliningId( + callee_graph, inliner_->NextInlineId(callee_graph->function(), + call_data->call->source())); + } TRACE_INLINING(THR_Print(" Success\n")); TRACE_INLINING(THR_Print( " with reason %s, code size %" Pd ", call sites: %" Pd "\n", @@ -1365,6 +1403,7 @@ class CallSiteInliner : public ValueObject { } void InlineCall(InlinedCallData* call_data) { + COMPILER_TIMINGS_TIMER_SCOPE(thread(), InlineCall); FlowGraph* callee_graph = call_data->callee_graph; auto callee_function_entry = callee_graph->graph_entry()->normal_entry(); @@ -1845,6 +1884,7 @@ bool PolymorphicInliner::TryInlineRecognizedMethod(intptr_t receiver_cid, // If not all variants are inlined, we add a PolymorphicInstanceCall // instruction to handle the non-inlined variants. TargetEntryInstr* PolymorphicInliner::BuildDecisionGraph() { + COMPILER_TIMINGS_TIMER_SCOPE(owner_->thread(), BuildDecisionGraph); const intptr_t try_idx = call_->GetBlock()->try_index(); // Start with a fresh target entry. @@ -2204,6 +2244,7 @@ void FlowGraphInliner::CollectGraphInfo(FlowGraph* flow_graph, bool force, intptr_t* instruction_count, intptr_t* call_site_count) { + COMPILER_TIMINGS_TIMER_SCOPE(flow_graph->thread(), CollectGraphInfo); const Function& function = flow_graph->function(); // For OSR, don't even bother. if (flow_graph->IsCompiledForOsr()) { @@ -2259,16 +2300,20 @@ static bool IsInlineableOperator(const Function& function) { } bool FlowGraphInliner::FunctionHasPreferInlinePragma(const Function& function) { + Thread* thread = dart::Thread::Current(); + COMPILER_TIMINGS_TIMER_SCOPE(thread, CheckForPragma); Object& options = Object::Handle(); - return Library::FindPragma(dart::Thread::Current(), /*only_core=*/false, - function, Symbols::vm_prefer_inline(), + return Library::FindPragma(thread, /*only_core=*/false, function, + Symbols::vm_prefer_inline(), /*multiple=*/false, &options); } bool FlowGraphInliner::FunctionHasNeverInlinePragma(const Function& function) { + Thread* thread = dart::Thread::Current(); + COMPILER_TIMINGS_TIMER_SCOPE(thread, CheckForPragma); Object& options = Object::Handle(); - return Library::FindPragma(dart::Thread::Current(), /*only_core=*/false, - function, Symbols::vm_never_inline(), + return Library::FindPragma(thread, /*only_core=*/false, function, + Symbols::vm_never_inline(), /*multiple=*/false, &options); } @@ -2279,6 +2324,7 @@ bool FlowGraphInliner::AlwaysInline(const Function& function) { return true; } + COMPILER_TIMINGS_TIMER_SCOPE(dart::Thread::Current(), MakeInliningDecision); // We don't want to inline DIFs for recognized methods because we would rather // replace them with inline FG before inlining introduces any superfluous // AssertAssignable instructions. @@ -3619,6 +3665,8 @@ bool FlowGraphInliner::TryInlineRecognizedMethod( Definition** result, SpeculativeInliningPolicy* policy, FlowGraphInliner::ExactnessInfo* exactness) { + COMPILER_TIMINGS_TIMER_SCOPE(flow_graph->thread(), InlineRecognizedMethod); + if (receiver_cid == kNeverCid) { // Receiver was defined in dead code and was replaced by the sentinel. // Original receiver cid is lost, so don't try to inline recognized diff --git a/runtime/vm/compiler/compiler_pass.cc b/runtime/vm/compiler/compiler_pass.cc index 1dfcdcea9c1..d6071ee6402 100644 --- a/runtime/vm/compiler/compiler_pass.cc +++ b/runtime/vm/compiler/compiler_pass.cc @@ -15,11 +15,13 @@ #include "vm/compiler/backend/redundancy_elimination.h" #include "vm/compiler/backend/type_propagator.h" #include "vm/compiler/call_specializer.h" +#include "vm/compiler/compiler_timings.h" #include "vm/compiler/write_barrier_elimination.h" #if defined(DART_PRECOMPILER) #include "vm/compiler/aot/aot_call_specializer.h" #include "vm/compiler/aot/precompiler.h" #endif +#include "vm/thread.h" #include "vm/timeline.h" #define COMPILER_PASS_REPEAT(Name, Body) \ @@ -207,7 +209,10 @@ void CompilerPass::Run(CompilerPassState* state) const { PrintGraph(state, kTraceBefore, round); { TIMELINE_DURATION(thread, CompilerVerbose, name()); - repeat = DoBody(state); + { + COMPILER_TIMINGS_PASS_TIMER_SCOPE(thread, id()); + repeat = DoBody(state); + } thread->CheckForSafepoint(); } PrintGraph(state, kTraceAfter, round); diff --git a/runtime/vm/compiler/compiler_pass.h b/runtime/vm/compiler/compiler_pass.h index 39dc9bd3c77..1438db8ed6a 100644 --- a/runtime/vm/compiler/compiler_pass.h +++ b/runtime/vm/compiler/compiler_pass.h @@ -12,6 +12,7 @@ #include #include "vm/growable_array.h" +#include "vm/timer.h" #include "vm/token_position.h" #include "vm/zone.h" @@ -110,7 +111,7 @@ class CompilerPass { static constexpr intptr_t kNumPasses = 0 COMPILER_PASS_LIST(ADD_ONE); #undef ADD_ONE - CompilerPass(Id id, const char* name) : name_(name), flags_(0) { + CompilerPass(Id id, const char* name) : id_(id), name_(name), flags_(0) { ASSERT(passes_[id] == NULL); passes_[id] = this; @@ -133,6 +134,7 @@ class CompilerPass { intptr_t flags() const { return flags_; } const char* name() const { return name_; } + Id id() const { return id_; } bool IsFlagSet(Flag flag) const { return (flags() & flag) != 0; } @@ -187,6 +189,7 @@ class CompilerPass { static CompilerPass* passes_[]; + Id id_; const char* name_; intptr_t flags_; }; diff --git a/runtime/vm/compiler/compiler_sources.gni b/runtime/vm/compiler/compiler_sources.gni index 1b237e9f879..51b81d1331b 100644 --- a/runtime/vm/compiler/compiler_sources.gni +++ b/runtime/vm/compiler/compiler_sources.gni @@ -87,6 +87,8 @@ compiler_sources = [ "compiler_pass.h", "compiler_state.cc", "compiler_state.h", + "compiler_timings.cc", + "compiler_timings.h", "ffi/abi.cc", "ffi/abi.h", "ffi/call.cc", diff --git a/runtime/vm/compiler/compiler_state.h b/runtime/vm/compiler/compiler_state.h index a200ea93249..db608f362ac 100644 --- a/runtime/vm/compiler/compiler_state.h +++ b/runtime/vm/compiler/compiler_state.h @@ -13,6 +13,7 @@ #include "vm/compiler/cha.h" #include "vm/heap/safepoint.h" #include "vm/thread.h" +#include "vm/timer.h" namespace dart { diff --git a/runtime/vm/compiler/compiler_timings.cc b/runtime/vm/compiler/compiler_timings.cc new file mode 100644 index 00000000000..b0314427e66 --- /dev/null +++ b/runtime/vm/compiler/compiler_timings.cc @@ -0,0 +1,106 @@ +// Copyright (c) 2021, the Dart project authors. Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. + +#include "vm/compiler/compiler_timings.h" + +namespace dart { + +namespace { +#define DEFINE_NAME(Name) #Name, +const char* timer_names[] = {COMPILER_TIMERS_LIST(DEFINE_NAME)}; +#undef DEFINE_NAME + +} // namespace + +void CompilerTimings::PrintTimers( + Zone* zone, + const std::unique_ptr& timers, + const Timer& total, + intptr_t level) { + const int64_t total_elapsed = total.TotalElapsedTime(); + + int64_t total_accounted = 0; + int64_t total_accounted_cpu = 0; + for (intptr_t i = 0; i < kNumTimers; i++) { + const auto& entry = timers->timers_[i]; + total_accounted += entry.TotalElapsedTime(); + total_accounted_cpu += entry.TotalElapsedTimeCpu(); + } + + if (level > 0) { + // Print (self: ...) timing on the previous line if our amount of time + // spent outside of nested timers is between 1% and 99%. + const double kMinInterestingSelfPercent = 1.0; + const double kMaxInterestingSelfPercent = 99.0; + + const int64_t self = total_elapsed - total_accounted; + const auto self_pct = + static_cast(Utils::Abs(self)) * 100 / total_elapsed; + if (kMinInterestingSelfPercent < self_pct && + self_pct < kMaxInterestingSelfPercent) { + const int64_t self_cpu = + total.TotalElapsedTimeCpu() - total_accounted_cpu; + const auto self_timer = Timer(self, self_cpu); + OS::PrintErr(" (self: [%6.2f%%] %s)\n", self_pct, + self_timer.FormatElapsedHumanReadable(zone)); + } else { + OS::PrintErr("\n"); + } + } + + // Sort timers by the amount of time elapsed within each one. + Timer* by_elapsed[kNumTimers]; + for (intptr_t i = 0; i < kNumTimers; i++) { + by_elapsed[i] = &timers->timers_[i]; + } + qsort(by_elapsed, kNumTimers, sizeof(Timer*), + [](const void* pa, const void* pb) -> int { + const auto a_elapsed = + (*static_cast(pa))->TotalElapsedTime(); + const auto b_elapsed = + (*static_cast(pb))->TotalElapsedTime(); + return b_elapsed < a_elapsed ? -1 : b_elapsed > a_elapsed ? 1 : 0; + }); + + // Print sorted in descending order. + for (intptr_t i = 0; i < kNumTimers; i++) { + auto timer = by_elapsed[i]; + if (timer->TotalElapsedTime() > 0) { + const auto timer_id = static_cast(timer - &timers->timers_[0]); + const auto pct = + static_cast(timer->TotalElapsedTime()) * 100 / total_elapsed; + // Must be int for * width specifier. + const int indent = static_cast(level * 2); + + // Note: don't emit EOL because PrintTimers can print self timing. + OS::PrintErr( + "%*s[%6.2f%%] %-*s %-10s", indent, "", pct, 60 - indent, + timer_names[timer_id], + Timer::FormatElapsedHumanReadable(zone, timer->TotalElapsedTime(), + timer->TotalElapsedTimeCpu())); + + // Print nested timers if any or just emit EOL. + if (timers->nested_[timer_id] != nullptr) { + PrintTimers(zone, timers->nested_[timer_id], *timer, level + 1); + } else { + OS::PrintErr("\n"); + } + } + } +} + +void CompilerTimings::Print() { + Zone* zone = Thread::Current()->zone(); + + OS::PrintErr("Precompilation took: %s\n", + total_.FormatElapsedHumanReadable(zone)); + + PrintTimers(zone, root_, total_, 0); + + OS::PrintErr("Inlining by outcome\n Success: %s\n Failure: %s\n", + try_inlining_success_.FormatElapsedHumanReadable(zone), + try_inlining_failure_.FormatElapsedHumanReadable(zone)); +} + +} // namespace dart diff --git a/runtime/vm/compiler/compiler_timings.h b/runtime/vm/compiler/compiler_timings.h new file mode 100644 index 00000000000..6f9b31b4491 --- /dev/null +++ b/runtime/vm/compiler/compiler_timings.h @@ -0,0 +1,160 @@ +// Copyright (c) 2021, the Dart project authors. Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. + +#ifndef RUNTIME_VM_COMPILER_COMPILER_TIMINGS_H_ +#define RUNTIME_VM_COMPILER_COMPILER_TIMINGS_H_ + +#include + +#include "platform/allocation.h" +#include "vm/compiler/compiler_pass.h" +#include "vm/thread.h" +#include "vm/timer.h" + +#if defined(DART_PRECOMPILED_RUNTIME) +#error "AOT runtime should not use compiler sources (including header files)" +#endif // defined(DART_PRECOMPILED_RUNTIME) + +#define PRECOMPILER_TIMERS_LIST(V) \ + V(CompileAll) \ + V(Iterate) \ + V(CompileFunction) \ + V(PrecompileConstructors) \ + V(AttachOptimizedTypeTestingStub) \ + V(TraceForRetainedFunctions) \ + V(FinalizeDispatchTable) \ + V(ReplaceFunctionStaticCallEntries) \ + V(Drop) \ + V(Obfuscate) \ + V(Dedup) \ + V(SymbolsCompact) + +#define INLINING_TIMERS_LIST(V) \ + V(CollectGraphInfo) \ + V(PopulateWithICData) \ + V(FindCallSites) \ + V(SetInliningId) \ + V(MakeInliningDecision) \ + V(CheckForPragma) \ + V(InlineCall) \ + V(InlineRecognizedMethod) \ + V(DiscoverBlocks) \ + V(BuildDecisionGraph) \ + V(PrepareGraphs) + +// Note: COMPILER_PASS_LIST must be the first element of the list below because +// we expect that pass ids are the same as ids of corresponding timers. +#define COMPILER_TIMERS_LIST(V) \ + COMPILER_PASS_LIST(V) \ + PRECOMPILER_TIMERS_LIST(V) \ + INLINING_TIMERS_LIST(V) \ + V(BuildGraph) \ + V(EmitCode) \ + V(FinalizeCode) + +namespace dart { + +// |CompilerTimings| provides a way to track time taken by various compiler +// passes via a fixed number of timers (specified in |COMPILER_TIMERS_LIST|). +// +// It supports arbitrary nesting of timers e.g. if |DiscoverBlocks| is invoked +// within two different compiler passes like |IfConvert| and |BranchSimplify| +// then |CompilerTimings| will separate these two invocations and measure each +// separately. +class CompilerTimings : public MallocAllocated { + private: +#define INC(Name) +1 + static constexpr intptr_t kNumTimers = 0 COMPILER_TIMERS_LIST(INC); +#undef INC + + struct Timers : public MallocAllocated { + Timer timers_[kNumTimers]; + std::unique_ptr nested_[kNumTimers]; + }; + + public: + enum TimerId { +#define DECLARE_TIMER_ID(Name) k##Name, + COMPILER_TIMERS_LIST(DECLARE_TIMER_ID) +#undef DECLARE_TIMER_ID + }; + + // Timing scope which starts and stop the timer with the given |id|. + class Scope : public StackResource { + public: + Scope(Thread* thread, TimerId id) + : StackResource(thread), stats_(thread->compiler_timings()) { + if (stats_ != nullptr) { + outer_nested_ = stats_->nested_; + if (*outer_nested_ == nullptr) { + // Created array of nested timers if we don't have one yet. + *outer_nested_ = std::make_unique(); + } + + timer_ = &(*outer_nested_)->timers_[id]; + stats_->nested_ = &(*outer_nested_)->nested_[id]; + + timer_->Start(); + } + } + + ~Scope() { + if (stats_ != nullptr) { + timer_->Stop(); + stats_->nested_ = outer_nested_; + } + } + + private: + CompilerTimings* const stats_; + Timer* timer_; + std::unique_ptr* outer_nested_; + }; + + CompilerTimings() { total_.Start(); } + + void RecordInliningStatsByOutcome(bool success, const Timer& timer) { + if (success) { + try_inlining_success_.AddTotal(timer); + } else { + try_inlining_failure_.AddTotal(timer); + } + } + + void Print(); + + private: + void PrintTimers(Zone* zone, + const std::unique_ptr& timers, + const Timer& total, + intptr_t level); + + Timer total_; + std::unique_ptr root_ = std::make_unique(); + + // Timers nested under the currently running timer(s). + std::unique_ptr* nested_ = &root_; + + Timer try_inlining_success_; + Timer try_inlining_failure_; +}; + +#define TIMER_SCOPE_NAME2(counter) timer_scope_##counter +#define TIMER_SCOPE_NAME(counter) TIMER_SCOPE_NAME2(counter) + +#define COMPILER_TIMINGS_TIMER_SCOPE(thread, timer_id) \ + CompilerTimings::Scope TIMER_SCOPE_NAME(__COUNTER__)( \ + thread, CompilerTimings::k##timer_id) + +#define COMPILER_TIMINGS_PASS_TIMER_SCOPE(thread, pass_id) \ + CompilerTimings::Scope TIMER_SCOPE_NAME(__COUNTER__)( \ + thread, static_cast(pass_id)) + +#define PRECOMPILER_TIMER_SCOPE(precompiler, timer_id) \ + CompilerTimings::Scope TIMER_SCOPE_NAME(__COUNTER__)( \ + (precompiler)->thread(), CompilerTimings::k##timer_id) + +} // namespace dart + +#endif // RUNTIME_VM_COMPILER_COMPILER_TIMINGS_H_ diff --git a/runtime/vm/compiler/frontend/flow_graph_builder.cc b/runtime/vm/compiler/frontend/flow_graph_builder.cc index c2ed762c7e4..8b7a3941d6a 100644 --- a/runtime/vm/compiler/frontend/flow_graph_builder.cc +++ b/runtime/vm/compiler/frontend/flow_graph_builder.cc @@ -7,6 +7,7 @@ #include "vm/compiler/backend/branch_optimizer.h" #include "vm/compiler/backend/flow_graph.h" #include "vm/compiler/backend/il.h" +#include "vm/compiler/compiler_timings.h" #include "vm/compiler/frontend/kernel_to_il.h" #include "vm/object.h" #include "vm/zone.h" @@ -37,6 +38,7 @@ uword FindDoubleConstant(double value) { } void InlineExitCollector::PrepareGraphs(FlowGraph* callee_graph) { + COMPILER_TIMINGS_TIMER_SCOPE(callee_graph->thread(), PrepareGraphs); ASSERT(callee_graph->graph_entry()->SuccessorCount() == 1); ASSERT(callee_graph->max_block_id() > caller_graph_->max_block_id()); ASSERT(callee_graph->max_virtual_register_number() > diff --git a/runtime/vm/compiler/jit/compiler.cc b/runtime/vm/compiler/jit/compiler.cc index 7539cf82f84..0104e803f18 100644 --- a/runtime/vm/compiler/jit/compiler.cc +++ b/runtime/vm/compiler/jit/compiler.cc @@ -704,7 +704,7 @@ static ObjectPtr CompileFunctionHelper(CompilationPipeline* pipeline, Zone* const zone = stack_zone.GetZone(); const bool trace_compiler = FLAG_trace_compiler || (FLAG_trace_optimizing_compiler && optimized); - Timer per_compile_timer(trace_compiler, "Compilation time"); + Timer per_compile_timer; per_compile_timer.Start(); ParsedFunction* parsed_function = new (zone) diff --git a/runtime/vm/scope_timer.h b/runtime/vm/scope_timer.h index 0707980b77d..4e67ed2ceb0 100644 --- a/runtime/vm/scope_timer.h +++ b/runtime/vm/scope_timer.h @@ -35,7 +35,7 @@ class ScopeTimer : public ValueObject { } int64_t elapsed = GetElapsed(); double seconds = MicrosecondsToSeconds(elapsed); - OS::PrintErr("%s: %f seconds (%" Pd64 " \u00B5s)\n", name_, seconds, + OS::PrintErr("%s: %.2f seconds (%" Pd64 " \u00B5s)\n", name_, seconds, elapsed); } diff --git a/runtime/vm/snapshot_test.cc b/runtime/vm/snapshot_test.cc index 7ee9e11c939..944124f3c0c 100644 --- a/runtime/vm/snapshot_test.cc +++ b/runtime/vm/snapshot_test.cc @@ -726,7 +726,7 @@ VM_UNIT_TEST_CASE(FullSnapshot) { uint8_t* isolate_snapshot_data_buffer; // Start an Isolate, load a script and create a full snapshot. - Timer timer1(true, "Snapshot_test"); + Timer timer1; timer1.Start(); { TestIsolateScope __test_isolate__; @@ -760,7 +760,7 @@ VM_UNIT_TEST_CASE(FullSnapshot) { // Now Create another isolate using the snapshot and execute a method // from the script. - Timer timer2(true, "Snapshot_test"); + Timer timer2; timer2.Start(); TestCase::CreateTestIsolateFromSnapshot(isolate_snapshot_data_buffer); { diff --git a/runtime/vm/thread.h b/runtime/vm/thread.h index dcf8d0b9acd..f0cb6c11ee4 100644 --- a/runtime/vm/thread.h +++ b/runtime/vm/thread.h @@ -32,6 +32,7 @@ class AbstractType; class ApiLocalScope; class Array; class CompilerState; +class CompilerTimings; class Class; class Code; class Error; @@ -506,6 +507,12 @@ class Thread : public ThreadState { type_usage_info_ = value; } + CompilerTimings* compiler_timings() const { return compiler_timings_; } + + void set_compiler_timings(CompilerTimings* stats) { + compiler_timings_ = stats; + } + int32_t no_callback_scope_depth() const { return no_callback_scope_depth_; } void IncrementNoCallbackScopeDepth() { @@ -1117,6 +1124,8 @@ class Thread : public ThreadState { TypeUsageInfo* type_usage_info_; GrowableObjectArrayPtr pending_functions_; + CompilerTimings* compiler_timings_ = nullptr; + ErrorPtr sticky_error_; Random thread_random_; diff --git a/runtime/vm/timer.h b/runtime/vm/timer.h index 922c43a3516..8f478d53f33 100644 --- a/runtime/vm/timer.h +++ b/runtime/vm/timer.h @@ -13,17 +13,24 @@ namespace dart { +struct MeasureMonotonic { + static inline int64_t Now() { return OS::GetCurrentMonotonicMicros(); } +}; + +struct MeasureCpu { + static inline int64_t Now() { return OS::GetCurrentThreadCPUMicros(); } +}; + // Timer class allows timing of specific operations in the VM. -class Timer : public ValueObject { +template +class TimerImpl : public ValueObject { public: - Timer(bool report, const char* message) : report_(report), message_(message) { - Reset(); - } - ~Timer() {} + TimerImpl() { Reset(); } + ~TimerImpl() {} // Start timer. void Start() { - start_ = OS::GetCurrentMonotonicMicros(); + start_ = Measure::Now(); running_ = true; } @@ -31,7 +38,7 @@ class Timer : public ValueObject { void Stop() { ASSERT(start_ != 0); ASSERT(running()); - stop_ = OS::GetCurrentMonotonicMicros(); + stop_ = Measure::Now(); int64_t elapsed = ElapsedMicros(); max_contiguous_ = Utils::Maximum(max_contiguous_.load(), elapsed); // Make increment atomic in case it occurs in parallel with aggregation. @@ -43,7 +50,7 @@ class Timer : public ValueObject { int64_t TotalElapsedTime() const { int64_t result = total_; if (running_) { - int64_t now = OS::GetCurrentMonotonicMicros(); + int64_t now = Measure::Now(); result += (now - start_); } return result; @@ -52,7 +59,7 @@ class Timer : public ValueObject { int64_t MaxContiguous() const { int64_t result = max_contiguous_; if (running_) { - int64_t now = OS::GetCurrentMonotonicMicros(); + int64_t now = Measure::Now(); result = Utils::Maximum(result, now - start_); } return result; @@ -71,14 +78,17 @@ class Timer : public ValueObject { (max_contiguous_ == 0) && !running_; } - void AddTotal(const Timer& other) { total_.fetch_add(other.total_); } + void AddTotal(const TimerImpl& other) { total_.fetch_add(other.total_); } // Accessors. - bool report() const { return report_; } bool running() const { return running_; } - const char* message() const { return message_; } private: + friend class Timer; + + explicit TimerImpl(int64_t elapsed) + : total_(elapsed), max_contiguous_(elapsed) {} + int64_t ElapsedMicros() const { ASSERT(start_ != 0); ASSERT(stop_ != 0); @@ -89,13 +99,105 @@ class Timer : public ValueObject { RelaxedAtomic stop_; RelaxedAtomic total_; RelaxedAtomic max_contiguous_; - bool report_; - bool running_; - const char* message_; + + bool running_ = false; + + DISALLOW_COPY_AND_ASSIGN(TimerImpl); +}; + +class Timer : public ValueObject { + public: + Timer(int64_t elapsed, int64_t elapsed_cpu) + : monotonic_(elapsed), cpu_(elapsed) {} + Timer() { Reset(); } + ~Timer() {} + + // Start timer. + void Start() { + cpu_.Start(); + monotonic_.Start(); + } + + // Stop timer. + void Stop() { + cpu_.Stop(); + monotonic_.Stop(); + } + + // Get total cumulative elapsed time in micros. + int64_t TotalElapsedTime() const { return monotonic_.TotalElapsedTime(); } + int64_t TotalElapsedTimeCpu() const { return cpu_.TotalElapsedTime(); } + + int64_t MaxContiguous() const { return monotonic_.MaxContiguous(); } + + void Reset() { + monotonic_.Reset(); + cpu_.Reset(); + } + + bool IsReset() const { return monotonic_.IsReset(); } + + void AddTotal(const Timer& other) { + monotonic_.AddTotal(other.monotonic_); + cpu_.AddTotal(other.cpu_); + } + + const char* FormatElapsedHumanReadable(Zone* zone) const { + return FormatElapsedHumanReadable(zone, TotalElapsedTime(), + TotalElapsedTimeCpu()); + } + + static const char* FormatTime(Zone* zone, int64_t total) { + if (total > kMicrosecondsPerSecond) { + return OS::SCreate(zone, "%6.2f s", MicrosecondsToSeconds(total)); + } else if (total > kMicrosecondsPerMillisecond) { + return OS::SCreate(zone, "%6.2f ms", MicrosecondsToMilliseconds(total)); + } else { + return OS::SCreate(zone, "%6" Pd64 " \u00B5s", total); + } + } + + static constexpr double kCpuTimeReportingThreshold = 0.05; + + // Formats the given monotonic and CPU times as a human readable string. + // + // CPU time is included into the formated string only if + // it is |kCpuTimeReportingThreshold| percent different from the monotonic + // time. + static const char* FormatElapsedHumanReadable(Zone* zone, + int64_t total_elapsed, + int64_t total_elapsed_cpu) { + if ((total_elapsed == 0) || + static_cast(Utils::Abs(total_elapsed - total_elapsed_cpu) / + total_elapsed) < kCpuTimeReportingThreshold) { + return FormatTime(zone, total_elapsed); + } else { + return OS::SCreate(zone, "%s (cpu %s)", FormatTime(zone, total_elapsed), + FormatTime(zone, total_elapsed_cpu)); + } + } + + private: + TimerImpl monotonic_; + TimerImpl cpu_; DISALLOW_COPY_AND_ASSIGN(Timer); }; +class TimerScope : public StackResource { + public: + TimerScope(ThreadState* thread, Timer* timer) + : StackResource(thread), timer_(timer) { + if (timer_ != nullptr) timer_->Start(); + } + ~TimerScope() { + if (timer_ != nullptr) timer_->Stop(); + } + + private: + Timer* const timer_; +}; + } // namespace dart #endif // RUNTIME_VM_TIMER_H_