mirror of
https://github.com/dart-lang/sdk
synced 2024-10-03 00:29:50 +00:00
[vm] Use codegen block order in regalloc in AOT.
Codegen block order keeps loop blocks together, and reflecting that in live ranges allows the allocator to produce better register allocation decisions. Consider for example the loop: v = def(); while (cond) { if (smth) { // (*) use(v); return true; } } If block (*) is interspersed with loop blocks the register allocator might decide to allocate it to the register in some of the loop blocks. If the same block is "sunk" away from loop blocks, the register allocator can clearly see that `v` does not necessarily have to live in a register for the whole loop. In JIT, the codegen block order is not topologically sorted and as such is unsuitable for our linear scan. TEST=ci Cq-Include-Trybots: luci.dart.try:vm-aot-linux-debug-x64-try,vm-aot-mac-release-arm64-try,vm-aot-linux-release-x64-try Change-Id: I0726815db998b559267949e157cd2158f5dd55f7 Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/358448 Commit-Queue: Slava Egorov <vegorov@google.com> Reviewed-by: Alexander Markov <alexmarkov@google.com>
This commit is contained in:
parent
3bc14606f9
commit
395b024685
|
@ -225,8 +225,10 @@ void BlockScheduler::ReorderBlocksJIT(FlowGraph* flow_graph) {
|
|||
flow_graph->CodegenBlockOrder()->Add(checked_entry);
|
||||
}
|
||||
// Build a new block order. Emit each chain when its first block occurs
|
||||
// in the original reverse postorder ordering (which gives a topological
|
||||
// sort of the blocks).
|
||||
// in the original reverse postorder ordering.
|
||||
// Note: the resulting order is not topologically sorted and can't be
|
||||
// used as a replacement for reverse_postorder in algorithms that expect
|
||||
// topological sort.
|
||||
for (intptr_t i = block_count - 1; i >= 0; --i) {
|
||||
if (chains[i]->first->block == flow_graph->postorder()[i]) {
|
||||
for (Link* link = chains[i]->first; link != nullptr; link = link->next) {
|
||||
|
|
|
@ -210,6 +210,8 @@ class FlowGraph : public ZoneAllocated {
|
|||
const GrowableArray<BlockEntryInstr*>& optimized_block_order() const {
|
||||
return optimized_block_order_;
|
||||
}
|
||||
|
||||
// In AOT these are guaranteed to be topologically sorted, but not in JIT.
|
||||
GrowableArray<BlockEntryInstr*>* CodegenBlockOrder();
|
||||
const GrowableArray<BlockEntryInstr*>* CodegenBlockOrder() const;
|
||||
|
||||
|
|
|
@ -77,12 +77,20 @@ static ExtraLoopInfo* ComputeExtraLoopInfo(Zone* zone, LoopInfo* loop_info) {
|
|||
return new (zone) ExtraLoopInfo(start, end);
|
||||
}
|
||||
|
||||
static const GrowableArray<BlockEntryInstr*>& BlockOrderForAllocation(
|
||||
const FlowGraph& flow_graph) {
|
||||
// Currently CodegenBlockOrder is not topologically sorted in JIT and can't
|
||||
// be used for register allocation.
|
||||
return CompilerState::Current().is_aot() ? *flow_graph.CodegenBlockOrder()
|
||||
: flow_graph.reverse_postorder();
|
||||
}
|
||||
|
||||
FlowGraphAllocator::FlowGraphAllocator(const FlowGraph& flow_graph,
|
||||
bool intrinsic_mode)
|
||||
: flow_graph_(flow_graph),
|
||||
reaching_defs_(flow_graph),
|
||||
value_representations_(flow_graph.max_vreg()),
|
||||
block_order_(flow_graph.reverse_postorder()),
|
||||
block_order_(BlockOrderForAllocation(flow_graph)),
|
||||
postorder_(flow_graph.postorder()),
|
||||
instructions_(),
|
||||
block_entries_(),
|
||||
|
@ -582,19 +590,21 @@ static bool HasOnlyUnconstrainedUses(LiveRange* range) {
|
|||
}
|
||||
|
||||
void FlowGraphAllocator::BuildLiveRanges() {
|
||||
const intptr_t block_count = postorder_.length();
|
||||
ASSERT(postorder_.Last()->IsGraphEntry());
|
||||
const intptr_t block_count = block_order_.length();
|
||||
ASSERT(block_order_[0]->IsGraphEntry());
|
||||
BitVector* current_interference_set = nullptr;
|
||||
Zone* zone = flow_graph_.zone();
|
||||
for (intptr_t i = 0; i < (block_count - 1); i++) {
|
||||
BlockEntryInstr* block = postorder_[i];
|
||||
for (intptr_t x = block_count - 1; x > 0; --x) {
|
||||
BlockEntryInstr* block = block_order_[x];
|
||||
|
||||
ASSERT(BlockEntryAt(block->start_pos()) == block);
|
||||
|
||||
// For every SSA value that is live out of this block, create an interval
|
||||
// that covers the whole block. It will be shortened if we encounter a
|
||||
// definition of this value in this block.
|
||||
for (BitVector::Iterator it(liveness_.GetLiveOutSetAt(i)); !it.Done();
|
||||
it.Advance()) {
|
||||
for (BitVector::Iterator it(
|
||||
liveness_.GetLiveOutSetAt(block->postorder_number()));
|
||||
!it.Done(); it.Advance()) {
|
||||
LiveRange* range = GetLiveRange(it.Current());
|
||||
range->AddUseInterval(block->start_pos(), block->end_pos());
|
||||
}
|
||||
|
@ -637,8 +647,9 @@ void FlowGraphAllocator::BuildLiveRanges() {
|
|||
if (block->IsLoopHeader()) {
|
||||
ASSERT(loop_info != nullptr);
|
||||
current_interference_set = nullptr;
|
||||
for (BitVector::Iterator it(liveness_.GetLiveInSetAt(i)); !it.Done();
|
||||
it.Advance()) {
|
||||
for (BitVector::Iterator it(
|
||||
liveness_.GetLiveInSetAt(block->postorder_number()));
|
||||
!it.Done(); it.Advance()) {
|
||||
LiveRange* range = GetLiveRange(it.Current());
|
||||
intptr_t loop_end = extra_loop_info_[loop_info->id()]->end;
|
||||
if (HasOnlyUnconstrainedUsesInLoop(range, loop_end)) {
|
||||
|
@ -1681,11 +1692,7 @@ static ParallelMoveInstr* CreateParallelMoveAfter(Instruction* instr,
|
|||
void FlowGraphAllocator::NumberInstructions() {
|
||||
intptr_t pos = 0;
|
||||
|
||||
// The basic block order is reverse postorder.
|
||||
const intptr_t block_count = postorder_.length();
|
||||
for (intptr_t i = block_count - 1; i >= 0; i--) {
|
||||
BlockEntryInstr* block = postorder_[i];
|
||||
|
||||
for (auto block : block_order_) {
|
||||
instructions_.Add(block);
|
||||
block_entries_.Add(block);
|
||||
block->set_start_pos(pos);
|
||||
|
@ -1706,9 +1713,7 @@ void FlowGraphAllocator::NumberInstructions() {
|
|||
|
||||
// Create parallel moves in join predecessors. This must be done after
|
||||
// all instructions are numbered.
|
||||
for (intptr_t i = block_count - 1; i >= 0; i--) {
|
||||
BlockEntryInstr* block = postorder_[i];
|
||||
|
||||
for (auto block : block_order_) {
|
||||
// For join entry predecessors create phi resolution moves if
|
||||
// necessary. They will be populated by the register allocator.
|
||||
JoinEntryInstr* join = block->AsJoinEntry();
|
||||
|
@ -3180,10 +3185,7 @@ void FlowGraphAllocator::CollectRepresentations() {
|
|||
}
|
||||
}
|
||||
|
||||
for (BlockIterator it = flow_graph_.reverse_postorder_iterator(); !it.Done();
|
||||
it.Advance()) {
|
||||
BlockEntryInstr* block = it.Current();
|
||||
|
||||
for (auto block : block_order_) {
|
||||
if (auto entry = block->AsBlockEntryWithInitialDefs()) {
|
||||
initial_definitions = entry->initial_definitions();
|
||||
for (intptr_t i = 0; i < initial_definitions->length(); ++i) {
|
||||
|
@ -3209,9 +3211,8 @@ void FlowGraphAllocator::CollectRepresentations() {
|
|||
}
|
||||
|
||||
// Normal instructions.
|
||||
for (ForwardInstructionIterator instr_it(block); !instr_it.Done();
|
||||
instr_it.Advance()) {
|
||||
Definition* def = instr_it.Current()->AsDefinition();
|
||||
for (auto instr : block->instructions()) {
|
||||
Definition* def = instr->AsDefinition();
|
||||
if ((def != nullptr) && (def->vreg(0) >= 0)) {
|
||||
const intptr_t vreg = def->vreg(0);
|
||||
value_representations_[vreg] =
|
||||
|
@ -3257,7 +3258,7 @@ void FlowGraphAllocator::RemoveFrameIfNotNeeded() {
|
|||
#if defined(TARGET_ARCH_ARM64) || defined(TARGET_ARCH_ARM)
|
||||
bool has_write_barrier_call = false;
|
||||
#endif
|
||||
for (auto block : flow_graph_.reverse_postorder()) {
|
||||
for (auto block : block_order_) {
|
||||
for (auto instruction : block->instructions()) {
|
||||
if (instruction->HasLocs() && instruction->locs()->can_call()) {
|
||||
// Function contains a call and thus needs a frame.
|
||||
|
@ -3359,7 +3360,7 @@ void FlowGraphAllocator::AllocateOutgoingArguments() {
|
|||
const intptr_t total_spill_slot_count =
|
||||
flow_graph_.graph_entry()->spill_slot_count();
|
||||
|
||||
for (auto block : flow_graph_.reverse_postorder()) {
|
||||
for (auto block : block_order_) {
|
||||
for (auto instr : block->instructions()) {
|
||||
if (auto move_arg = instr->AsMoveArgument()) {
|
||||
// Register calling conventions are not used in JIT.
|
||||
|
@ -3383,7 +3384,7 @@ void FlowGraphAllocator::AllocateOutgoingArguments() {
|
|||
void FlowGraphAllocator::ScheduleParallelMoves() {
|
||||
ParallelMoveResolver resolver;
|
||||
|
||||
for (auto block : flow_graph_.reverse_postorder()) {
|
||||
for (auto block : block_order_) {
|
||||
if (block->HasParallelMove()) {
|
||||
resolver.Resolve(block->parallel_move());
|
||||
}
|
||||
|
|
|
@ -330,8 +330,9 @@ FlowGraph* CompilerPass::RunForceOptimizedPipeline(
|
|||
INVOKE_PASS_AOT(DelayAllocations);
|
||||
INVOKE_PASS(EliminateWriteBarriers);
|
||||
INVOKE_PASS(FinalizeGraph);
|
||||
INVOKE_PASS(AllocateRegisters);
|
||||
INVOKE_PASS(ReorderBlocks);
|
||||
INVOKE_PASS(AllocateRegisters);
|
||||
INVOKE_PASS(TestILSerialization); // Must be last.
|
||||
return pass_state->flow_graph();
|
||||
}
|
||||
|
||||
|
@ -398,8 +399,9 @@ FlowGraph* CompilerPass::RunPipeline(PipelineMode mode,
|
|||
INVOKE_PASS(EliminateWriteBarriers);
|
||||
INVOKE_PASS(FinalizeGraph);
|
||||
INVOKE_PASS(Canonicalize);
|
||||
INVOKE_PASS(AllocateRegisters);
|
||||
INVOKE_PASS(ReorderBlocks);
|
||||
INVOKE_PASS(AllocateRegisters);
|
||||
INVOKE_PASS(TestILSerialization); // Must be last.
|
||||
return pass_state->flow_graph();
|
||||
}
|
||||
|
||||
|
@ -571,22 +573,6 @@ COMPILER_PASS(AllocateRegistersForGraphIntrinsic, {
|
|||
|
||||
COMPILER_PASS(ReorderBlocks, {
|
||||
BlockScheduler::ReorderBlocks(flow_graph);
|
||||
|
||||
// This is the last compiler pass.
|
||||
// Test that round-trip IL serialization works before generating code.
|
||||
if (FLAG_test_il_serialization && CompilerState::Current().is_aot()) {
|
||||
Zone* zone = flow_graph->zone();
|
||||
auto* detached_defs = new (zone) ZoneGrowableArray<Definition*>(zone, 0);
|
||||
flow_graph->CompactSSA(detached_defs);
|
||||
|
||||
ZoneWriteStream write_stream(flow_graph->zone(), 1024);
|
||||
FlowGraphSerializer serializer(&write_stream);
|
||||
serializer.WriteFlowGraph(*flow_graph, *detached_defs);
|
||||
ReadStream read_stream(write_stream.buffer(), write_stream.bytes_written());
|
||||
FlowGraphDeserializer deserializer(flow_graph->parsed_function(),
|
||||
&read_stream);
|
||||
state->set_flow_graph(deserializer.ReadFlowGraph());
|
||||
}
|
||||
});
|
||||
|
||||
COMPILER_PASS(EliminateWriteBarriers, { EliminateWriteBarriers(flow_graph); });
|
||||
|
@ -606,6 +592,24 @@ COMPILER_PASS(FinalizeGraph, {
|
|||
flow_graph->RemoveRedefinitions();
|
||||
});
|
||||
|
||||
COMPILER_PASS(TestILSerialization, {
|
||||
// This is the last compiler pass.
|
||||
// Test that round-trip IL serialization works before generating code.
|
||||
if (FLAG_test_il_serialization && CompilerState::Current().is_aot()) {
|
||||
Zone* zone = flow_graph->zone();
|
||||
auto* detached_defs = new (zone) ZoneGrowableArray<Definition*>(zone, 0);
|
||||
flow_graph->CompactSSA(detached_defs);
|
||||
|
||||
ZoneWriteStream write_stream(flow_graph->zone(), 1024);
|
||||
FlowGraphSerializer serializer(&write_stream);
|
||||
serializer.WriteFlowGraph(*flow_graph, *detached_defs);
|
||||
ReadStream read_stream(write_stream.buffer(), write_stream.bytes_written());
|
||||
FlowGraphDeserializer deserializer(flow_graph->parsed_function(),
|
||||
&read_stream);
|
||||
state->set_flow_graph(deserializer.ReadFlowGraph());
|
||||
}
|
||||
});
|
||||
|
||||
COMPILER_PASS(GenerateCode, { state->graph_compiler->CompileGraph(); });
|
||||
|
||||
} // namespace dart
|
||||
|
|
|
@ -54,6 +54,7 @@ namespace dart {
|
|||
V(UseTableDispatch) \
|
||||
V(WidenSmiToInt32) \
|
||||
V(EliminateWriteBarriers) \
|
||||
V(TestILSerialization) \
|
||||
V(GenerateCode)
|
||||
|
||||
class AllocationSinking;
|
||||
|
|
Loading…
Reference in a new issue