[vm/compiler] Ignore redefinitions in CSE.

Redefinitions exist to prevent code motion and don't affect
the meaning of instructions they flow into as inputs.

Consequently, when comparing two instructions for equality
we should unwrap all the inputs.

The only exception from the rule is LoadField instruction:
we would like to avoid replacing a load from a redefinition
with a dominating load from the original definition because
that would break a dependency chain on the redefinition.

This change is done to address code size regressions caused
by retaining and/or inserting more redefinitions to prevent
illegal code motions. These new redefinitions impact CSE
optimizations and (without this CL) lead to redundant boxing
being left over in the optimized code.

TEST=ci

Cq-Include-Trybots: luci.dart.try:vm-kernel-precomp-linux-debug-x64-try,vm-kernel-precomp-linux-product-x64-try,vm-kernel-precomp-linux-release-x64-try
Change-Id: Idd917fcb8c7117670a1d9c5c32f1bae57569d3b5
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/219242
Commit-Queue: Slava Egorov <vegorov@google.com>
Reviewed-by: Alexander Markov <alexmarkov@google.com>
Reviewed-by: Martin Kustermann <kustermann@google.com>
This commit is contained in:
Vyacheslav Egorov 2021-11-16 13:55:31 +00:00 committed by commit-bot@chromium.org
parent b1d1c76844
commit d4ced16301
5 changed files with 193 additions and 14 deletions

View file

@ -237,6 +237,10 @@ struct CidRangeValue {
return cid_start == kIllegalCid && cid_end == kIllegalCid;
}
bool Equals(const CidRangeValue& other) const {
return cid_start == other.cid_start && cid_end == other.cid_end;
}
intptr_t cid_start;
intptr_t cid_end;
};
@ -959,6 +963,11 @@ class Instruction : public ZoneAllocated {
return static_cast<T*>(this);
}
template <typename T>
const T* Cast() const {
return static_cast<const T*>(this);
}
// Returns structure describing location constraints required
// to emit native code for this instruction.
LocationSummary* locs() {
@ -8893,7 +8902,9 @@ class CheckClassIdInstr : public TemplateInstruction<1, NoThrow> {
virtual bool AllowsCSE() const { return true; }
virtual bool HasUnknownSideEffects() const { return false; }
virtual bool AttributesEqual(const Instruction& other) const { return true; }
virtual bool AttributesEqual(const Instruction& other) const {
return other.Cast<CheckClassIdInstr>()->cids().Equals(cids_);
}
PRINT_OPERANDS_TO_SUPPORT

View file

@ -29,10 +29,15 @@ DEFINE_FLAG(bool,
// Quick access to the current zone.
#define Z (zone())
class CSEInstructionMap : public ValueObject {
// A set of Instructions used by CSE pass.
//
// Instructions are compared as if all redefinitions were removed from the
// graph, with the exception of LoadField instruction which gets special
// treatment.
class CSEInstructionSet : public ValueObject {
public:
CSEInstructionMap() : map_() {}
explicit CSEInstructionMap(const CSEInstructionMap& other)
CSEInstructionSet() : map_() {}
explicit CSEInstructionSet(const CSEInstructionSet& other)
: ValueObject(), map_(other.map_) {}
Instruction* Lookup(Instruction* other) const {
@ -46,7 +51,59 @@ class CSEInstructionMap : public ValueObject {
}
private:
PointerSet<Instruction> map_;
static Definition* OriginalDefinition(Value* value) {
return value->definition()->OriginalDefinition();
}
static bool EqualsIgnoringRedefinitions(const Instruction& a,
const Instruction& b) {
const auto tag = a.tag();
if (tag != b.tag()) return false;
const auto input_count = a.InputCount();
if (input_count != b.InputCount()) return false;
// We would like to avoid replacing a load from a redefinition with a
// load from an original definition because that breaks the dependency
// on the redefinition and enables potentially incorrect code motion.
if (tag != Instruction::kLoadField) {
for (intptr_t i = 0; i < input_count; ++i) {
if (OriginalDefinition(a.InputAt(i)) !=
OriginalDefinition(b.InputAt(i))) {
return false;
}
}
} else {
for (intptr_t i = 0; i < input_count; ++i) {
if (!a.InputAt(i)->Equals(*b.InputAt(i))) return false;
}
}
return a.AttributesEqual(b);
}
class Trait {
public:
typedef Instruction* Value;
typedef Instruction* Key;
typedef Instruction* Pair;
static Key KeyOf(Pair kv) { return kv; }
static Value ValueOf(Pair kv) { return kv; }
static inline uword Hash(Key key) {
uword result = key->tag();
for (intptr_t i = 0; i < key->InputCount(); ++i) {
result = CombineHashes(
result, OriginalDefinition(key->InputAt(i))->ssa_temp_index());
}
return FinalizeHash(result, kBitsPerInt32 - 1);
}
static inline bool IsKeyEqual(Pair kv, Key key) {
return EqualsIgnoringRedefinitions(*kv, *key);
}
};
DirectChainedHashMap<Trait> map_;
};
// Place describes an abstract location (e.g. field) that IR can load
@ -2870,13 +2927,14 @@ class LoadOptimizer : public ValueObject {
DISALLOW_COPY_AND_ASSIGN(LoadOptimizer);
};
bool DominatorBasedCSE::Optimize(FlowGraph* graph) {
bool DominatorBasedCSE::Optimize(FlowGraph* graph,
bool run_load_optimization /* = true */) {
bool changed = false;
if (FLAG_load_cse) {
if (FLAG_load_cse && run_load_optimization) {
changed = LoadOptimizer::OptimizeGraph(graph) || changed;
}
CSEInstructionMap map;
CSEInstructionSet map;
changed = OptimizeRecursive(graph, graph->graph_entry(), &map) || changed;
return changed;
@ -2884,7 +2942,7 @@ bool DominatorBasedCSE::Optimize(FlowGraph* graph) {
bool DominatorBasedCSE::OptimizeRecursive(FlowGraph* graph,
BlockEntryInstr* block,
CSEInstructionMap* map) {
CSEInstructionSet* map) {
bool changed = false;
for (ForwardInstructionIterator it(block); !it.Done(); it.Advance()) {
Instruction* current = it.Current();
@ -2912,7 +2970,7 @@ bool DominatorBasedCSE::OptimizeRecursive(FlowGraph* graph,
BlockEntryInstr* child = block->dominated_blocks()[i];
if (i < num_children - 1) {
// Copy map.
CSEInstructionMap child_map(*map);
CSEInstructionSet child_map(*map);
changed = OptimizeRecursive(graph, child, &child_map) || changed;
} else {
// Reuse map for the last child.

View file

@ -14,7 +14,7 @@
namespace dart {
class CSEInstructionMap;
class CSEInstructionSet;
class AllocationSinking : public ZoneAllocated {
public:
@ -88,12 +88,12 @@ class DominatorBasedCSE : public AllStatic {
public:
// Return true, if the optimization changed the flow graph.
// False, if nothing changed.
static bool Optimize(FlowGraph* graph);
static bool Optimize(FlowGraph* graph, bool run_load_optimization = true);
private:
static bool OptimizeRecursive(FlowGraph* graph,
BlockEntryInstr* entry,
CSEInstructionMap* map);
CSEInstructionSet* map);
};
class DeadStoreElimination : public AllStatic {

View file

@ -1352,6 +1352,116 @@ ISOLATE_UNIT_TEST_CASE(CheckStackOverflowElimination_NoInterruptsPragma) {
}
}
// This test checks that CSE unwraps redefinitions when comparing all
// instructions except loads, which are handled specially.
ISOLATE_UNIT_TEST_CASE(CSE_Redefinitions) {
const char* script_chars = R"(
@pragma("vm:external-name", "BlackholeNative")
external dynamic blackhole([a, b, c, d, e, f]);
class K<T> {
final T field;
K(this.field);
}
)";
const Library& lib =
Library::Handle(LoadTestScript(script_chars, NoopNativeLookup));
const Class& cls = Class::ZoneHandle(
lib.LookupLocalClass(String::Handle(Symbols::New(thread, "K"))));
const Error& err = Error::Handle(cls.EnsureIsFinalized(thread));
EXPECT(err.IsNull());
const Field& original_field = Field::Handle(
cls.LookupField(String::Handle(Symbols::New(thread, "field"))));
EXPECT(!original_field.IsNull());
const Field& field = Field::Handle(original_field.CloneFromOriginal());
const Function& blackhole =
Function::ZoneHandle(GetFunction(lib, "blackhole"));
using compiler::BlockBuilder;
CompilerState S(thread, /*is_aot=*/false, /*is_optimizing=*/true);
FlowGraphBuilderHelper H;
auto b1 = H.flow_graph()->graph_entry()->normal_entry();
BoxInstr* box0;
BoxInstr* box1;
LoadFieldInstr* load0;
LoadFieldInstr* load1;
LoadFieldInstr* load2;
StaticCallInstr* call;
ReturnInstr* ret;
{
BlockBuilder builder(H.flow_graph(), b1);
auto& slot = Slot::Get(field, &H.flow_graph()->parsed_function());
auto param0 =
builder.AddParameter(0, 0, /*with_frame=*/true, kUnboxedDouble);
auto param1 = builder.AddParameter(1, 2, /*with_frame=*/true, kTagged);
auto redef0 =
builder.AddDefinition(new RedefinitionInstr(new Value(param0)));
auto redef1 =
builder.AddDefinition(new RedefinitionInstr(new Value(param0)));
box0 = builder.AddDefinition(
BoxInstr::Create(kUnboxedDouble, new Value(redef0)));
box1 = builder.AddDefinition(
BoxInstr::Create(kUnboxedDouble, new Value(redef1)));
auto redef2 =
builder.AddDefinition(new RedefinitionInstr(new Value(param1)));
auto redef3 =
builder.AddDefinition(new RedefinitionInstr(new Value(param1)));
load0 = builder.AddDefinition(
new LoadFieldInstr(new Value(redef2), slot, InstructionSource()));
load1 = builder.AddDefinition(
new LoadFieldInstr(new Value(redef3), slot, InstructionSource()));
load2 = builder.AddDefinition(
new LoadFieldInstr(new Value(redef3), slot, InstructionSource()));
auto args = new InputsArray(3);
args->Add(new Value(load0));
args->Add(new Value(load1));
args->Add(new Value(load2));
call = builder.AddInstruction(new StaticCallInstr(
InstructionSource(), blackhole, 0, Array::empty_array(), args,
S.GetNextDeoptId(), 0, ICData::RebindRule::kStatic));
ret = builder.AddReturn(new Value(box1));
}
H.FinishGraph();
// Running CSE without load optimization should eliminate redundant boxing
// but keep loads intact if they don't have exactly matching inputs.
DominatorBasedCSE::Optimize(H.flow_graph(), /*run_load_optimization=*/false);
EXPECT_PROPERTY(box1, it.WasEliminated());
EXPECT_PROPERTY(ret, it.value()->definition() == box0);
EXPECT_PROPERTY(load0, !it.WasEliminated());
EXPECT_PROPERTY(load1, !it.WasEliminated());
EXPECT_PROPERTY(load2, it.WasEliminated());
EXPECT_PROPERTY(call, it.ArgumentAt(0) == load0);
EXPECT_PROPERTY(call, it.ArgumentAt(1) == load1);
EXPECT_PROPERTY(call, it.ArgumentAt(2) == load1);
// Running load optimization pass should remove the second load but
// insert a redefinition to prevent code motion because the field
// has a generic type.
DominatorBasedCSE::Optimize(H.flow_graph(), /*run_load_optimization=*/true);
EXPECT_PROPERTY(load0, !it.WasEliminated());
EXPECT_PROPERTY(load1, it.WasEliminated());
EXPECT_PROPERTY(load2, it.WasEliminated());
EXPECT_PROPERTY(call, it.ArgumentAt(0) == load0);
EXPECT_PROPERTY(call, it.ArgumentAt(1)->IsRedefinition() &&
it.ArgumentAt(1)->OriginalDefinition() == load0);
EXPECT_PROPERTY(call, it.ArgumentAt(2)->IsRedefinition() &&
it.ArgumentAt(2)->OriginalDefinition() == load0);
}
#endif // !defined(TARGET_ARCH_IA32)
} // namespace dart

View file

@ -310,7 +310,7 @@ class DirectChainedHashMap
ASSERT_NOTNULL(zone),
initial_size) {}
// There is a current use of the copy constructor in CSEInstructionMap
// There is a current use of the copy constructor in CSEInstructionSet
// (compiler/backend/redundancy_elimination.cc), so work is needed if we
// want to disallow it.
DirectChainedHashMap(const DirectChainedHashMap& other)