[vm] Add vm:align-loops pragma

This pragma forces the compiler to align loop headers within the
function to an architecture-specific boundary: 32 bytes on X64
and ARM64 (with the exception of Apple Silicon, whose optimization
manual explicitly discourages aligning branch targets).

The current implementation is rather naive and makes no
attempt to decide whether aligning is actually profitable
based on the loop body itself.

I have found this pragma to be helpful both to stabilize
benchmark results and achieve better performance
for tight loops on Intel hardware.

Issue https://github.com/dart-lang/sdk/issues/55522

TEST=vm/dart/align_loops_test

Cq-Include-Trybots: luci.dart.try:vm-aot-linux-product-x64-try,vm-aot-linux-debug-x64c-try,vm-aot-linux-debug-x64-try,vm-aot-linux-release-arm64-try,vm-aot-linux-release-simarm_x64-try,vm-aot-linux-release-x64-try,vm-aot-mac-product-arm64-try,vm-aot-mac-release-arm64-try,vm-aot-mac-release-x64-try,vm-aot-obfuscate-linux-release-x64-try,vm-aot-optimization-level-linux-release-x64-try,vm-aot-win-release-x64-try,vm-aot-win-debug-arm64-try,vm-aot-win-debug-x64-try,vm-aot-win-debug-x64c-try,vm-aot-win-product-x64-try,vm-aot-win-release-arm64-try,vm-aot-linux-debug-simarm_x64-try,vm-aot-linux-debug-simriscv64-try,vm-aot-dwarf-linux-product-x64-try,vm-aot-android-release-arm64c-try,vm-aot-android-release-arm_x64-try
Change-Id: Ic22fb90d85e7fdebeeaa3908a43328c59436ab58
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/364121
Reviewed-by: Alexander Markov <alexmarkov@google.com>
Commit-Queue: Slava Egorov <vegorov@google.com>
This commit is contained in:
Vyacheslav Egorov 2024-04-24 08:52:18 +00:00 committed by Commit Queue
parent 39276e9bd4
commit 2a2781edff
24 changed files with 441 additions and 43 deletions

View file

@ -20,6 +20,7 @@ These pragmas are part of the VM's API and are safe for use in external code.
| `vm:isolate-unsendable` | Marks a class, instances of which won't be allowed to be passed through ports or sent between isolates. |
| `vm:awaiter-link` | [Specifying variable to follow for awaiter stack unwinding](awaiter_stack_traces.md) |
| `vm:deeply-immutable` | [Specifying a class and all its subtypes are deeply immutable](deeply_immutable.md) |
| `vm:align-loops` | Tells the compiler to align all loop headers inside the function to an architecture-specific boundary: currently 32 bytes on X64 and ARM64 (except Apple Silicon, which explicitly discourages aligning branch targets). |
## Unsafe pragmas for general use

View file

@ -0,0 +1,91 @@
// Copyright (c) 2024, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
import 'dart:convert';
import 'dart:io';
import 'dart:async';
import 'package:expect/expect.dart';
import 'package:native_stack_traces/elf.dart';
import 'package:path/path.dart' as path;
import 'use_flag_test_helper.dart';
/// Verifies that the value of [sym] is a multiple of the expected loop
/// alignment and throws a descriptive message otherwise.
void checkAligned(Symbol sym) {
  // We only expect to run this test on X64 Linux.
  final expectedAlignment = 32;
  final lowBitsMask = expectedAlignment - 1;
  final misalignment = sym.value & lowBitsMask;
  if (misalignment == 0) {
    return;
  }
  throw 'Symbol $sym has value ${sym.value} which is not aligned by '
      '$expectedAlignment';
}
/// Builds an AOT snapshot from the kernel file at [dillPath] inside a fresh
/// temporary directory and checks the alignment of symbols in the result.
///
/// When [useAsm] is true the snapshot is produced as assembly and then passed
/// to [assembleSnapshot]; otherwise the ELF file is requested directly from
/// [genSnapshot].
Future<void> testAOT(String dillPath, {bool useAsm = false}) async {
  await withTempDir('align-loops-test-${useAsm ? 'asm' : 'elf'}',
      (String tempDir) async {
    // Generate the snapshot
    final snapshotPath = path.join(tempDir, 'libtest.so');
    if (!useAsm) {
      await run(genSnapshot, <String>[
        '--snapshot-kind=app-aot-elf',
        '--elf=$snapshotPath',
        dillPath,
      ]);
    } else {
      final assemblyPath = path.join(tempDir, 'test.S');
      await run(genSnapshot, <String>[
        '--snapshot-kind=app-aot-assembly',
        '--assembly=$assemblyPath',
        dillPath,
      ]);
      await assembleSnapshot(assemblyPath, snapshotPath);
    }
    print("Snapshot generated at $snapshotPath.");

    final elf = Elf.fromFile(snapshotPath)!;
    // The very first symbol should be aligned by 32 bytes because it is
    // the start of the instructions section.
    checkAligned(elf.staticSymbols.first);
    // Every function compiled with vm:align-loops in the test program has a
    // name starting with 'alignedFunction'.
    elf.staticSymbols
        .where((symbol) => symbol.name.startsWith('alignedFunction'))
        .forEach(checkAligned);
  });
}
/// Compiles align_loops_test_program.dart to an AOT kernel file and runs
/// [testAOT] on it for both the direct-ELF and the assembly snapshot paths.
void main() async {
  // Only run this test on Linux X64 for simplicity.
  if (!Platform.isLinux || !buildDir.endsWith('X64')) {
    return;
  }

  await withTempDir('align_loops', (String tempDir) async {
    final testProgram = path.join(sdkDir, 'runtime', 'tests', 'vm', 'dart',
        'align_loops_test_program.dart');
    final aotDillPath = path.join(tempDir, 'aot_test.dill');

    // Forward any experiment flags this test itself was started with.
    final experimentFlags = Platform.executableArguments
        .where((arg) => arg.startsWith('--enable-experiment='));
    final genKernelArgs = <String>[
      '--aot',
      '--platform',
      platformDill,
      ...experimentFlags,
      '-o',
      aotDillPath,
      testProgram
    ];
    await run(genKernel, genKernelArgs);

    // Both variants are started before either is awaited.
    final pendingChecks = [
      // Test unstripped ELF generation directly.
      testAOT(aotDillPath),
      testAOT(aotDillPath, useAsm: true),
    ];
    await Future.wait(pendingChecks);
  });
}

View file

@ -0,0 +1,68 @@
// Copyright (c) 2024, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
import 'dart:typed_data';
/// Returns the XOR of every byte in [list], starting from 0.
///
/// This variant is not marked with `vm:align-loops`; `vm:never-inline`
/// prevents the compiler from inlining it into its caller.
@pragma('vm:never-inline')
int foo(Uint8List list) {
var result = 0;
for (var i = 0; i < list.length; i++) {
result ^= list[i];
}
return result;
}
/// Returns the XOR of every byte in [list], starting from 0.
///
/// Same loop as [foo], but marked with `vm:align-loops`. The
/// `alignedFunction` name prefix is what align_loops_test.dart matches on when
/// it checks symbol alignment, so the name must not change.
@pragma('vm:never-inline')
@pragma('vm:align-loops')
int alignedFunction1(Uint8List list) {
var result = 0;
for (var i = 0; i < list.length; i++) {
result ^= list[i];
}
return result;
}
/// Returns the XOR of every byte in [list], starting from 1.
///
/// Not marked with `vm:align-loops`.
// NOTE(review): the distinct seed value presumably keeps this body from being
// identical to the other loop functions - confirm.
@pragma('vm:never-inline')
int baz(Uint8List list) {
var result = 1;
for (var i = 0; i < list.length; i++) {
result ^= list[i];
}
return result;
}
/// Returns the XOR of every byte in [list], starting from 2.
///
/// Marked with `vm:align-loops`. The `alignedFunction` name prefix is what
/// align_loops_test.dart matches on when it checks symbol alignment, so the
/// name must not change.
@pragma('vm:never-inline')
@pragma('vm:align-loops')
int alignedFunction2(Uint8List list) {
var result = 2;
for (var i = 0; i < list.length; i++) {
result ^= list[i];
}
return result;
}
/// Calls [f] on [list] repeatedly until at least 2000 ms have elapsed, then
/// prints [name] followed by the average number of milliseconds per call.
///
/// Returns the XOR of all values produced by [f], so the calls have an
/// observable result.
@pragma('vm:never-inline')
int benchmark(String name, int Function(Uint8List) f, Uint8List list) {
  final stopwatch = Stopwatch()..start();
  var checksum = 0;
  var iterations = 0;
  for (; stopwatch.elapsedMilliseconds < 2000; iterations++) {
    checksum ^= f(list);
  }
  print('$name: ${stopwatch.elapsedMilliseconds / iterations}');
  return checksum;
}
/// Runs each of the four loop functions through [benchmark] on the same
/// 10 MiB byte buffer, printing one timing line per function.
void main() {
final v = Uint8List(1024 * 1024 * 10);
// Note: we don't use tear-offs for alignedFunctionX because that would
// lead to two symbols both called alignedFunctionX in the resulting ELF:
// one for tear-off and one for the actual function. This would make it
// harder to verify that alignedFunction1 itself is correctly aligned.
benchmark('foo', foo, v);
benchmark('alignedFunction1', (list) => alignedFunction1(list), v);
benchmark('baz', baz, v);
benchmark('alignedFunction2', (list) => alignedFunction2(list), v);
}

View file

@ -270,6 +270,22 @@ void Assembler::Bind(Label* label) {
label->BindTo(bound_pc, lr_state());
}
// Emits nop instructions until (offset + current buffer position) is a
// multiple of |alignment|. |alignment| must be a power of two, and the gap to
// the next boundary is expected to be a whole number of instructions.
void Assembler::Align(intptr_t alignment, intptr_t offset) {
  ASSERT(Utils::IsPowerOfTwo(alignment));
  const intptr_t low_bits_mask = alignment - 1;
  const intptr_t misalignment =
      (offset + buffer_.GetPosition()) & low_bits_mask;
  if (misalignment == 0) {
    // Already on the requested boundary: nothing to emit.
    return;
  }
  const intptr_t padding_bytes = alignment - misalignment;
  ASSERT((padding_bytes % Instr::kInstrSize) == 0);
  for (intptr_t emitted = 0; emitted < padding_bytes;
       emitted += Instr::kInstrSize) {
    nop();
  }
  ASSERT(((offset + buffer_.GetPosition()) & low_bits_mask) == 0);
}
#if defined(TARGET_USES_THREAD_SANITIZER)
void Assembler::TsanLoadAcquire(Register addr) {
LeafRuntimeScope rt(this, /*frame_size=*/0, /*preserve_registers=*/true);

View file

@ -500,6 +500,10 @@ class Assembler : public AssemblerBase {
}
}
void nop() { Emit(Instr::kNopInstruction); }
void Align(intptr_t alignment, intptr_t offset);
void Bind(Label* label) override;
// Unconditional jump to a given label. [distance] is ignored on ARM.
void Jump(Label* label, JumpDistance distance = kFarJump) { b(label); }

View file

@ -601,8 +601,6 @@ class AssemblerBase : public StackResource {
public:
explicit AssemblerBase(ObjectPoolBuilder* object_pool_builder)
: StackResource(ThreadState::Current()),
prologue_offset_(-1),
has_monomorphic_entry_(false),
object_pool_builder_(object_pool_builder) {}
virtual ~AssemblerBase();
@ -622,6 +620,11 @@ class AssemblerBase : public StackResource {
intptr_t prologue_offset() const { return prologue_offset_; }
bool has_monomorphic_entry() const { return has_monomorphic_entry_; }
// Tracks if the resulting code should be aligned by kPreferredLoopAlignment
// boundary.
void mark_should_be_aligned() { should_be_aligned_ = true; }
bool should_be_aligned() const { return should_be_aligned_; }
void Comment(const char* format, ...) PRINTF_ATTRIBUTE(2, 3);
static bool EmittingComments();
@ -1223,8 +1226,9 @@ class AssemblerBase : public StackResource {
protected:
AssemblerBuffer buffer_; // Contains position independent code.
int32_t prologue_offset_;
bool has_monomorphic_entry_;
int32_t prologue_offset_ = -1;
bool has_monomorphic_entry_ = false;
bool should_be_aligned_ = false;
intptr_t unchecked_entry_offset_ = 0;

View file

@ -72,6 +72,11 @@ DECLARE_FLAG(charp, stacktrace_filter);
DECLARE_FLAG(int, gc_every);
DECLARE_FLAG(bool, trace_compiler);
// When set, FlowGraphCompiler::VisitBlocks aligns loop headers in every
// function it compiles, as if each were marked with vm:align-loops. The flag
// is only consulted in X64 and ARM64 builds, and the boundary actually used
// is kPreferredLoopAlignment for the target.
DEFINE_FLAG(bool,
align_all_loops,
false,
"Align all loop headers to 32 byte boundary");
#if defined(TARGET_ARCH_ARM) || defined(TARGET_ARCH_ARM64)
compiler::LRState ComputeInnerLRState(const FlowGraph& flow_graph) {
auto entry = flow_graph.graph_entry();
@ -651,6 +656,17 @@ void FlowGraphCompiler::CompileGraph() {
}
}
#if defined(TARGET_ARCH_X64) || defined(TARGET_ARCH_ARM64)
// Returns true if function is marked with vm:align-loops pragma.
static bool IsMarkedWithAlignLoops(const Function& function) {
Object& options = Object::Handle();
return Library::FindPragma(dart::Thread::Current(),
/*only_core=*/false, function,
Symbols::vm_align_loops(),
/*multiple=*/false, &options);
}
#endif // defined(TARGET_ARCH_X64) || defined(TARGET_ARCH_ARM64)
void FlowGraphCompiler::VisitBlocks() {
CompactBlocks();
if (compiler::Assembler::EmittingComments()) {
@ -669,6 +685,11 @@ void FlowGraphCompiler::VisitBlocks() {
const auto inner_lr_state = ComputeInnerLRState(flow_graph());
#endif // defined(TARGET_ARCH_ARM) || defined(TARGET_ARCH_ARM64)
#if defined(TARGET_ARCH_X64) || defined(TARGET_ARCH_ARM64)
const bool should_align_loops =
FLAG_align_all_loops || IsMarkedWithAlignLoops(function());
#endif // defined(TARGET_ARCH_X64) || defined(TARGET_ARCH_ARM64)
for (intptr_t i = 0; i < block_order().length(); ++i) {
// Compile the block entry.
BlockEntryInstr* entry = block_order()[i];
@ -699,8 +720,21 @@ void FlowGraphCompiler::VisitBlocks() {
for (LoopInfo* l = entry->loop_info(); l != nullptr; l = l->outer()) {
assembler()->Comment(" Loop %" Pd "", l->id());
}
if (entry->IsLoopHeader()) {
assembler()->Comment(" Loop Header");
}
}
#if defined(TARGET_ARCH_X64) || defined(TARGET_ARCH_ARM64)
if (should_align_loops && entry->IsLoopHeader() &&
kPreferredLoopAlignment > 1) {
assembler()->mark_should_be_aligned();
assembler()->Align(kPreferredLoopAlignment, 0);
}
#else
static_assert(kPreferredLoopAlignment == 1);
#endif // defined(TARGET_ARCH_X64) || defined(TARGET_ARCH_ARM64)
BeginCodeSourceRange(entry->source());
ASSERT(pending_deoptimization_env_ == nullptr);
pending_deoptimization_env_ = entry->env();

View file

@ -152,6 +152,17 @@ bool CodeRelocator::AddInstructionsToText(CodePtr code) {
if (text_offsets_.HasKey(instructions)) {
return false;
}
if (Instructions::ShouldBeAligned(instructions) &&
!Utils::IsAligned(next_text_offset_, kPreferredLoopAlignment)) {
const intptr_t padding_size =
Utils::RoundUp(next_text_offset_, kPreferredLoopAlignment) -
next_text_offset_;
commands_->Add(ImageWriterCommand(next_text_offset_, padding_size));
next_text_offset_ += padding_size;
}
text_offsets_.Insert({instructions, next_text_offset_});
commands_->Add(ImageWriterCommand(next_text_offset_, code));
next_text_offset_ += ImageWriter::SizeInSnapshot(instructions);
@ -447,10 +458,13 @@ void CodeRelocator::BuildTrampolinesForAlmostOutOfRangeCalls(
const Array& next_caller_targets) {
const bool all_functions_emitted = next_caller.IsNull();
bool next_requires_alignment = false;
uword next_size = 0;
uword next_call_count = 0;
if (!all_functions_emitted) {
next_size = ImageWriter::SizeInSnapshot(next_caller.instructions());
next_requires_alignment =
Instructions::ShouldBeAligned(next_caller.instructions());
if (!next_caller_targets.IsNull()) {
StaticCallsTable calls(next_caller_targets);
next_call_count = calls.Length();
@ -465,8 +479,12 @@ void CodeRelocator::BuildTrampolinesForAlmostOutOfRangeCalls(
// unresolved forward calls to become out-of-range, we'll not resolve it
// yet (maybe the target function will come very soon and we don't need
// a trampoline at all).
const intptr_t next_start =
next_requires_alignment
? Utils::RoundUp(next_text_offset_, kPreferredLoopAlignment)
: next_text_offset_;
const intptr_t future_boundary =
next_text_offset_ + next_size +
next_start + next_size +
kTrampolineSize *
(unresolved_calls_by_destination_.Length() + next_call_count - 1);
if (IsTargetInRangeFor(unresolved_call, future_boundary) &&

View file

@ -59,8 +59,8 @@ struct RelocatorTestHelper {
}
CodePtr AllocationInstruction(uintptr_t size) {
const auto& instructions = Instructions::Handle(
Instructions::New(size, /*has_monomorphic=*/false));
const auto& instructions = Instructions::Handle(Instructions::New(
size, /*has_monomorphic=*/false, /*should_be_aligned=*/false));
uword addr = instructions.PayloadStart();
for (uintptr_t i = 0; i < (size / 4); ++i) {
@ -212,6 +212,9 @@ struct RelocatorTestHelper {
case ImageWriterCommand::InsertBytesOfTrampoline:
size += (*commands)[i].insert_trampoline_bytes.buffer_length;
break;
case ImageWriterCommand::InsertPadding:
size += (*commands)[i].insert_padding.padding_length;
break;
case ImageWriterCommand::InsertInstructionOfCode:
size += ImageWriter::SizeInSnapshot(Code::InstructionsOf(
(*commands)[i].insert_instruction_of_code.code));
@ -219,8 +222,8 @@ struct RelocatorTestHelper {
}
}
auto& instructions = Instructions::Handle(
Instructions::New(size, /*has_monomorphic=*/false));
auto& instructions = Instructions::Handle(Instructions::New(
size, /*has_monomorphic=*/false, /*should_be_aligned=*/false));
{
uword addr = instructions.PayloadStart();
for (intptr_t i = 0; i < commands->length(); ++i) {
@ -233,6 +236,14 @@ struct RelocatorTestHelper {
addr += current_size;
break;
}
case ImageWriterCommand::InsertPadding: {
const auto entry = (*commands)[i].insert_padding;
const auto current_size = entry.padding_length;
ASSERT(addr + current_size <= instructions.PayloadStart() + size);
memset(reinterpret_cast<void*>(addr), 0, current_size);
addr += current_size;
break;
}
case ImageWriterCommand::InsertInstructionOfCode: {
const auto entry = (*commands)[i].insert_instruction_of_code;
const auto current_size =

View file

@ -869,13 +869,6 @@ DART_FORCE_INLINE static bool BareInstructionsPayloads() {
return FLAG_precompiled_mode;
}
word InstructionsSection::HeaderSize() {
// We only create InstructionsSections in precompiled mode.
ASSERT(FLAG_precompiled_mode);
return Utils::RoundUp(InstructionsSection::UnalignedHeaderSize(),
Instructions::kBarePayloadAlignment);
}
word Instructions::HeaderSize() {
return BareInstructionsPayloads()
? 0

View file

@ -657,6 +657,8 @@ static constexpr dart::compiler::target::word Instructions_UnalignedHeaderSize =
0x8;
static constexpr dart::compiler::target::word
InstructionsSection_UnalignedHeaderSize = 0x14;
static constexpr dart::compiler::target::word InstructionsSection_HeaderSize =
0x20;
static constexpr dart::compiler::target::word InstructionsTable_InstanceSize =
0x18;
static constexpr dart::compiler::target::word Int32x4_InstanceSize = 0x18;
@ -1371,6 +1373,8 @@ static constexpr dart::compiler::target::word Instructions_UnalignedHeaderSize =
0x10;
static constexpr dart::compiler::target::word
InstructionsSection_UnalignedHeaderSize = 0x28;
static constexpr dart::compiler::target::word InstructionsSection_HeaderSize =
0x40;
static constexpr dart::compiler::target::word InstructionsTable_InstanceSize =
0x30;
static constexpr dart::compiler::target::word Int32x4_InstanceSize = 0x18;
@ -2074,6 +2078,8 @@ static constexpr dart::compiler::target::word Instructions_UnalignedHeaderSize =
0x8;
static constexpr dart::compiler::target::word
InstructionsSection_UnalignedHeaderSize = 0x14;
static constexpr dart::compiler::target::word InstructionsSection_HeaderSize =
0x20;
static constexpr dart::compiler::target::word InstructionsTable_InstanceSize =
0x18;
static constexpr dart::compiler::target::word Int32x4_InstanceSize = 0x18;
@ -2790,6 +2796,8 @@ static constexpr dart::compiler::target::word Instructions_UnalignedHeaderSize =
0x10;
static constexpr dart::compiler::target::word
InstructionsSection_UnalignedHeaderSize = 0x28;
static constexpr dart::compiler::target::word InstructionsSection_HeaderSize =
0x40;
static constexpr dart::compiler::target::word InstructionsTable_InstanceSize =
0x30;
static constexpr dart::compiler::target::word Int32x4_InstanceSize = 0x18;
@ -3500,6 +3508,8 @@ static constexpr dart::compiler::target::word Instructions_UnalignedHeaderSize =
0x10;
static constexpr dart::compiler::target::word
InstructionsSection_UnalignedHeaderSize = 0x28;
static constexpr dart::compiler::target::word InstructionsSection_HeaderSize =
0x40;
static constexpr dart::compiler::target::word InstructionsTable_InstanceSize =
0x30;
static constexpr dart::compiler::target::word Int32x4_InstanceSize = 0x18;
@ -4212,6 +4222,8 @@ static constexpr dart::compiler::target::word Instructions_UnalignedHeaderSize =
0x10;
static constexpr dart::compiler::target::word
InstructionsSection_UnalignedHeaderSize = 0x28;
static constexpr dart::compiler::target::word InstructionsSection_HeaderSize =
0x40;
static constexpr dart::compiler::target::word InstructionsTable_InstanceSize =
0x30;
static constexpr dart::compiler::target::word Int32x4_InstanceSize = 0x18;
@ -4917,6 +4929,8 @@ static constexpr dart::compiler::target::word Instructions_UnalignedHeaderSize =
0x8;
static constexpr dart::compiler::target::word
InstructionsSection_UnalignedHeaderSize = 0x14;
static constexpr dart::compiler::target::word InstructionsSection_HeaderSize =
0x20;
static constexpr dart::compiler::target::word InstructionsTable_InstanceSize =
0x18;
static constexpr dart::compiler::target::word Int32x4_InstanceSize = 0x18;
@ -5632,6 +5646,8 @@ static constexpr dart::compiler::target::word Instructions_UnalignedHeaderSize =
0x10;
static constexpr dart::compiler::target::word
InstructionsSection_UnalignedHeaderSize = 0x28;
static constexpr dart::compiler::target::word InstructionsSection_HeaderSize =
0x40;
static constexpr dart::compiler::target::word InstructionsTable_InstanceSize =
0x30;
static constexpr dart::compiler::target::word Int32x4_InstanceSize = 0x18;
@ -6328,6 +6344,8 @@ static constexpr dart::compiler::target::word Instructions_UnalignedHeaderSize =
0x8;
static constexpr dart::compiler::target::word
InstructionsSection_UnalignedHeaderSize = 0x14;
static constexpr dart::compiler::target::word InstructionsSection_HeaderSize =
0x20;
static constexpr dart::compiler::target::word InstructionsTable_InstanceSize =
0x18;
static constexpr dart::compiler::target::word Int32x4_InstanceSize = 0x18;
@ -7034,6 +7052,8 @@ static constexpr dart::compiler::target::word Instructions_UnalignedHeaderSize =
0x10;
static constexpr dart::compiler::target::word
InstructionsSection_UnalignedHeaderSize = 0x28;
static constexpr dart::compiler::target::word InstructionsSection_HeaderSize =
0x40;
static constexpr dart::compiler::target::word InstructionsTable_InstanceSize =
0x30;
static constexpr dart::compiler::target::word Int32x4_InstanceSize = 0x18;
@ -7729,6 +7749,8 @@ static constexpr dart::compiler::target::word Instructions_UnalignedHeaderSize =
0x8;
static constexpr dart::compiler::target::word
InstructionsSection_UnalignedHeaderSize = 0x14;
static constexpr dart::compiler::target::word InstructionsSection_HeaderSize =
0x20;
static constexpr dart::compiler::target::word InstructionsTable_InstanceSize =
0x18;
static constexpr dart::compiler::target::word Int32x4_InstanceSize = 0x18;
@ -8437,6 +8459,8 @@ static constexpr dart::compiler::target::word Instructions_UnalignedHeaderSize =
0x10;
static constexpr dart::compiler::target::word
InstructionsSection_UnalignedHeaderSize = 0x28;
static constexpr dart::compiler::target::word InstructionsSection_HeaderSize =
0x40;
static constexpr dart::compiler::target::word InstructionsTable_InstanceSize =
0x30;
static constexpr dart::compiler::target::word Int32x4_InstanceSize = 0x18;
@ -9139,6 +9163,8 @@ static constexpr dart::compiler::target::word Instructions_UnalignedHeaderSize =
0x10;
static constexpr dart::compiler::target::word
InstructionsSection_UnalignedHeaderSize = 0x28;
static constexpr dart::compiler::target::word InstructionsSection_HeaderSize =
0x40;
static constexpr dart::compiler::target::word InstructionsTable_InstanceSize =
0x30;
static constexpr dart::compiler::target::word Int32x4_InstanceSize = 0x18;
@ -9843,6 +9869,8 @@ static constexpr dart::compiler::target::word Instructions_UnalignedHeaderSize =
0x10;
static constexpr dart::compiler::target::word
InstructionsSection_UnalignedHeaderSize = 0x28;
static constexpr dart::compiler::target::word InstructionsSection_HeaderSize =
0x40;
static constexpr dart::compiler::target::word InstructionsTable_InstanceSize =
0x30;
static constexpr dart::compiler::target::word Int32x4_InstanceSize = 0x18;
@ -10540,6 +10568,8 @@ static constexpr dart::compiler::target::word Instructions_UnalignedHeaderSize =
0x8;
static constexpr dart::compiler::target::word
InstructionsSection_UnalignedHeaderSize = 0x14;
static constexpr dart::compiler::target::word InstructionsSection_HeaderSize =
0x20;
static constexpr dart::compiler::target::word InstructionsTable_InstanceSize =
0x18;
static constexpr dart::compiler::target::word Int32x4_InstanceSize = 0x18;
@ -11247,6 +11277,8 @@ static constexpr dart::compiler::target::word Instructions_UnalignedHeaderSize =
0x10;
static constexpr dart::compiler::target::word
InstructionsSection_UnalignedHeaderSize = 0x28;
static constexpr dart::compiler::target::word InstructionsSection_HeaderSize =
0x40;
static constexpr dart::compiler::target::word InstructionsTable_InstanceSize =
0x30;
static constexpr dart::compiler::target::word Int32x4_InstanceSize = 0x18;
@ -12017,6 +12049,8 @@ static constexpr dart::compiler::target::word
AOT_Instructions_UnalignedHeaderSize = 0x8;
static constexpr dart::compiler::target::word
AOT_InstructionsSection_UnalignedHeaderSize = 0x14;
static constexpr dart::compiler::target::word
AOT_InstructionsSection_HeaderSize = 0x20;
static constexpr dart::compiler::target::word
AOT_InstructionsTable_InstanceSize = 0x18;
static constexpr dart::compiler::target::word AOT_Int32x4_InstanceSize = 0x18;
@ -12804,6 +12838,8 @@ static constexpr dart::compiler::target::word
AOT_Instructions_UnalignedHeaderSize = 0x10;
static constexpr dart::compiler::target::word
AOT_InstructionsSection_UnalignedHeaderSize = 0x28;
static constexpr dart::compiler::target::word
AOT_InstructionsSection_HeaderSize = 0x40;
static constexpr dart::compiler::target::word
AOT_InstructionsTable_InstanceSize = 0x30;
static constexpr dart::compiler::target::word AOT_Int32x4_InstanceSize = 0x18;
@ -13600,6 +13636,8 @@ static constexpr dart::compiler::target::word
AOT_Instructions_UnalignedHeaderSize = 0x10;
static constexpr dart::compiler::target::word
AOT_InstructionsSection_UnalignedHeaderSize = 0x28;
static constexpr dart::compiler::target::word
AOT_InstructionsSection_HeaderSize = 0x40;
static constexpr dart::compiler::target::word
AOT_InstructionsTable_InstanceSize = 0x30;
static constexpr dart::compiler::target::word AOT_Int32x4_InstanceSize = 0x18;
@ -14390,6 +14428,8 @@ static constexpr dart::compiler::target::word
AOT_Instructions_UnalignedHeaderSize = 0x10;
static constexpr dart::compiler::target::word
AOT_InstructionsSection_UnalignedHeaderSize = 0x28;
static constexpr dart::compiler::target::word
AOT_InstructionsSection_HeaderSize = 0x40;
static constexpr dart::compiler::target::word
AOT_InstructionsTable_InstanceSize = 0x30;
static constexpr dart::compiler::target::word AOT_Int32x4_InstanceSize = 0x18;
@ -15182,6 +15222,8 @@ static constexpr dart::compiler::target::word
AOT_Instructions_UnalignedHeaderSize = 0x10;
static constexpr dart::compiler::target::word
AOT_InstructionsSection_UnalignedHeaderSize = 0x28;
static constexpr dart::compiler::target::word
AOT_InstructionsSection_HeaderSize = 0x40;
static constexpr dart::compiler::target::word
AOT_InstructionsTable_InstanceSize = 0x30;
static constexpr dart::compiler::target::word AOT_Int32x4_InstanceSize = 0x18;
@ -15971,6 +16013,8 @@ static constexpr dart::compiler::target::word
AOT_Instructions_UnalignedHeaderSize = 0x8;
static constexpr dart::compiler::target::word
AOT_InstructionsSection_UnalignedHeaderSize = 0x14;
static constexpr dart::compiler::target::word
AOT_InstructionsSection_HeaderSize = 0x20;
static constexpr dart::compiler::target::word
AOT_InstructionsTable_InstanceSize = 0x18;
static constexpr dart::compiler::target::word AOT_Int32x4_InstanceSize = 0x18;
@ -16759,6 +16803,8 @@ static constexpr dart::compiler::target::word
AOT_Instructions_UnalignedHeaderSize = 0x10;
static constexpr dart::compiler::target::word
AOT_InstructionsSection_UnalignedHeaderSize = 0x28;
static constexpr dart::compiler::target::word
AOT_InstructionsSection_HeaderSize = 0x40;
static constexpr dart::compiler::target::word
AOT_InstructionsTable_InstanceSize = 0x30;
static constexpr dart::compiler::target::word AOT_Int32x4_InstanceSize = 0x18;
@ -17538,6 +17584,8 @@ static constexpr dart::compiler::target::word
AOT_Instructions_UnalignedHeaderSize = 0x8;
static constexpr dart::compiler::target::word
AOT_InstructionsSection_UnalignedHeaderSize = 0x14;
static constexpr dart::compiler::target::word
AOT_InstructionsSection_HeaderSize = 0x20;
static constexpr dart::compiler::target::word
AOT_InstructionsTable_InstanceSize = 0x18;
static constexpr dart::compiler::target::word AOT_Int32x4_InstanceSize = 0x18;
@ -18316,6 +18364,8 @@ static constexpr dart::compiler::target::word
AOT_Instructions_UnalignedHeaderSize = 0x10;
static constexpr dart::compiler::target::word
AOT_InstructionsSection_UnalignedHeaderSize = 0x28;
static constexpr dart::compiler::target::word
AOT_InstructionsSection_HeaderSize = 0x40;
static constexpr dart::compiler::target::word
AOT_InstructionsTable_InstanceSize = 0x30;
static constexpr dart::compiler::target::word AOT_Int32x4_InstanceSize = 0x18;
@ -19103,6 +19153,8 @@ static constexpr dart::compiler::target::word
AOT_Instructions_UnalignedHeaderSize = 0x10;
static constexpr dart::compiler::target::word
AOT_InstructionsSection_UnalignedHeaderSize = 0x28;
static constexpr dart::compiler::target::word
AOT_InstructionsSection_HeaderSize = 0x40;
static constexpr dart::compiler::target::word
AOT_InstructionsTable_InstanceSize = 0x30;
static constexpr dart::compiler::target::word AOT_Int32x4_InstanceSize = 0x18;
@ -19884,6 +19936,8 @@ static constexpr dart::compiler::target::word
AOT_Instructions_UnalignedHeaderSize = 0x10;
static constexpr dart::compiler::target::word
AOT_InstructionsSection_UnalignedHeaderSize = 0x28;
static constexpr dart::compiler::target::word
AOT_InstructionsSection_HeaderSize = 0x40;
static constexpr dart::compiler::target::word
AOT_InstructionsTable_InstanceSize = 0x30;
static constexpr dart::compiler::target::word AOT_Int32x4_InstanceSize = 0x18;
@ -20667,6 +20721,8 @@ static constexpr dart::compiler::target::word
AOT_Instructions_UnalignedHeaderSize = 0x10;
static constexpr dart::compiler::target::word
AOT_InstructionsSection_UnalignedHeaderSize = 0x28;
static constexpr dart::compiler::target::word
AOT_InstructionsSection_HeaderSize = 0x40;
static constexpr dart::compiler::target::word
AOT_InstructionsTable_InstanceSize = 0x30;
static constexpr dart::compiler::target::word AOT_Int32x4_InstanceSize = 0x18;
@ -21447,6 +21503,8 @@ static constexpr dart::compiler::target::word
AOT_Instructions_UnalignedHeaderSize = 0x8;
static constexpr dart::compiler::target::word
AOT_InstructionsSection_UnalignedHeaderSize = 0x14;
static constexpr dart::compiler::target::word
AOT_InstructionsSection_HeaderSize = 0x20;
static constexpr dart::compiler::target::word
AOT_InstructionsTable_InstanceSize = 0x18;
static constexpr dart::compiler::target::word AOT_Int32x4_InstanceSize = 0x18;
@ -22226,6 +22284,8 @@ static constexpr dart::compiler::target::word
AOT_Instructions_UnalignedHeaderSize = 0x10;
static constexpr dart::compiler::target::word
AOT_InstructionsSection_UnalignedHeaderSize = 0x28;
static constexpr dart::compiler::target::word
AOT_InstructionsSection_HeaderSize = 0x40;
static constexpr dart::compiler::target::word
AOT_InstructionsTable_InstanceSize = 0x30;
static constexpr dart::compiler::target::word AOT_Int32x4_InstanceSize = 0x18;

View file

@ -436,6 +436,7 @@
SIZEOF(Instructions, UnalignedHeaderSize, UntaggedInstructions) \
SIZEOF(InstructionsSection, UnalignedHeaderSize, \
UntaggedInstructionsSection) \
FIELD(InstructionsSection, HeaderSize) \
SIZEOF(InstructionsTable, InstanceSize, UntaggedInstructionsTable) \
SIZEOF(Int32x4, InstanceSize, UntaggedInt32x4) \
SIZEOF(Integer, InstanceSize, UntaggedInteger) \

View file

@ -1337,6 +1337,9 @@ inline Register ConcreteRegister(LinkRegister) {
#define LINK_REGISTER (LinkRegister())
// Prioritize code size over performance.
const intptr_t kPreferredLoopAlignment = 1;
} // namespace dart
#endif // RUNTIME_VM_CONSTANTS_ARM_H_

View file

@ -1641,6 +1641,22 @@ inline Register ConcreteRegister(LinkRegister) {
#define LINK_REGISTER (LinkRegister())
// There are many different ARM64 CPUs out there with different alignment
// requirements which are mostly not very well documented.
//
// Apple Silicon CPU Optimization Guide explicitly discourages alignment of
// branch targets (see section 4.4.3).
//
// Aligning to 32 seems like a safe bet based on LLVM's implementation:
//
// https://github.com/llvm/llvm-project/blob/05c1447b3eabe9cc4a27866094e46c57350c5d5a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp#L107
//
#if defined(DART_TARGET_OS_MACOS_IOS) || defined(DART_TARGET_OS_MACOS)
const intptr_t kPreferredLoopAlignment = 1;
#else
const intptr_t kPreferredLoopAlignment = 32;
#endif
} // namespace dart
#endif // RUNTIME_VM_CONSTANTS_ARM64_H_

View file

@ -531,6 +531,9 @@ struct DartCallingConvention {
const uword kBreakInstructionFiller = 0xCCCCCCCC;
// Prioritize code size over performance.
const intptr_t kPreferredLoopAlignment = 1;
} // namespace dart
#endif // RUNTIME_VM_CONSTANTS_IA32_H_

View file

@ -1606,6 +1606,9 @@ inline Register ConcreteRegister(Register r) {
}
#define LINK_REGISTER RA
// No information available.
const intptr_t kPreferredLoopAlignment = 1;
} // namespace dart
#endif // RUNTIME_VM_CONSTANTS_RISCV_H_

View file

@ -719,6 +719,12 @@ const int MAX_NOP_SIZE = 8;
const uint64_t kBreakInstructionFiller = 0xCCCCCCCCCCCCCCCCL;
// Based on presentation "Causes of Performance Instability due to Code
// Placement in X86 - Zia Ansari, Intel"[1].
//
// [1]: https://www.youtube.com/watch?v=IX16gcX4vDQ
const intptr_t kPreferredLoopAlignment = 32;
} // namespace dart
#endif // RUNTIME_VM_CONSTANTS_X64_H_

View file

@ -188,7 +188,8 @@ ImageWriter::ImageWriter(Thread* t, bool generates_assembly)
instructions_section_type_(
TagObjectTypeAsReadOnly(zone_, "InstructionsSection")),
instructions_type_(TagObjectTypeAsReadOnly(zone_, "Instructions")),
trampoline_type_(TagObjectTypeAsReadOnly(zone_, "Trampoline")) {
trampoline_type_(TagObjectTypeAsReadOnly(zone_, "Trampoline")),
padding_type_(TagObjectTypeAsReadOnly(zone_, "Padding")) {
ResetOffsets();
}
@ -219,6 +220,13 @@ void ImageWriter::PrepareForSerialization(
next_text_offset_ += trampoline_length;
break;
}
case ImageWriterCommand::InsertPadding: {
auto padding_length = inst.insert_padding.padding_length;
const intptr_t offset = next_text_offset_;
instructions_.Add(InstructionsData(nullptr, padding_length, offset));
next_text_offset_ += padding_length;
break;
}
default:
UNREACHABLE();
}
@ -377,7 +385,8 @@ void ImageWriter::DumpInstructionsSizes() {
js.OpenArray();
for (intptr_t i = 0; i < instructions_.length(); i++) {
auto& data = instructions_[i];
const bool is_trampoline = data.code_ == nullptr;
// Alignment padding is counted together with trampolines for now.
const bool is_trampoline = data.trampoline_length != 0;
if (is_trampoline) {
trampolines_total_size += data.trampoline_length;
continue;
@ -457,8 +466,7 @@ void ImageWriter::Write(NonStreamingWriteStream* clustered_stream, bool vm) {
// will allocate on the Dart heap.
for (intptr_t i = 0; i < instructions_.length(); i++) {
InstructionsData& data = instructions_[i];
const bool is_trampoline = data.trampoline_bytes != nullptr;
if (is_trampoline) continue;
if (data.trampoline_length != 0) continue;
data.insns_ = &Instructions::Handle(zone_, data.raw_insns_);
ASSERT(data.raw_code_ != nullptr);
@ -781,7 +789,7 @@ void ImageWriter::WriteText(bool vm) {
const intptr_t section_contents_alignment =
bare_instruction_payloads
? compiler::target::Instructions::kBarePayloadAlignment
? InstructionsSection::kPayloadAlignment
: compiler::target::ObjectAlignment::kObjectAlignment;
const intptr_t alignment_offset =
compiler::target::ObjectAlignment::kOldObjectAlignmentOffset;
@ -803,10 +811,16 @@ void ImageWriter::WriteText(bool vm) {
PcDescriptors& descriptors = PcDescriptors::Handle(zone_);
#endif
// We don't expect more than 64 bytes of padding.
uint8_t padding_bytes[64];
memset(&padding_bytes[0], 0, sizeof(padding_bytes));
ASSERT(offset_space_ != IdSpace::kSnapshot);
for (intptr_t i = 0; i < instructions_.length(); i++) {
auto& data = instructions_[i];
const bool is_trampoline = data.trampoline_bytes != nullptr;
const bool is_padding =
data.trampoline_bytes == nullptr && data.trampoline_length != 0;
ASSERT_EQUAL(data.text_offset_, text_offset);
#if defined(DART_PRECOMPILER)
@ -814,9 +828,12 @@ void ImageWriter::WriteText(bool vm) {
if (profile_writer_ != nullptr) {
const V8SnapshotProfileWriter::ObjectId id(offset_space_, text_offset);
auto const type = is_trampoline ? trampoline_type_ : instructions_type_;
const intptr_t size = is_trampoline ? data.trampoline_length
: SizeInSnapshot(data.insns_->ptr());
auto const type = is_trampoline ? trampoline_type_
: is_padding ? padding_type_
: instructions_type_;
const intptr_t size = (is_trampoline || is_padding)
? data.trampoline_length
: SizeInSnapshot(data.insns_->ptr());
profile_writer_->SetObjectTypeAndName(id, type, object_name);
profile_writer_->AttributeBytesTo(id, size);
const intptr_t element_offset = id.nonce() - parent_id.nonce();
@ -833,6 +850,11 @@ void ImageWriter::WriteText(bool vm) {
continue;
}
if (is_padding) {
text_offset += WriteBytes(padding_bytes, data.trampoline_length);
continue;
}
const intptr_t instr_start = text_offset;
const auto& insns = *data.insns_;
@ -1275,6 +1297,8 @@ const char* ImageWriter::SnapshotTextObjectNamer::SnapshotNameFor(
ZoneTextBuffer printer(zone_);
if (data.trampoline_bytes != nullptr) {
printer.AddString("Trampoline");
} else if (data.trampoline_length != 0) {
printer.AddString("Padding");
} else {
AddNonUniqueNameFor(&printer, *data.code_);
}

View file

@ -198,11 +198,13 @@ typedef DirectChainedHashMap<ObjectOffsetTrait> ObjectOffsetMap;
//
// * emitting the instructions of a [Code] object
// * emitting a trampoline of a certain size
// * emitting a padding of a certain size
//
struct ImageWriterCommand {
enum Opcode {
InsertInstructionOfCode,
InsertBytesOfTrampoline,
InsertPadding,
};
ImageWriterCommand(intptr_t expected_offset, CodePtr code)
@ -217,6 +219,11 @@ struct ImageWriterCommand {
op(ImageWriterCommand::InsertBytesOfTrampoline),
insert_trampoline_bytes({trampoline_bytes, trampoline_length}) {}
ImageWriterCommand(intptr_t expected_offset, intptr_t padding_length)
: expected_offset(expected_offset),
op(ImageWriterCommand::InsertPadding),
insert_padding({padding_length}) {}
// The offset (relative to the very first [ImageWriterCommand]) we expect
// this [ImageWriterCommand] to have.
intptr_t expected_offset;
@ -230,6 +237,10 @@ struct ImageWriterCommand {
uint8_t* buffer;
intptr_t buffer_length;
} insert_trampoline_bytes;
struct {
intptr_t padding_length;
} insert_padding;
};
};
@ -707,6 +718,7 @@ class ImageWriter : public ValueObject {
const char* const instructions_section_type_;
const char* const instructions_type_;
const char* const trampoline_type_;
const char* const padding_type_;
template <class T>
friend class TraceImageObjectScope;

View file

@ -15493,7 +15493,9 @@ void Library::CheckFunctionFingerprints() {
}
#endif // defined(DEBUG) && !defined(DART_PRECOMPILED_RUNTIME).
InstructionsPtr Instructions::New(intptr_t size, bool has_monomorphic_entry) {
InstructionsPtr Instructions::New(intptr_t size,
bool has_monomorphic_entry,
bool should_be_aligned) {
ASSERT(size >= 0);
ASSERT(Object::instructions_class() != Class::null());
if (size < 0 || size > kMaxElements) {
@ -15509,6 +15511,7 @@ InstructionsPtr Instructions::New(intptr_t size, bool has_monomorphic_entry) {
// Set this within the NoSafepointScope as well since it is contained in
// the same bitfield as the size.
result.SetHasMonomorphicEntry(has_monomorphic_entry);
result.SetShouldBeAligned(should_be_aligned);
}
ASSERT(result.stats() == nullptr);
return result.ptr();
@ -18102,7 +18105,8 @@ CodePtr Code::FinalizeCode(FlowGraphCompiler* compiler,
assembler->GetSelfHandle() = code.ptr();
#endif
Instructions& instrs = Instructions::ZoneHandle(Instructions::New(
assembler->CodeSize(), assembler->has_monomorphic_entry()));
assembler->CodeSize(), assembler->has_monomorphic_entry(),
assembler->should_be_aligned()));
{
// Important: if GC is triggered at any point between Instructions::New

View file

@ -5678,13 +5678,34 @@ class Instructions : public Object {
public:
enum {
kSizePos = 0,
kSizeSize = 31,
kSizeSize = 30,
kFlagsPos = kSizePos + kSizeSize,
kFlagsSize = 1, // Currently, only flag is single entry flag.
kFlagsSize = kBitsPerInt32 - kSizeSize,
};
#define INSTRUCTIONS_FLAGS_LIST(V) \
V(HasMonomorphicEntry) \
V(ShouldBeAligned)
enum {
#define DEFINE_INSTRUCTIONS_FLAG(Name) k##Name##Index,
INSTRUCTIONS_FLAGS_LIST(DEFINE_INSTRUCTIONS_FLAG)
#undef DEFINE_INSTRUCTIONS_FLAG
};
class SizeBits : public BitField<uint32_t, uint32_t, kSizePos, kSizeSize> {};
class FlagsBits : public BitField<uint32_t, bool, kFlagsPos, kFlagsSize> {};
#define DEFINE_INSTRUCTIONS_FLAG_HANDLING(Name) \
class Name##Bit \
: public BitField<uint32_t, bool, kFlagsPos + k##Name##Index, 1> {}; \
bool Name() const { return Name##Bit::decode(untag()->size_and_flags_); } \
static bool Name(const InstructionsPtr instr) { \
return Name##Bit::decode(instr->untag()->size_and_flags_); \
}
INSTRUCTIONS_FLAGS_LIST(DEFINE_INSTRUCTIONS_FLAG_HANDLING)
#undef DEFINE_INSTRUCTIONS_FLAG_HANDLING
// Excludes HeaderSize().
intptr_t Size() const { return SizeBits::decode(untag()->size_and_flags_); }
@ -5692,13 +5713,6 @@ class Instructions : public Object {
return SizeBits::decode(instr->untag()->size_and_flags_);
}
bool HasMonomorphicEntry() const {
return FlagsBits::decode(untag()->size_and_flags_);
}
static bool HasMonomorphicEntry(const InstructionsPtr instr) {
return FlagsBits::decode(instr->untag()->size_and_flags_);
}
uword PayloadStart() const { return PayloadStart(ptr()); }
uword MonomorphicEntryPoint() const { return MonomorphicEntryPoint(ptr()); }
uword EntryPoint() const { return EntryPoint(ptr()); }
@ -5847,16 +5861,23 @@ class Instructions : public Object {
SizeBits::update(value, untag()->size_and_flags_));
}
void SetHasMonomorphicEntry(bool value) const {
StoreNonPointer(&untag()->size_and_flags_,
FlagsBits::update(value, untag()->size_and_flags_));
#define DEFINE_INSTRUCTIONS_FLAG_HANDLING(Name) \
void Set##Name(bool value) const { \
StoreNonPointer(&untag()->size_and_flags_, \
Name##Bit::update(value, untag()->size_and_flags_)); \
}
INSTRUCTIONS_FLAGS_LIST(DEFINE_INSTRUCTIONS_FLAG_HANDLING)
#undef DEFINE_INSTRUCTIONS_FLAG_HANDLING
// New is a private method as RawInstruction and RawCode objects should
// only be created using the Code::FinalizeCode method. This method creates
// the RawInstruction and RawCode objects, sets up the pointer offsets
// and links the two in a GC safe manner.
static InstructionsPtr New(intptr_t size, bool has_monomorphic_entry);
static InstructionsPtr New(intptr_t size,
bool has_monomorphic_entry,
bool should_be_aligned);
FINAL_HEAP_OBJECT_IMPLEMENTATION(Instructions, Object);
friend class Class;
@ -5892,9 +5913,13 @@ class InstructionsSection : public Object {
return Utils::RoundUp(HeaderSize() + size, kObjectAlignment);
}
static constexpr intptr_t kPayloadAlignment = 32;
static_assert(kPreferredLoopAlignment <= kPayloadAlignment);
static_assert(Instructions::kBarePayloadAlignment <= kPayloadAlignment);
static intptr_t HeaderSize() {
return Utils::RoundUp(sizeof(UntaggedInstructionsSection),
Instructions::kBarePayloadAlignment);
kPayloadAlignment);
}
// There are no public instance methods for the InstructionsSection class, as

View file

@ -1973,7 +1973,6 @@ class UntaggedInstructions : public UntaggedObject {
VISIT_NOTHING();
// Instructions size in bytes and flags.
// Currently, only flag indicates 1 or 2 entry points.
uint32_t size_and_flags_;
// Variable length data follows here.

View file

@ -562,7 +562,8 @@ class ObjectPointerVisitor;
V(vm_recognized, "vm:recognized") \
V(vm_testing_print_flow_graph, "vm:testing:print-flow-graph") \
V(vm_trace_entrypoints, "vm:testing.unsafe.trace-entrypoints-fn") \
V(vm_unsafe_no_interrupts, "vm:unsafe:no-interrupts")
V(vm_unsafe_no_interrupts, "vm:unsafe:no-interrupts") \
V(vm_align_loops, "vm:align-loops")
// Contains a list of frequently used strings in a canonicalized form. This
// list is kept in the vm_isolate in order to share the copy across isolates

View file

@ -29,6 +29,7 @@ main(List<String> args) async {
...args,
'-a$arch',
'-m$mode',
'--no-rbe',
'offsets_extractor',
'offsets_extractor_precompiled_runtime'
]);