[vm/arm64] Refactor leaf runtime call sequence.

Leaf runtime calls are silently clobbering the R23 and R25 registers, which
causes bugs when these calls happen from write barriers (which are not
expected to clobber anything outside of R25).

We introduce a special helper class to handle saving and restoring
necessary registers.

The bug was originally reported by Joe Lin <zuojian.lzj@alibaba-inc.com>,
who proposed a fix[1]. This CL builds on top of that fix and tries to
make it safer by forcing users to use a stricter API.

[1] https://dart-review.googlesource.com/c/sdk/+/162300

TEST=vm/dart/write_barrier_register_clobber_test

Change-Id: I93e0a13a3c4c38ad28210b35750a66e615e3e44a
Cq-Include-Trybots: luci.dart.try:vm-kernel-linux-release-simarm64-try,vm-kernel-precomp-linux-release-simarm64-try
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/162191
Reviewed-by: Martin Kustermann <kustermann@google.com>
Commit-Queue: Vyacheslav Egorov <vegorov@google.com>
This commit is contained in:
Vyacheslav Egorov 2020-09-11 07:37:51 +00:00 committed by commit-bot@chromium.org
parent f4d3f8e0a2
commit 62354c1540
9 changed files with 325 additions and 70 deletions

View file

@ -0,0 +1,81 @@
// Copyright (c) 2020, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
// This test attempts to verify that write barrier slow path does not
// clobber any live values.
import 'dart:_internal' show VMInternalsForTesting;
import 'package:expect/expect.dart';
/// Minimal heap object with a single mutable, untyped slot.
///
/// The test overwrites [f] on every instance to exercise the write barrier.
class Old {
  /// The slot that the test stores into.
  dynamic f;

  Old(this.f);
}
/// Stores a fresh object into the `f` field of every element of [oldies]
/// while keeping twenty values derived from [v] live across those stores.
///
/// Returns the sum of `v + 0` through `v + 19`, i.e. `20 * v + 190`. Any
/// other result means one of the live values was corrupted while the loop
/// was running.
@pragma('vm:never-inline')
int crashy(int v, List<Old> oldies) {
// This test attempts to create a lot of live values which would live across
// write barrier invocation so that when write-barrier calls runtime and
// clobbers a register this is detected.
var young = Object();
var len = oldies.length;
var i = 0;
// Twenty values computed before the loop and consumed only after it, so
// each of them has to survive every store performed by the loop.
var v00 = v + 0;
var v01 = v + 1;
var v02 = v + 2;
var v03 = v + 3;
var v04 = v + 4;
var v05 = v + 5;
var v06 = v + 6;
var v07 = v + 7;
var v08 = v + 8;
var v09 = v + 9;
var v10 = v + 10;
var v11 = v + 11;
var v12 = v + 12;
var v13 = v + 13;
var v14 = v + 14;
var v15 = v + 15;
var v16 = v + 16;
var v17 = v + 17;
var v18 = v + 18;
var v19 = v + 19;
while (i < len) {
// Eventually this will overflow store buffer and call runtime to acquire
// a new block.
oldies[i++].f = young;
}
// Fold all twenty values into the result so that a change to any single
// one of them changes the returned sum.
return v00 +
v01 +
v02 +
v03 +
v04 +
v05 +
v06 +
v07 +
v08 +
v09 +
v10 +
v11 +
v12 +
v13 +
v14 +
v15 +
v16 +
v17 +
v18 +
v19;
}
/// Runs [crashy] over a large list of [Old] objects and checks that neither
/// the returned sum nor the stored fields were corrupted.
void main(List<String> args) {
  // Derive the seed from the command line so it is not a literal constant;
  // it is 0 unless the argument 'impossible' is passed.
  final init = args.contains('impossible') ? 1 : 0;
  final oldies = List<Old>.generate(100000, (i) => Old(""));
  // NOTE(review): the two full collections presumably move `oldies` into old
  // space so that the stores in crashy hit the write barrier - confirm.
  VMInternalsForTesting.collectAllGarbage();
  VMInternalsForTesting.collectAllGarbage();
  // Expect.equals takes (expected, actual); with init == 0 the expected sum
  // is 0 + 1 + ... + 19 == 190.
  Expect.equals(190, crashy(init, oldies));
  // Every field must have been overwritten with the non-String object.
  for (var o in oldies) {
    Expect.isTrue(o.f is! String);
  }
}

View file

@ -0,0 +1,81 @@
// Copyright (c) 2020, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
// This test attempts to verify that write barrier slow path does not
// clobber any live values.
import 'dart:_internal' show VMInternalsForTesting;
import 'package:expect/expect.dart';
/// Minimal heap object with a single mutable, untyped slot.
///
/// The test overwrites [f] on every instance to exercise the write barrier.
class Old {
  /// The slot that the test stores into.
  dynamic f;

  Old(this.f);
}
/// Stores a fresh object into the `f` field of every element of [oldies]
/// while keeping twenty values derived from [v] live across those stores.
///
/// Returns the sum of `v + 0` through `v + 19`, i.e. `20 * v + 190`. Any
/// other result means one of the live values was corrupted while the loop
/// was running.
@pragma('vm:never-inline')
int crashy(int v, List<Old> oldies) {
// This test attempts to create a lot of live values which would live across
// write barrier invocation so that when write-barrier calls runtime and
// clobbers a register this is detected.
var young = Object();
var len = oldies.length;
var i = 0;
// Twenty values computed before the loop and consumed only after it, so
// each of them has to survive every store performed by the loop.
var v00 = v + 0;
var v01 = v + 1;
var v02 = v + 2;
var v03 = v + 3;
var v04 = v + 4;
var v05 = v + 5;
var v06 = v + 6;
var v07 = v + 7;
var v08 = v + 8;
var v09 = v + 9;
var v10 = v + 10;
var v11 = v + 11;
var v12 = v + 12;
var v13 = v + 13;
var v14 = v + 14;
var v15 = v + 15;
var v16 = v + 16;
var v17 = v + 17;
var v18 = v + 18;
var v19 = v + 19;
while (i < len) {
// Eventually this will overflow store buffer and call runtime to acquire
// a new block.
oldies[i++].f = young;
}
// Fold all twenty values into the result so that a change to any single
// one of them changes the returned sum.
return v00 +
v01 +
v02 +
v03 +
v04 +
v05 +
v06 +
v07 +
v08 +
v09 +
v10 +
v11 +
v12 +
v13 +
v14 +
v15 +
v16 +
v17 +
v18 +
v19;
}
/// Runs [crashy] over a large list of [Old] objects and checks that neither
/// the returned sum nor the stored fields were corrupted.
void main(List<String> args) {
  // Derive the seed from the command line so it is not a literal constant;
  // it is 0 unless the argument 'impossible' is passed.
  final init = args.contains('impossible') ? 1 : 0;
  final oldies = List<Old>.generate(100000, (i) => Old(""));
  // NOTE(review): the two full collections presumably move `oldies` into old
  // space so that the stores in crashy hit the write barrier - confirm.
  VMInternalsForTesting.collectAllGarbage();
  VMInternalsForTesting.collectAllGarbage();
  // Expect.equals takes (expected, actual); with init == 0 the expected sum
  // is 0 + 1 + ... + 19 == 190.
  Expect.equals(190, crashy(init, oldies));
  // Every field must have been overwritten with the non-String object.
  for (var o in oldies) {
    Expect.isTrue(o.f is! String);
  }
}

View file

@ -1485,7 +1485,7 @@ void Assembler::TransitionNativeToGenerated(Register state,
StoreToOffset(state, THR, target::Thread::exit_through_ffi_offset());
}
void Assembler::EnterCallRuntimeFrame(intptr_t frame_size) {
void Assembler::EnterCallRuntimeFrame(intptr_t frame_size, bool is_leaf) {
Comment("EnterCallRuntimeFrame");
EnterFrame(0);
if (!(FLAG_precompiled_mode && FLAG_use_bare_instructions)) {
@ -1510,19 +1510,30 @@ void Assembler::EnterCallRuntimeFrame(intptr_t frame_size) {
Push(reg);
}
ReserveAlignedFrameSpace(frame_size);
if (!is_leaf) { // Leaf calling sequence aligns the stack itself.
ReserveAlignedFrameSpace(frame_size);
} else {
PushPair(kCallLeafRuntimeCalleeSaveScratch1,
kCallLeafRuntimeCalleeSaveScratch2);
}
}
void Assembler::LeaveCallRuntimeFrame() {
void Assembler::LeaveCallRuntimeFrame(bool is_leaf) {
// SP might have been modified to reserve space for arguments
// and ensure proper alignment of the stack frame.
// We need to restore it before restoring registers.
const intptr_t fixed_frame_words_without_pc_and_fp =
target::frame_layout.dart_fixed_frame_size - 2;
const intptr_t kPushedRegistersSize =
kDartVolatileCpuRegCount * target::kWordSize +
kDartVolatileFpuRegCount * target::kWordSize +
(target::frame_layout.dart_fixed_frame_size - 2) *
target::kWordSize; // From EnterStubFrame (excluding PC / FP)
kDartVolatileFpuRegCount * sizeof(double) +
(kDartVolatileCpuRegCount + (is_leaf ? 2 : 0) +
fixed_frame_words_without_pc_and_fp) *
target::kWordSize;
AddImmediate(SP, FP, -kPushedRegistersSize);
if (is_leaf) {
PopPair(kCallLeafRuntimeCalleeSaveScratch1,
kCallLeafRuntimeCalleeSaveScratch2);
}
for (int i = kDartLastVolatileCpuReg; i >= kDartFirstVolatileCpuReg; i--) {
const Register reg = static_cast<Register>(i);
Pop(reg);
@ -1547,6 +1558,37 @@ void Assembler::CallRuntime(const RuntimeEntry& entry,
entry.Call(this, argument_count);
}
// Emits the call to the runtime entry this scope was created for, passing
// [argument_count] through to Assembler::CallRuntime.
void Assembler::CallRuntimeScope::Call(intptr_t argument_count) {
assembler_->CallRuntime(entry_, argument_count);
}
// Emits the teardown that mirrors the constructor. Nothing is emitted when
// the scope was created with preserve_registers == false.
Assembler::CallRuntimeScope::~CallRuntimeScope() {
if (preserve_registers_) {
// Leave the frame first, then restore CODE_REG: the reverse of the order
// in which the constructor pushed CODE_REG and entered the frame.
assembler_->LeaveCallRuntimeFrame(entry_.is_leaf());
if (restore_code_reg_) {
assembler_->Pop(CODE_REG);
}
}
}
// Emits the setup for a runtime call to [entry].
//
// When [preserve_registers] is true: if [caller] is non-null, CODE_REG is
// pushed and reloaded from *[caller]; then a call runtime frame is entered
// with [frame_size], and whether [entry] is a leaf entry is passed along.
// When [preserve_registers] is false no code is emitted here and
// [frame_size] is not used.
Assembler::CallRuntimeScope::CallRuntimeScope(Assembler* assembler,
const RuntimeEntry& entry,
intptr_t frame_size,
bool preserve_registers,
const Address* caller)
: assembler_(assembler),
entry_(entry),
preserve_registers_(preserve_registers),
// Only consulted by the destructor when preserve_registers_ is true.
restore_code_reg_(caller != nullptr) {
if (preserve_registers_) {
if (caller != nullptr) {
// Save the current CODE_REG before replacing it with the value loaded
// from *caller; the destructor pops it again.
assembler_->Push(CODE_REG);
assembler_->ldr(CODE_REG, *caller);
}
assembler_->EnterCallRuntimeFrame(frame_size, entry.is_leaf());
}
}
void Assembler::EnterStubFrame() {
EnterDartFrame(0);
}

View file

@ -1651,10 +1651,51 @@ class Assembler : public AssemblerBase {
void EnterOsrFrame(intptr_t extra_size, Register new_pp = kNoRegister);
void LeaveDartFrame(RestorePP restore_pp = kRestoreCallerPP);
void EnterCallRuntimeFrame(intptr_t frame_size);
void LeaveCallRuntimeFrame();
void CallRuntime(const RuntimeEntry& entry, intptr_t argument_count);
// Helper for performing runtime calls from callers that require manual
// register preservation (e.g. outside IL instructions marked as calling).
//
// Construct the scope, set up the arguments, invoke Call(), and let the
// scope go out of scope again.
class CallRuntimeScope : public ValueObject {
public:
// Creates a scope that leaves CODE_REG alone (delegates with a null
// caller address).
CallRuntimeScope(Assembler* assembler,
const RuntimeEntry& entry,
intptr_t frame_size,
bool preserve_registers = true)
: CallRuntimeScope(assembler,
entry,
frame_size,
preserve_registers,
/*caller=*/nullptr) {}
// Creates a scope that is additionally given the address [caller]. The
// address is only passed by pointer to the private constructor; no pointer
// to it is stored in the scope's members.
CallRuntimeScope(Assembler* assembler,
const RuntimeEntry& entry,
intptr_t frame_size,
Address caller,
bool preserve_registers = true)
: CallRuntimeScope(assembler,
entry,
frame_size,
preserve_registers,
&caller) {}
// Performs the runtime call with the given number of arguments.
void Call(intptr_t argument_count);
~CallRuntimeScope();
private:
// Shared implementation of the public constructors; [caller] may be null.
CallRuntimeScope(Assembler* assembler,
const RuntimeEntry& entry,
intptr_t frame_size,
bool preserve_registers,
const Address* caller);
Assembler* const assembler_;
const RuntimeEntry& entry_;
const bool preserve_registers_;
const bool restore_code_reg_;
};
// Set up a stub frame so that the stack traversal code can easily identify
// a stub frame.
void EnterStubFrame();
@ -2402,6 +2443,11 @@ class Assembler : public AssemblerBase {
CanBeSmi can_be_smi,
BarrierFilterMode barrier_filter_mode);
// Note: leaf call sequence uses some abi callee save registers as scratch
// so they should be manually preserved.
void EnterCallRuntimeFrame(intptr_t frame_size, bool is_leaf);
void LeaveCallRuntimeFrame(bool is_leaf);
friend class dart::FlowGraphCompiler;
std::function<void(Register reg)> generate_invoke_write_barrier_wrapper_;
std::function<void()> generate_invoke_array_write_barrier_;

View file

@ -278,6 +278,10 @@ word RuntimeEntry::OffsetFromThread() const {
return target::Thread::OffsetFromThread(runtime_entry_);
}
// Returns whether the wrapped runtime entry is a leaf entry, as reported by
// the underlying runtime_entry_ object.
bool RuntimeEntry::is_leaf() const {
return runtime_entry_->is_leaf();
}
namespace target {
const word kOldPageSize = dart::kOldPageSize;

View file

@ -233,6 +233,8 @@ class RuntimeEntry : public ValueObject {
word OffsetFromThread() const;
bool is_leaf() const;
protected:
RuntimeEntry(const dart::RuntimeEntry* runtime_entry,
RuntimeEntryCallInternal call)

View file

@ -49,16 +49,12 @@ static void EnsureIsNewOrRemembered(Assembler* assembler,
Label done;
__ tbnz(&done, R0, target::ObjectAlignment::kNewObjectBitPosition);
if (preserve_registers) {
__ EnterCallRuntimeFrame(0);
} else {
__ ReserveAlignedFrameSpace(0);
}
// [R0] already contains first argument.
__ mov(R1, THR);
__ CallRuntime(kEnsureRememberedAndMarkingDeferredRuntimeEntry, 2);
if (preserve_registers) {
__ LeaveCallRuntimeFrame();
{
Assembler::CallRuntimeScope scope(
assembler, kEnsureRememberedAndMarkingDeferredRuntimeEntry,
/*frame_size=*/0, /*preserve_registers=*/preserve_registers);
__ mov(R1, THR);
scope.Call(/*argument_count=*/2);
}
__ Bind(&done);
@ -1973,16 +1969,13 @@ static void GenerateWriteBarrierStubHelper(Assembler* assembler,
// Handle overflow: Call the runtime leaf function.
__ Bind(&overflow);
// Setup frame, push callee-saved registers.
__ Push(CODE_REG);
__ ldr(CODE_REG, stub_code);
__ EnterCallRuntimeFrame(0 * target::kWordSize);
__ mov(R0, THR);
__ CallRuntime(kStoreBufferBlockProcessRuntimeEntry, 1);
// Restore callee-saved registers, tear down frame.
__ LeaveCallRuntimeFrame();
__ Pop(CODE_REG);
{
Assembler::CallRuntimeScope scope(assembler,
kStoreBufferBlockProcessRuntimeEntry,
/*frame_size=*/0, stub_code);
__ mov(R0, THR);
scope.Call(/*argument_count=*/1);
}
__ ret();
__ Bind(&add_to_mark_stack);
@ -2020,13 +2013,13 @@ static void GenerateWriteBarrierStubHelper(Assembler* assembler,
__ ret();
__ Bind(&marking_overflow);
__ Push(CODE_REG);
__ ldr(CODE_REG, stub_code);
__ EnterCallRuntimeFrame(0 * target::kWordSize);
__ mov(R0, THR);
__ CallRuntime(kMarkingStackBlockProcessRuntimeEntry, 1);
__ LeaveCallRuntimeFrame();
__ Pop(CODE_REG);
{
Assembler::CallRuntimeScope scope(assembler,
kMarkingStackBlockProcessRuntimeEntry,
/*frame_size=*/0, stub_code);
__ mov(R0, THR);
scope.Call(/*argument_count=*/1);
}
__ ret();
__ Bind(&lost_race);
@ -2059,16 +2052,13 @@ static void GenerateWriteBarrierStubHelper(Assembler* assembler,
// Card table not yet allocated.
__ Bind(&remember_card_slow);
__ Push(CODE_REG);
__ PushPair(R0, R1);
__ ldr(CODE_REG, stub_code);
__ mov(R0, R1); // Arg0 = Object
__ mov(R1, R25); // Arg1 = Slot
__ EnterCallRuntimeFrame(0);
__ CallRuntime(kRememberCardRuntimeEntry, 2);
__ LeaveCallRuntimeFrame();
__ PopPair(R0, R1);
__ Pop(CODE_REG);
{
Assembler::CallRuntimeScope scope(assembler, kRememberCardRuntimeEntry,
/*frame_size=*/0, stub_code);
__ mov(R0, R1); // Arg0 = Object
__ mov(R1, R25); // Arg1 = Slot
scope.Call(/*argument_count=*/2);
}
__ ret();
}
}

View file

@ -242,21 +242,22 @@ const RegList kAllCpuRegistersList = 0xFFFFFFFF;
// See "Procedure Call Standard for the ARM 64-bit Architecture", document
// number "ARM IHI 0055B", May 22 2013.
#define R(REG) (1 << REG)
// C++ ABI call registers.
const RegList kAbiArgumentCpuRegs = (1 << R0) | (1 << R1) | (1 << R2) |
(1 << R3) | (1 << R4) | (1 << R5) |
(1 << R6) | (1 << R7);
const RegList kAbiArgumentCpuRegs =
R(R0) | R(R1) | R(R2) | R(R3) | R(R4) | R(R5) | R(R6) | R(R7);
#if defined(TARGET_OS_FUCHSIA)
const RegList kAbiPreservedCpuRegs =
(1 << R18) | (1 << R19) | (1 << R20) | (1 << R21) | (1 << R22) |
(1 << R23) | (1 << R24) | (1 << R25) | (1 << R26) | (1 << R27) | (1 << R28);
const RegList kAbiPreservedCpuRegs = R(R18) | R(R19) | R(R20) | R(R21) |
R(R22) | R(R23) | R(R24) | R(R25) |
R(R26) | R(R27) | R(R28);
const Register kAbiFirstPreservedCpuReg = R18;
const Register kAbiLastPreservedCpuReg = R28;
const int kAbiPreservedCpuRegCount = 11;
#else
const RegList kAbiPreservedCpuRegs =
(1 << R19) | (1 << R20) | (1 << R21) | (1 << R22) | (1 << R23) |
(1 << R24) | (1 << R25) | (1 << R26) | (1 << R27) | (1 << R28);
const RegList kAbiPreservedCpuRegs = R(R19) | R(R20) | R(R21) | R(R22) |
R(R23) | R(R24) | R(R25) | R(R26) |
R(R27) | R(R28);
const Register kAbiFirstPreservedCpuReg = R19;
const Register kAbiLastPreservedCpuReg = R28;
const int kAbiPreservedCpuRegCount = 10;
@ -265,11 +266,11 @@ const VRegister kAbiFirstPreservedFpuReg = V8;
const VRegister kAbiLastPreservedFpuReg = V15;
const int kAbiPreservedFpuRegCount = 8;
const intptr_t kReservedCpuRegisters =
(1 << SPREG) | // Dart SP
(1 << FPREG) | (1 << TMP) | (1 << TMP2) | (1 << PP) | (1 << THR) |
(1 << LR) | (1 << BARRIER_MASK) | (1 << NULL_REG) | (1 << R31) | // C++ SP
(1 << R18) | (1 << DISPATCH_TABLE_REG);
const intptr_t kReservedCpuRegisters = R(SPREG) | // Dart SP
R(FPREG) | R(TMP) | R(TMP2) | R(PP) |
R(THR) | R(LR) | R(BARRIER_MASK) |
R(NULL_REG) | R(R31) | // C++ SP
R(R18) | R(DISPATCH_TABLE_REG);
constexpr intptr_t kNumberOfReservedCpuRegisters = 12;
// CPU registers available to Dart allocator.
const RegList kDartAvailableCpuRegs =
@ -284,9 +285,17 @@ const Register kDartLastVolatileCpuReg = R14;
const int kDartVolatileCpuRegCount = 15;
const int kDartVolatileFpuRegCount = 24;
constexpr int kStoreBufferWrapperSize = 32;
// Two callee save scratch registers used by leaf runtime call sequence.
const Register kCallLeafRuntimeCalleeSaveScratch1 = R23;
const Register kCallLeafRuntimeCalleeSaveScratch2 = R25;
static_assert((R(kCallLeafRuntimeCalleeSaveScratch1) & kAbiPreservedCpuRegs) !=
0,
"Need callee save scratch register for leaf runtime calls.");
static_assert((R(kCallLeafRuntimeCalleeSaveScratch2) & kAbiPreservedCpuRegs) !=
0,
"Need callee save scratch register for leaf runtime calls.");
#define R(REG) (1 << REG)
constexpr int kStoreBufferWrapperSize = 32;
class CallingConventions {
public:

View file

@ -57,12 +57,12 @@ void RuntimeEntry::CallInternal(const RuntimeEntry* runtime_entry,
// call.
// This sequence may occur in an intrinsic, so don't use registers an
// intrinsic must preserve.
COMPILE_ASSERT(R23 != CODE_REG);
COMPILE_ASSERT(R25 != CODE_REG);
COMPILE_ASSERT(R23 != ARGS_DESC_REG);
COMPILE_ASSERT(R25 != ARGS_DESC_REG);
__ mov(R23, CSP);
__ mov(R25, SP);
COMPILE_ASSERT(kCallLeafRuntimeCalleeSaveScratch1 != CODE_REG);
COMPILE_ASSERT(kCallLeafRuntimeCalleeSaveScratch2 != CODE_REG);
COMPILE_ASSERT(kCallLeafRuntimeCalleeSaveScratch1 != ARGS_DESC_REG);
COMPILE_ASSERT(kCallLeafRuntimeCalleeSaveScratch2 != ARGS_DESC_REG);
__ mov(kCallLeafRuntimeCalleeSaveScratch1, CSP);
__ mov(kCallLeafRuntimeCalleeSaveScratch2, SP);
__ ReserveAlignedFrameSpace(0);
__ mov(CSP, SP);
__ ldr(TMP,
@ -71,8 +71,8 @@ void RuntimeEntry::CallInternal(const RuntimeEntry* runtime_entry,
__ blr(TMP);
__ LoadImmediate(TMP, VMTag::kDartCompiledTagId);
__ str(TMP, compiler::Address(THR, Thread::vm_tag_offset()));
__ mov(SP, R25);
__ mov(CSP, R23);
__ mov(SP, kCallLeafRuntimeCalleeSaveScratch2);
__ mov(CSP, kCallLeafRuntimeCalleeSaveScratch1);
ASSERT((kAbiPreservedCpuRegs & (1 << THR)) != 0);
ASSERT((kAbiPreservedCpuRegs & (1 << PP)) != 0);
} else {