Revert "[vm, arm64] Adjust CSP during the invocation stub instead of each function prologue."

This reverts commit b5b322962a.

Reason for revert: low frequency of crashes in service tests

Original change's description:
> [vm, arm64] Adjust CSP during the invocation stub instead of each function prologue.
> 
> Since 6e2c4636cd, we have more reliable information about the stack limit.
> 
> This saves 8 bytes from each function.
> 
> Flutter Gallery:
> Instructions(CodeSize): 6491472 ->  6375472 (-1.79%)
> Total(CodeSize):       10375882 -> 10258802 (-1.13%)
> 
> Bug: http://dartbug.com/26472
> Change-Id: Ief1ddd25eecd32a8314c71fdb470dd73046e5dc0
> Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/122408
> Commit-Queue: Ryan Macnak <rmacnak@google.com>
> Reviewed-by: Alexander Markov <alexmarkov@google.com>

TBR=rmacnak@google.com,alexmarkov@google.com,ajcbik@google.com

Change-Id: I0b45e1c81c1534e123dd85d27b7af27217e08795
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: http://dartbug.com/26472
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/122725
Reviewed-by: Ryan Macnak <rmacnak@google.com>
Commit-Queue: Ryan Macnak <rmacnak@google.com>
This commit is contained in:
Ryan Macnak 2019-10-24 04:12:16 +00:00 committed by commit-bot@chromium.org
parent b5b322962a
commit 86af66a3ee
16 changed files with 469 additions and 485 deletions

View file

@ -154,7 +154,7 @@ ThreadId Thread::GetCurrentThreadId() {
}
intptr_t Thread::ThreadIdToIntPtr(ThreadId id) {
ASSERT(sizeof(id) <= sizeof(intptr_t));
ASSERT(sizeof(id) == sizeof(intptr_t));
return static_cast<intptr_t>(id);
}

View file

@ -1207,27 +1207,13 @@ void Assembler::CheckCodePointer() {
#endif
}
// The ARM64 ABI requires at all times
// - stack limit < CSP <= stack base
// - CSP mod 16 = 0
// - we do not access stack memory below CSP
// Practically, this means we need to keep the C stack pointer ahead of the
// Dart stack pointer and 16-byte aligned for signal handlers. We set
// CSP to a value near the stack limit during SetupDartSP*, and use a different
// register within our generated code to avoid the alignment requirement.
// Note that Fuchsia does not have signal handlers.
void Assembler::SetupDartSP() {
mov(SP, CSP);
// The caller doesn't have a Thread available. Just kick CSP forward a bit.
AddImmediate(CSP, CSP, -kSpaceForSignalHandlers);
}
void Assembler::SetupDartSPFromThread(Register thr) {
ldr(TMP, Address(thr, target::Thread::saved_stack_limit_offset()));
mov(SP, CSP);
// This is a Dart entry stub. Set CSP close to the stack limit.
AddImmediate(CSP, TMP, kSpaceForSignalHandlers);
#if defined(TARGET_OS_FUCHSIA)
// Make any future signal handlers fail fast. Verifies our assumption in
// EnterFrame.
orri(CSP, ZR, Immediate(16));
#endif
}
void Assembler::RestoreCSP() {
@ -1235,6 +1221,25 @@ void Assembler::RestoreCSP() {
}
void Assembler::EnterFrame(intptr_t frame_size) {
// The ARM64 ABI requires at all times
// - stack limit < CSP <= stack base
// - CSP mod 16 = 0
// - we do not access stack memory below CSP
// Pratically, this means we need to keep the C stack pointer ahead of the
// Dart stack pointer and 16-byte aligned for signal handlers. If we knew the
// real stack limit, we could just set CSP to a value near it during
// SetupDartSP, but we do not know the real stack limit for the initial
// thread or threads created by the embedder.
// TODO(26472): It would be safer to use CSP as the Dart stack pointer, but
// this requires adjustments to stack handling to maintain the 16-byte
// alignment.
// Note Fuchsia does not have signal handlers; see also SetupDartSP.
#if !defined(TARGET_OS_FUCHSIA)
const intptr_t kMaxDartFrameSize = 4096;
sub(TMP, SP, Operand(kMaxDartFrameSize));
andi(CSP, TMP, Immediate(~15));
#endif
PushPair(FP, LR); // low: FP, high: LR.
mov(FP, SP);

View file

@ -1532,9 +1532,7 @@ class Assembler : public AssemblerBase {
void LoadClassIdMayBeSmi(Register result, Register object);
void LoadTaggedClassIdMayBeSmi(Register result, Register object);
static constexpr intptr_t kSpaceForSignalHandlers = 4096;
void SetupDartSP();
void SetupDartSPFromThread(Register thr);
void RestoreCSP();
void EnterFrame(intptr_t frame_size);

View file

@ -921,14 +921,12 @@ void FfiCallInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
// We are entering runtime code, so the C stack pointer must be restored
// from the stack limit to the top of the stack.
__ mov(R25, CSP);
__ mov(CSP, SP);
__ blr(branch);
// Restore the Dart stack pointer.
__ mov(SP, CSP);
__ mov(CSP, R25);
// Update information in the thread object and leave the safepoint.
__ TransitionNativeToGenerated(temp, /*leave_safepoint=*/true);
@ -1036,9 +1034,7 @@ void NativeEntryInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
__ Bind(compiler->GetJumpLabel(this));
// We don't use the regular stack pointer in ARM64, so we have to copy the
// native stack pointer into the Dart stack pointer. This will also kick CSP
// forward a bit, enough for the spills and leaf call below, until we can set
// it properly after setting up THR.
// native stack pointer into the Dart stack pointer.
__ SetupDartSP();
// Create a dummy frame holding the pushed arguments. This simplifies
@ -1106,11 +1102,6 @@ void NativeEntryInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
__ LeaveFrame();
}
// Now we have THR and can set CSP. See SetupDartSPFromThread.
__ ldr(TMP, compiler::Address(
THR, compiler::target::Thread::saved_stack_limit_offset()));
__ AddImmediate(CSP, TMP, compiler::Assembler::kSpaceForSignalHandlers);
// Refresh write barrier mask.
__ ldr(BARRIER_MASK,
compiler::Address(
@ -2959,8 +2950,7 @@ void CheckStackOverflowInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
CheckStackOverflowSlowPath* slow_path = new CheckStackOverflowSlowPath(this);
compiler->AddSlowPathCode(slow_path);
__ ldr(TMP, compiler::Address(
THR, compiler::target::Thread::stack_limit_offset()));
__ ldr(TMP, compiler::Address(THR, Thread::stack_limit_offset()));
__ CompareRegisters(SP, TMP);
__ b(slow_path->entry_label(), LS);
if (compiler->CanOSRFunction() && in_loop()) {

View file

@ -668,7 +668,6 @@ class Thread : public AllStatic {
static word stack_overflow_flags_offset();
static word stack_overflow_shared_stub_entry_point_offset(bool fpu_regs);
static word stack_limit_offset();
static word saved_stack_limit_offset();
static word unboxed_int64_runtime_arg_offset();
static word callback_code_offset();

File diff suppressed because it is too large Load diff

View file

@ -202,7 +202,6 @@
FIELD(Thread, safepoint_state_offset) \
NOT_IN_DBC(FIELD(Thread, slow_type_test_stub_offset)) \
FIELD(Thread, stack_limit_offset) \
FIELD(Thread, saved_stack_limit_offset) \
FIELD(Thread, stack_overflow_flags_offset) \
NOT_IN_DBC( \
FIELD(Thread, stack_overflow_shared_with_fpu_regs_entry_point_offset)) \

View file

@ -80,7 +80,7 @@ class StubCodeCompiler : public AllStatic {
static constexpr intptr_t kNativeCallbackTrampolineStackDelta = 4;
#elif defined(TARGET_ARCH_ARM64)
static constexpr intptr_t kNativeCallbackTrampolineSize = 12;
static constexpr intptr_t kNativeCallbackSharedStubSize = 268;
static constexpr intptr_t kNativeCallbackSharedStubSize = 284;
static constexpr intptr_t kNativeCallbackTrampolineStackDelta = 2;
#endif

View file

@ -212,20 +212,18 @@ void StubCodeCompiler::GenerateEnterSafepointStub(Assembler* assembler) {
__ EnterFrame(0);
__ PushRegisters(all_registers);
__ mov(CALLEE_SAVED_TEMP, CSP);
__ mov(CALLEE_SAVED_TEMP2, SP);
__ mov(CALLEE_SAVED_TEMP, SP);
__ ReserveAlignedFrameSpace(0);
__ mov(CSP, SP);
__ mov(CSP, SP);
__ ldr(R0, Address(THR, kEnterSafepointRuntimeEntry.OffsetFromThread()));
__ blr(R0);
__ mov(SP, CALLEE_SAVED_TEMP2);
__ mov(CSP, CALLEE_SAVED_TEMP);
__ mov(SP, CALLEE_SAVED_TEMP);
__ PopRegisters(all_registers);
__ LeaveFrame();
__ mov(CSP, SP);
__ Ret();
}
@ -236,9 +234,9 @@ void StubCodeCompiler::GenerateExitSafepointStub(Assembler* assembler) {
__ EnterFrame(0);
__ PushRegisters(all_registers);
__ mov(CALLEE_SAVED_TEMP, CSP);
__ mov(CALLEE_SAVED_TEMP2, SP);
__ mov(CALLEE_SAVED_TEMP, SP);
__ ReserveAlignedFrameSpace(0);
__ mov(CSP, SP);
// Set the execution state to VM while waiting for the safepoint to end.
@ -249,13 +247,12 @@ void StubCodeCompiler::GenerateExitSafepointStub(Assembler* assembler) {
__ ldr(R0, Address(THR, kExitSafepointRuntimeEntry.OffsetFromThread()));
__ blr(R0);
__ mov(SP, CALLEE_SAVED_TEMP2);
__ mov(CSP, CALLEE_SAVED_TEMP);
__ mov(SP, CALLEE_SAVED_TEMP);
__ PopRegisters(all_registers);
__ LeaveFrame();
__ mov(CSP, SP);
__ Ret();
}
@ -275,7 +272,6 @@ void StubCodeCompiler::GenerateCallNativeThroughSafepointStub(
__ mov(R19, LR);
__ TransitionGeneratedToNative(R8, FPREG, R9 /*volatile*/,
/*enter_safepoint=*/true);
__ mov(R25, CSP);
__ mov(CSP, SP);
#if defined(DEBUG)
@ -290,10 +286,7 @@ void StubCodeCompiler::GenerateCallNativeThroughSafepointStub(
#endif
__ blr(R8);
__ mov(SP, CSP);
__ mov(CSP, R25);
__ TransitionNativeToGenerated(R9, /*leave_safepoint=*/true);
__ ret(R19);
}
@ -1320,11 +1313,9 @@ void StubCodeCompiler::GenerateAllocateArrayStub(Assembler* assembler) {
void StubCodeCompiler::GenerateInvokeDartCodeStub(Assembler* assembler) {
__ Comment("InvokeDartCodeStub");
// Copy the C stack pointer (CSP/R31) into the stack pointer we'll actually
// use to access the stack (SP/R15) and set the C stack pointer to near the
// stack limit, loaded from the Thread held in R3, to prevent signal handlers
// from over-writing Dart frames.
__ SetupDartSPFromThread(R3);
// Copy the C stack pointer (R31) into the stack pointer we'll actually use
// to access the stack.
__ SetupDartSP();
__ Push(LR); // Marker for the profiler.
__ EnterFrame(0);
@ -1469,11 +1460,9 @@ void StubCodeCompiler::GenerateInvokeDartCodeFromBytecodeStub(
#if defined(DART_PRECOMPILED_RUNTIME)
__ Stop("Not using interpreter");
#else
// Copy the C stack pointer (CSP/R31) into the stack pointer we'll actually
// use to access the stack (SP/R15) and set the C stack pointer to near the
// stack limit, loaded from the Thread held in R3, to prevent signal handlers
// from over-writing Dart frames.
__ SetupDartSPFromThread(R3);
// Copy the C stack pointer (R31) into the stack pointer we'll actually use
// to access the stack.
__ SetupDartSP();
__ Push(LR); // Marker for the profiler.
__ EnterFrame(0);

View file

@ -41,6 +41,14 @@ class ScopedIsolateStackLimits : public ValueObject {
explicit ScopedIsolateStackLimits(Thread* thread, uword current_sp)
: thread_(thread) {
ASSERT(thread != NULL);
// Set the thread's stack_base based on the current
// stack pointer, we keep refining this value as we
// see higher stack pointers (Note: we assume the stack
// grows from high to low addresses).
OSThread* os_thread = thread->os_thread();
ASSERT(os_thread != NULL);
os_thread->RefineStackBoundsFromSP(current_sp);
// Save the Thread's current stack limit and adjust the stack limit.
ASSERT(thread->isolate() == Isolate::Current());
saved_stack_limit_ = thread->saved_stack_limit();

View file

@ -44,7 +44,8 @@ OSThread::OSThread()
thread_(NULL) {
// Try to get accurate stack bounds from pthreads, etc.
if (!GetCurrentStackBounds(&stack_limit_, &stack_base_)) {
FATAL("Failed to retrieve stack bounds");
// Fall back to a guess based on the stack pointer.
RefineStackBoundsFromSP(GetCurrentStackPointer());
}
stack_headroom_ = CalculateHeadroom(stack_base_ - stack_limit_);

View file

@ -138,7 +138,14 @@ class OSThread : public BaseThread {
return GetCurrentStackPointer() > (stack_limit_ + headroom);
}
// May fail for the main thread on Linux if resources are low.
void RefineStackBoundsFromSP(uword sp) {
if (sp > stack_base_) {
stack_base_ = sp;
stack_limit_ = sp - GetSpecifiedStackSize();
}
}
// May fail for the main thread on Linux and Android.
static bool GetCurrentStackBounds(uword* lower, uword* upper);
// Returns the current C++ stack pointer. Equivalent taking the address of a

View file

@ -215,7 +215,7 @@ void OSThread::Join(ThreadJoinId id) {
}
intptr_t OSThread::ThreadIdToIntPtr(ThreadId id) {
ASSERT(sizeof(id) <= sizeof(intptr_t));
ASSERT(sizeof(id) == sizeof(intptr_t));
return static_cast<intptr_t>(id);
}
@ -229,6 +229,7 @@ bool OSThread::Compare(ThreadId a, ThreadId b) {
bool OSThread::GetCurrentStackBounds(uword* lower, uword* upper) {
pthread_attr_t attr;
// May fail on the main thread.
if (pthread_getattr_np(pthread_self(), &attr) != 0) {
return false;
}

View file

@ -60,7 +60,6 @@ Thread::~Thread() {
Thread::Thread(bool is_vm_isolate)
: ThreadState(false),
stack_limit_(0),
saved_stack_limit_(0),
stack_overflow_flags_(0),
write_barrier_mask_(RawObject::kGenerationalBarrierMask),
isolate_(NULL),
@ -90,6 +89,7 @@ Thread::Thread(bool is_vm_isolate)
no_safepoint_scope_depth_(0),
#endif
reusable_handles_(),
saved_stack_limit_(0),
defer_oob_messages_count_(0),
deferred_interrupts_mask_(0),
deferred_interrupts_(0),

View file

@ -269,9 +269,8 @@ class Thread : public ThreadState {
void SetStackLimit(uword value);
void ClearStackLimit();
// Access to the current stack limit for generated code. Either the true OS
// thread's stack limit minus some headroom, or a special value to trigger
// interrupts.
// Access to the current stack limit for generated code. This may be
// overwritten with a special value to trigger interrupts.
uword stack_limit_address() const {
return reinterpret_cast<uword>(&stack_limit_);
}
@ -279,10 +278,7 @@ class Thread : public ThreadState {
return OFFSET_OF(Thread, stack_limit_);
}
// The true stack limit for this OS thread.
static intptr_t saved_stack_limit_offset() {
return OFFSET_OF(Thread, saved_stack_limit_);
}
// The true stack limit for this isolate.
uword saved_stack_limit() const { return saved_stack_limit_; }
#if defined(USING_SAFE_STACK)
@ -852,7 +848,6 @@ class Thread : public ThreadState {
// We use only word-sized fields to avoid differences in struct packing on the
// different architectures. See also CheckOffsets in dart.cc.
uword stack_limit_;
uword saved_stack_limit_;
uword stack_overflow_flags_;
uword write_barrier_mask_;
Isolate* isolate_;
@ -918,6 +913,7 @@ class Thread : public ThreadState {
int32_t no_safepoint_scope_depth_;
#endif
VMHandles reusable_handles_;
uword saved_stack_limit_;
intptr_t defer_oob_messages_count_;
uint16_t deferred_interrupts_mask_;
uint16_t deferred_interrupts_;

View file

@ -424,6 +424,9 @@ void ThreadPool::Worker::Main(uword args) {
ThreadId id = os_thread->id();
ThreadPool* pool;
// Set the thread's stack_base based on the current stack pointer.
os_thread->RefineStackBoundsFromSP(OSThread::GetCurrentStackPointer());
{
MonitorLocker ml(&worker->monitor_);
ASSERT(worker->task_);