ntdll: Fix restoring X16 and X17 in ARM64 syscall dispatcher.

Today, NtContinue() on ARM64 does not restore X16 and X17 from the
context.

This is because the values for X16 and X17 are overwritten when the
current thread returns to "user mode" (PE side) via
__wine_syscall_dispatcher, which in turn uses them as scratch registers
for restoring PC and SP respectively.

We cannot avoid using scratch registers when restoring SP and PC.  This
is because ARMv8 does not have an unprivileged (EL0) instruction that
loads SP and PC from memory or non-GPR architectural state.

Fix this by making ARM64 __wine_syscall_dispatcher perform a full
context restore via raise(SIGUSR2) when NtContinue() is used.

Since raising a signal is quite expensive, it should be done only when
necessary. To achieve this, split the ARM64 syscall dispatcher's
returning behaviour into a fast path (that does not involve signals) and
a slow path (that involves signals):

- If CONTEXT_INTEGER is not set, the dispatcher takes the fast path:
  the X16 and X17 registers are clobbered as usual.

- If X16 == PC and X17 == SP, the dispatcher also takes the fast path:
  it can safely use X16 and X17 without corrupting the register values,
  since those two registers already have the desired values.

  This fast path is used in call_user_apc_dispatcher(),
  call_user_exception_dispatcher(), and call_init_thunk().

- Otherwise, the dispatcher takes the slow path: it raises SIGUSR2 and
  does a full context restore in the signal handler.

Fixes: 88e336214d
This commit is contained in:
Jinoh Kang 2021-11-09 00:41:17 +09:00 committed by Alexandre Julliard
parent 24557a257a
commit 057467bff9

View file

@ -477,6 +477,21 @@ NTSTATUS unwind_builtin_dll( void *args )
}
/***********************************************************************
 *           syscall_frame_fixup_for_fastpath
 *
 * Prepare a syscall frame for a fast-path return from the syscall
 * dispatcher when CONTEXT_INTEGER is set in restore_flags.
 *
 * The dispatcher uses X16 and X17 as scratch registers holding the
 * target PC and SP, and only takes the fast path when the saved X16
 * and X17 already match those values.  This helper makes them match.
 *
 * The saved X16 and X17 values in the frame are overwritten.
 */
static void syscall_frame_fixup_for_fastpath( struct syscall_frame *frame )
{
    /* the two stores touch distinct fields, so their order is irrelevant */
    frame->x[17] = frame->sp;  /* X17 <- stack pointer to return with */
    frame->x[16] = frame->pc;  /* X16 <- address to return to */
}
/***********************************************************************
* save_fpu
*
@ -1054,6 +1069,7 @@ NTSTATUS call_user_apc_dispatcher( CONTEXT *context, ULONG_PTR arg1, ULONG_PTR a
frame->x[3] = arg3;
frame->x[4] = (ULONG64)func;
frame->restore_flags |= CONTEXT_CONTROL | CONTEXT_INTEGER;
syscall_frame_fixup_for_fastpath( frame );
return status;
}
@ -1086,6 +1102,7 @@ NTSTATUS call_user_exception_dispatcher( EXCEPTION_RECORD *rec, CONTEXT *context
frame->lr = lr;
frame->sp = sp;
frame->restore_flags |= CONTEXT_INTEGER | CONTEXT_CONTROL;
syscall_frame_fixup_for_fastpath( frame );
return status;
}
@ -1578,6 +1595,14 @@ void signal_init_process(void)
}
/***********************************************************************
* syscall_dispatcher_return_slowpath
*
* Slow-path helper for returning from the syscall dispatcher.
*
* Called from the __wine_syscall_dispatcher return sequence when
* CONTEXT_INTEGER is set in the frame's restore_flags and the saved
* X16/X17 do not equal the saved PC/SP, so the dispatcher cannot use
* X16/X17 as scratch registers without corrupting them.
*
* Takes no arguments and returns nothing; its only action is to raise
* SIGUSR2.  The full context restore is expected to be done by the
* SIGUSR2 signal handler (not visible here).
*/
void DECLSPEC_HIDDEN syscall_dispatcher_return_slowpath(void)
{
/* raise() directs the signal at the calling thread */
raise( SIGUSR2 );
}
/***********************************************************************
* call_init_thunk
*/
@ -1638,6 +1663,7 @@ void DECLSPEC_HIDDEN call_init_thunk( LPTHREAD_START_ROUTINE entry, void *arg, B
frame->x[18] = (ULONG64)teb;
frame->prev_frame = NULL;
frame->restore_flags |= CONTEXT_INTEGER;
syscall_frame_fixup_for_fastpath( frame );
frame->syscall_table = KeServiceDescriptorTable;
pthread_sigmask( SIG_UNBLOCK, &server_block_set, NULL );
@ -1734,13 +1760,28 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher,
"blr x16\n\t"
"mov sp, x22\n"
__ASM_LOCAL_LABEL("__wine_syscall_dispatcher_return") ":\n\t"
"ldp x18, x19, [sp, #0x90]\n\t"
"ldr w16, [sp, #0x10c]\n\t" /* frame->restore_flags */
"tbz x16, #1, 2f\n\t" /* CONTEXT_INTEGER */
"ldp x12, x13, [sp, #0x80]\n\t" /* frame->x[16..17] */
"ldp x14, x15, [sp, #0xf8]\n\t" /* frame->sp, frame->pc */
"cmp x12, x15\n\t" /* frame->x16 == frame->pc? */
"ccmp x13, x14, #0, eq\n\t" /* frame->x17 == frame->sp? */
"beq 1f\n\t" /* take slowpath if unequal */
"bl " __ASM_NAME("syscall_dispatcher_return_slowpath") "\n"
"1:\tldp x0, x1, [sp, #0x00]\n\t"
"ldp x2, x3, [sp, #0x10]\n\t"
"ldp x4, x5, [sp, #0x20]\n\t"
"ldp x6, x7, [sp, #0x30]\n\t"
"ldp x8, x9, [sp, #0x40]\n\t"
"ldp x10, x11, [sp, #0x50]\n\t"
"ldp x12, x13, [sp, #0x60]\n\t"
"ldp x14, x15, [sp, #0x70]\n"
"2:\tldp x18, x19, [sp, #0x90]\n\t"
"ldp x20, x21, [sp, #0xa0]\n\t"
"ldp x22, x23, [sp, #0xb0]\n\t"
"ldp x24, x25, [sp, #0xc0]\n\t"
"ldp x26, x27, [sp, #0xd0]\n\t"
"ldp x28, x29, [sp, #0xe0]\n\t"
"ldr w16, [sp, #0x10c]\n\t" /* frame->restore_flags */
"tbz x16, #2, 1f\n\t" /* CONTEXT_FLOATING_POINT */
"ldp q0, q1, [sp, #0x130]\n\t"
"ldp q2, q3, [sp, #0x150]\n\t"
@ -1758,19 +1799,10 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher,
"ldp q26, q27, [sp, #0x2d0]\n\t"
"ldp q28, q29, [sp, #0x2f0]\n\t"
"ldp q30, q31, [sp, #0x310]\n\t"
"ldr w9, [sp, #0x128]\n\t"
"msr FPCR, x9\n\t"
"ldr w9, [sp, #0x12c]\n\t"
"msr FPSR, x9\n"
"1:\ttbz x16, #1, 1f\n\t" /* CONTEXT_INTEGER */
"ldp x0, x1, [sp, #0x00]\n\t"
"ldp x2, x3, [sp, #0x10]\n\t"
"ldp x4, x5, [sp, #0x20]\n\t"
"ldp x6, x7, [sp, #0x30]\n\t"
"ldp x8, x9, [sp, #0x40]\n\t"
"ldp x10, x11, [sp, #0x50]\n\t"
"ldp x12, x13, [sp, #0x60]\n\t"
"ldp x14, x15, [sp, #0x70]\n"
"ldr w17, [sp, #0x128]\n\t"
"msr FPCR, x17\n\t"
"ldr w17, [sp, #0x12c]\n\t"
"msr FPSR, x17\n"
"1:\tldp x16, x17, [sp, #0x100]\n\t"
"msr NZCV, x17\n\t"
"ldp x30, x17, [sp, #0xf0]\n\t"