From 1e0728c5d48d87b38c54f655702bf2e639aa4f34 Mon Sep 17 00:00:00 2001 From: Paul Gofman Date: Wed, 31 Jan 2024 14:24:24 -0600 Subject: [PATCH] ntdll: Don't hardcode xstate feature mask. --- dlls/ntdll/unix/signal_i386.c | 61 +++++++++++++++++---------------- dlls/ntdll/unix/signal_x86_64.c | 61 +++++++++++++++++++++------------ dlls/ntdll/unix/system.c | 4 +++ dlls/ntdll/unix/unix_private.h | 9 +++++ 4 files changed, 84 insertions(+), 51 deletions(-) diff --git a/dlls/ntdll/unix/signal_i386.c b/dlls/ntdll/unix/signal_i386.c index cd6417b57b4..d634e46a80f 100644 --- a/dlls/ntdll/unix/signal_i386.c +++ b/dlls/ntdll/unix/signal_i386.c @@ -525,6 +525,7 @@ struct x86_thread_data UINT dr7; /* 1f0 */ SYSTEM_SERVICE_TABLE *syscall_table; /* 1f4 syscall table */ struct syscall_frame *syscall_frame; /* 1f8 frame pointer on syscall entry */ + UINT64 xstate_features_mask; /* 1fc */ }; C_ASSERT( sizeof(struct x86_thread_data) <= sizeof(((struct ntdll_thread_data *)0)->cpu_data) ); @@ -609,8 +610,6 @@ struct xcontext ULONG64 host_compaction_mask; }; -extern BOOL xstate_compaction_enabled; - static inline XSTATE *xstate_from_context( const CONTEXT *context ) { CONTEXT_EX *xctx = (CONTEXT_EX *)(context + 1); @@ -832,7 +831,7 @@ static inline void save_context( struct xcontext *xcontext, const ucontext_t *si context->ContextFlags |= CONTEXT_FLOATING_POINT | CONTEXT_EXTENDED_REGISTERS; memcpy( context->ExtendedRegisters, fpux, sizeof(*fpux) ); if (!fpu) fpux_to_fpu( &context->FloatSave, fpux ); - if ((cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX) && (xs = XState_sig(fpux))) + if (xstate_extended_features() && (xs = XState_sig(fpux))) { context_init_xstate( context, xs ); xcontext->host_compaction_mask = xs->CompactionMask; @@ -936,7 +935,7 @@ NTSTATUS WINAPI NtSetContextThread( HANDLE handle, const CONTEXT *context ) DWORD flags = context->ContextFlags & ~CONTEXT_i386; BOOL self = (handle == GetCurrentThread()); - if ((flags & CONTEXT_XSTATE) && (cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX)) + if ((flags & CONTEXT_XSTATE) && xstate_extended_features()) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); XSTATE *xs = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); @@ -944,7 +943,7 @@ NTSTATUS WINAPI NtSetContextThread( HANDLE handle, const CONTEXT *context ) if (context_ex->XState.Length < offsetof(XSTATE, YmmContext) || context_ex->XState.Length > sizeof(XSTATE)) return STATUS_INVALID_PARAMETER; - if ((xs->Mask & XSTATE_MASK_GSSE) && (context_ex->XState.Length < sizeof(XSTATE))) + if ((xs->Mask & xstate_extended_features()) && (context_ex->XState.Length < sizeof(XSTATE))) return STATUS_BUFFER_OVERFLOW; } else flags &= ~CONTEXT_XSTATE; @@ -1138,7 +1137,7 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) context->ContextFlags |= CONTEXT_EXTENDED_REGISTERS; } - if ((needed_flags & CONTEXT_XSTATE) && (cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX)) + if ((needed_flags & CONTEXT_XSTATE) && xstate_extended_features()) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); XSTATE *xstate = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); @@ -1148,7 +1147,7 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) || context_ex->XState.Length > sizeof(XSTATE)) return STATUS_INVALID_PARAMETER; - mask = (xstate_compaction_enabled ? xstate->CompactionMask : xstate->Mask) & XSTATE_MASK_GSSE; + mask = (xstate_compaction_enabled ? xstate->CompactionMask : xstate->Mask) & xstate_extended_features(); xstate->Mask = frame->xstate.Mask & mask; xstate->CompactionMask = xstate_compaction_enabled ? (0x8000000000000000 | mask) : 0; memset( xstate->Reserved, 0, sizeof(xstate->Reserved) ); @@ -1485,7 +1484,7 @@ static void setup_raise_exception( ucontext_t *sigcontext, void *stack_ptr, context_init_xstate( &stack->context, dst_xs ); memset( dst_xs, 0, offsetof(XSTATE, YmmContext) ); - dst_xs->CompactionMask = xstate_compaction_enabled ? 0x8000000000000004 : 0; + dst_xs->CompactionMask = xstate_compaction_enabled ? 0x8000000000000000 | xstate_extended_features() : 0; if (src_xs->Mask & 4) { dst_xs->Mask = 4; @@ -1587,7 +1586,7 @@ NTSTATUS call_user_exception_dispatcher( EXCEPTION_RECORD *rec, CONTEXT *context context_init_xstate( &stack->context, dst_xs ); memset( dst_xs, 0, offsetof(XSTATE, YmmContext) ); - dst_xs->CompactionMask = xstate_compaction_enabled ? 0x8000000000000004 : 0; + dst_xs->CompactionMask = xstate_compaction_enabled ? 0x8000000000000000 | xstate_extended_features() : 0; if (src_xs->Mask & 4) { dst_xs->Mask = 4; @@ -2481,6 +2480,7 @@ void call_init_thunk( LPTHREAD_START_ROUTINE entry, void *arg, BOOL suspend, TEB ldt_set_fs( thread_data->fs, teb ); thread_data->gs = get_gs(); thread_data->syscall_table = KeServiceDescriptorTable; + thread_data->xstate_features_mask = xstate_supported_features_mask; context.SegCs = get_cs(); context.SegDs = get_ds(); @@ -2504,6 +2504,8 @@ void call_init_thunk( LPTHREAD_START_ROUTINE entry, void *arg, BOOL suspend, TEB *ctx = context; ctx->ContextFlags = CONTEXT_FULL | CONTEXT_FLOATING_POINT | CONTEXT_EXTENDED_REGISTERS; memset( frame, 0, sizeof(*frame) ); + if (xstate_compaction_enabled) + frame->xstate.CompactionMask = 0x8000000000000000 | xstate_supported_features_mask; NtSetContextThread( GetCurrentThread(), ctx ); stack = (DWORD *)ctx; @@ -2605,26 +2607,27 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "addl %fs:0x1f4,%ebx\n\t" /* x86_thread_data()->syscall_table */ "testl $3,(%ecx)\n\t" /* frame->syscall_flags & (SYSCALL_HAVE_XSAVE | SYSCALL_HAVE_XSAVEC) */ "jz 2f\n\t" - "movl $7,%eax\n\t" - "xorl %edx,%edx\n\t" - "movl %edx,0x240(%ecx)\n\t" - "movl %edx,0x244(%ecx)\n\t" - "movl %edx,0x248(%ecx)\n\t" - "movl %edx,0x24c(%ecx)\n\t" - "movl %edx,0x250(%ecx)\n\t" - "movl %edx,0x254(%ecx)\n\t" + "movl %fs:0x1fc,%eax\n\t" /* x86_thread_data()->xstate_features_mask */ + "movl %fs:0x200,%edx\n\t" /* x86_thread_data()->xstate_features_mask high dword */ + "xorl %edi,%edi\n\t" + "movl %edi,0x240(%ecx)\n\t" + "movl %edi,0x244(%ecx)\n\t" + "movl %edi,0x248(%ecx)\n\t" + "movl %edi,0x24c(%ecx)\n\t" + "movl %edi,0x250(%ecx)\n\t" + "movl %edi,0x254(%ecx)\n\t" "testl $2,(%ecx)\n\t" /* frame->syscall_flags & SYSCALL_HAVE_XSAVEC */ "jz 1f\n\t" - "movl %edx,0x258(%ecx)\n\t" - "movl %edx,0x25c(%ecx)\n\t" - "movl %edx,0x260(%ecx)\n\t" - "movl %edx,0x264(%ecx)\n\t" - "movl %edx,0x268(%ecx)\n\t" - "movl %edx,0x26c(%ecx)\n\t" - "movl %edx,0x270(%ecx)\n\t" - "movl %edx,0x274(%ecx)\n\t" - "movl %edx,0x278(%ecx)\n\t" - "movl %edx,0x27c(%ecx)\n\t" + "movl %edi,0x258(%ecx)\n\t" + "movl %edi,0x25c(%ecx)\n\t" + "movl %edi,0x260(%ecx)\n\t" + "movl %edi,0x264(%ecx)\n\t" + "movl %edi,0x268(%ecx)\n\t" + "movl %edi,0x26c(%ecx)\n\t" + "movl %edi,0x270(%ecx)\n\t" + "movl %edi,0x274(%ecx)\n\t" + "movl %edi,0x278(%ecx)\n\t" + "movl %edi,0x27c(%ecx)\n\t" /* The xsavec instruction is not supported by * binutils < 2.25. */ ".byte 0x0f, 0xc7, 0x61, 0x40\n\t" /* xsavec 0x40(%ecx) */ @@ -2669,8 +2672,8 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "testl $3,%ecx\n\t" /* SYSCALL_HAVE_XSAVE | SYSCALL_HAVE_XSAVEC */ "jz 1f\n\t" "movl %eax,%esi\n\t" - "movl $7,%eax\n\t" - "xorl %edx,%edx\n\t" + "movl %fs:0x1fc,%eax\n\t" /* x86_thread_data()->xstate_features_mask */ + "movl %fs:0x200,%edx\n\t" /* x86_thread_data()->xstate_features_mask high dword */ "xrstor 0x40(%esp)\n\t" "movl %esi,%eax\n\t" "jmp 3f\n" diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index 405ac49e5a3..aaa9ea0b638 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -441,6 +441,8 @@ struct amd64_thread_data struct syscall_frame *syscall_frame; /* 0328 syscall frame pointer */ SYSTEM_SERVICE_TABLE *syscall_table; /* 0330 syscall table */ DWORD fs; /* 0338 WOW TEB selector */ + DWORD align; + UINT64 xstate_features_mask; /* 0340 */ }; C_ASSERT( sizeof(struct amd64_thread_data) <= sizeof(((struct ntdll_thread_data *)0)->cpu_data) ); @@ -448,6 +450,7 @@ C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct amd64_thread_data, pth C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct amd64_thread_data, syscall_frame ) == 0x328 ); C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct amd64_thread_data, syscall_table ) == 0x330 ); C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct amd64_thread_data, fs ) == 0x338 ); +C_ASSERT( offsetof( TEB, GdiTebBatch ) + offsetof( struct amd64_thread_data, xstate_features_mask ) == 0x340 ); static inline struct amd64_thread_data *amd64_thread_data(void) { @@ -477,8 +480,6 @@ struct xcontext ULONG64 host_compaction_mask; }; -extern BOOL xstate_compaction_enabled; - static inline XSTATE *xstate_from_context( const CONTEXT *context ) { CONTEXT_EX *xctx = (CONTEXT_EX *)(context + 1); @@ -898,7 +899,7 @@ static void save_context( struct xcontext *xcontext, const ucontext_t *sigcontex context->ContextFlags |= CONTEXT_FLOATING_POINT; context->FltSave = *FPU_sig(sigcontext); context->MxCsr = context->FltSave.MxCsr; - if ((cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX) && (xs = XState_sig(FPU_sig(sigcontext)))) + if (xstate_extended_features() && (xs = XState_sig(FPU_sig(sigcontext)))) { /* xcontext and sigcontext are both on the signal stack, so we can * just reference sigcontext without overflowing 32 bit XState.Offset */ @@ -928,7 +929,7 @@ static void restore_context( const struct xcontext *xcontext, ucontext_t *sigcon amd64_thread_data()->dr7 = context->Dr7; set_sigcontext( context, sigcontext ); if (FPU_sig(sigcontext)) *FPU_sig(sigcontext) = context->FltSave; - if ((cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX) && (xs = XState_sig(FPU_sig(sigcontext)))) + if (xstate_extended_features() && (xs = XState_sig(FPU_sig(sigcontext)))) xs->CompactionMask = xcontext->host_compaction_mask; leave_handler( sigcontext ); } @@ -977,7 +978,7 @@ NTSTATUS WINAPI NtSetContextThread( HANDLE handle, const CONTEXT *context ) BOOL self = (handle == GetCurrentThread()); struct syscall_frame *frame = amd64_thread_data()->syscall_frame; - if ((flags & CONTEXT_XSTATE) && (cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX)) + if ((flags & CONTEXT_XSTATE) && xstate_extended_features()) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); XSTATE *xs = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); @@ -985,7 +986,7 @@ NTSTATUS WINAPI NtSetContextThread( HANDLE handle, const CONTEXT *context ) if (context_ex->XState.Length < offsetof(XSTATE, YmmContext) || context_ex->XState.Length > sizeof(XSTATE)) return STATUS_INVALID_PARAMETER; - if ((xs->Mask & XSTATE_MASK_GSSE) && (context_ex->XState.Length < sizeof(XSTATE))) + if ((xs->Mask & xstate_extended_features()) && (context_ex->XState.Length < sizeof(XSTATE))) return STATUS_BUFFER_OVERFLOW; } else flags &= ~CONTEXT_XSTATE; @@ -1155,7 +1156,7 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) context->MxCsr = context->FltSave.MxCsr; context->ContextFlags |= CONTEXT_FLOATING_POINT; } - if ((needed_flags & CONTEXT_XSTATE) && (cpu_info.ProcessorFeatureBits & CPU_FEATURE_AVX)) + if ((needed_flags & CONTEXT_XSTATE) && xstate_extended_features()) { CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); XSTATE *xstate = (XSTATE *)((char *)context_ex + context_ex->XState.Offset); @@ -1165,7 +1166,7 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) || context_ex->XState.Length > sizeof(XSTATE)) return STATUS_INVALID_PARAMETER; - mask = (xstate_compaction_enabled ? xstate->CompactionMask : xstate->Mask) & XSTATE_MASK_GSSE; + mask = (xstate_compaction_enabled ? xstate->CompactionMask : xstate->Mask) & xstate_extended_features(); xstate->Mask = frame->xstate.Mask & mask; xstate->CompactionMask = xstate_compaction_enabled ? (0x8000000000000000 | mask) : 0; memset( xstate->Reserved, 0, sizeof(xstate->Reserved) ); @@ -1377,7 +1378,7 @@ NTSTATUS get_thread_wow64_context( HANDLE handle, void *ctx, ULONG size ) context_ex->XState.Length > sizeof(XSTATE)) return STATUS_INVALID_PARAMETER; - mask = (xstate_compaction_enabled ? xstate->CompactionMask : xstate->Mask) & XSTATE_MASK_GSSE; + mask = (xstate_compaction_enabled ? xstate->CompactionMask : xstate->Mask) & xstate_extended_features(); xstate->Mask = frame->xstate.Mask & mask; xstate->CompactionMask = xstate_compaction_enabled ? (0x8000000000000000 | mask) : 0; memset( xstate->Reserved, 0, sizeof(xstate->Reserved) ); @@ -1442,7 +1443,7 @@ static void setup_raise_exception( ucontext_t *sigcontext, EXCEPTION_RECORD *rec assert( !((ULONG_PTR)&stack->xstate & 63) ); context_init_xstate( &stack->context, &stack->xstate ); memset( &stack->xstate, 0, offsetof(XSTATE, YmmContext) ); - stack->xstate.CompactionMask = xstate_compaction_enabled ? 0x8000000000000004 : 0; + stack->xstate.CompactionMask = xstate_compaction_enabled ? 0x8000000000000000 | xstate_extended_features() : 0; if (src_xs->Mask & 4) { stack->xstate.Mask = 4; @@ -2479,6 +2480,7 @@ void call_init_thunk( LPTHREAD_START_ROUTINE entry, void *arg, BOOL suspend, TEB I386_CONTEXT *wow_context; thread_data->syscall_table = KeServiceDescriptorTable; + thread_data->xstate_features_mask = xstate_supported_features_mask; #if defined __linux__ arch_prctl( ARCH_SET_GS, teb ); @@ -2539,6 +2541,8 @@ void call_init_thunk( LPTHREAD_START_ROUTINE entry, void *arg, BOOL suspend, TEB *ctx = context; ctx->ContextFlags = CONTEXT_FULL; memset( frame, 0, sizeof(*frame) ); + if (xstate_compaction_enabled) + frame->xstate.CompactionMask = 0x8000000000000000 | xstate_supported_features_mask; NtSetContextThread( GetCurrentThread(), ctx ); frame->cs = cs64_sel; @@ -2636,18 +2640,25 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "movl 0xb0(%rcx),%r14d\n\t" /* frame->syscall_flags */ "testl $3,%r14d\n\t" /* SYSCALL_HAVE_XSAVE | SYSCALL_HAVE_XSAVEC */ "jz 2f\n\t" - "movl $7,%eax\n\t" - "xorl %edx,%edx\n\t" - "movq %rdx,0x2c0(%rcx)\n\t" - "movq %rdx,0x2c8(%rcx)\n\t" - "movq %rdx,0x2d0(%rcx)\n\t" +#ifdef __APPLE__ + "movq %gs:0x30,%rdx\n\t" + "movl 0x340(%rdx),%eax\n\t" + "movl 0x344(%rdx),%edx\n\t" +#else + "movl %gs:0x340,%eax\n\t" /* amd64_thread_data()->xstate_features_mask */ + "movl %gs:0x344,%edx\n\t" /* amd64_thread_data()->xstate_features_mask high dword */ +#endif + "xorq %rbp,%rbp\n\t" + "movq %rbp,0x2c0(%rcx)\n\t" + "movq %rbp,0x2c8(%rcx)\n\t" + "movq %rbp,0x2d0(%rcx)\n\t" "testl $2,%r14d\n\t" /* SYSCALL_HAVE_XSAVEC */ "jz 1f\n\t" - "movq %rdx,0x2d8(%rcx)\n\t" - "movq %rdx,0x2e0(%rcx)\n\t" - "movq %rdx,0x2e8(%rcx)\n\t" - "movq %rdx,0x2f0(%rcx)\n\t" - "movq %rdx,0x2f8(%rcx)\n\t" + "movq %rbp,0x2d8(%rcx)\n\t" + "movq %rbp,0x2e0(%rcx)\n\t" + "movq %rbp,0x2e8(%rcx)\n\t" + "movq %rbp,0x2f0(%rcx)\n\t" + "movq %rbp,0x2f8(%rcx)\n\t" /* The xsavec instruction is not supported by * binutils < 2.25. */ ".byte 0x48, 0x0f, 0xc7, 0xa1, 0xc0, 0x00, 0x00, 0x00\n\t" /* xsavec64 0xc0(%rcx) */ @@ -2749,8 +2760,14 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "2:\ttestl $3,%r14d\n\t" /* SYSCALL_HAVE_XSAVE | SYSCALL_HAVE_XSAVEC */ "jz 3f\n\t" "movq %rax,%r11\n\t" - "movl $7,%eax\n\t" - "xorl %edx,%edx\n\t" +#ifdef __APPLE__ + "movq %gs:0x30,%rdx\n\t" + "movl 0x340(%rdx),%eax\n\t" + "movl 0x344(%rdx),%edx\n\t" +#else + "movl %gs:0x340,%eax\n\t" /* amd64_thread_data()->xstate_features_mask */ + "movl %gs:0x344,%edx\n\t" /* amd64_thread_data()->xstate_features_mask high dword */ +#endif "xrstor64 0xc0(%rcx)\n\t" "movq %r11,%rax\n\t" "movl 0xb4(%rcx),%edx\n\t" /* frame->restore_flags */ diff --git a/dlls/ntdll/unix/system.c b/dlls/ntdll/unix/system.c index cb99b7c2cc3..2c71cc67835 100644 --- a/dlls/ntdll/unix/system.c +++ b/dlls/ntdll/unix/system.c @@ -247,6 +247,7 @@ static pthread_mutex_t timezone_mutex = PTHREAD_MUTEX_INITIALIZER; #if defined(__i386__) || defined(__x86_64__) BOOL xstate_compaction_enabled = FALSE; +UINT64 xstate_supported_features_mask; #define AUTH 0x68747541 /* "Auth" */ #define ENTI 0x69746e65 /* "enti" */ @@ -396,6 +397,9 @@ static void get_cpuinfo( SYSTEM_CPU_INFORMATION *info ) { do_cpuid( 0x0000000d, 1, regs3 ); /* get XSAVE details */ if (regs3[0] & 2) xstate_compaction_enabled = TRUE; + xstate_supported_features_mask = 3; + if (features & CPU_FEATURE_AVX) + xstate_supported_features_mask |= (UINT64)1 << XSTATE_AVX; } if (regs[1] == AUTH && regs[3] == ENTI && regs[2] == CAMD) diff --git a/dlls/ntdll/unix/unix_private.h b/dlls/ntdll/unix/unix_private.h index cbf72651a9a..1311e5a1070 100644 --- a/dlls/ntdll/unix/unix_private.h +++ b/dlls/ntdll/unix/unix_private.h @@ -214,6 +214,15 @@ extern int server_pipe( int fd[2] ); extern void fpux_to_fpu( I386_FLOATING_SAVE_AREA *fpu, const XSAVE_FORMAT *fpux ); extern void fpu_to_fpux( XSAVE_FORMAT *fpux, const I386_FLOATING_SAVE_AREA *fpu ); + +extern BOOL xstate_compaction_enabled; +extern UINT64 xstate_supported_features_mask; + +static inline UINT64 xstate_extended_features(void) +{ + return xstate_supported_features_mask & ~(UINT64)3; +} + extern void *get_cpu_area( USHORT machine ); extern void set_thread_id( TEB *teb, DWORD pid, DWORD tid ); extern NTSTATUS init_thread_stack( TEB *teb, ULONG_PTR limit, SIZE_T reserve_size, SIZE_T commit_size );