From 29c73ee17335b30f3f49c8b3562742c6a35b482c Mon Sep 17 00:00:00 2001 From: Paul Gofman Date: Thu, 18 Jan 2024 12:59:26 -0600 Subject: [PATCH] ntdll: Support more xstate features. --- dlls/ntdll/tests/exception.c | 2 +- dlls/ntdll/unix/system.c | 40 +++++++++++++++++++++---- programs/wineboot/wineboot.c | 58 ++++++++++++++++++++++++++---------- 3 files changed, 79 insertions(+), 21 deletions(-) diff --git a/dlls/ntdll/tests/exception.c b/dlls/ntdll/tests/exception.c index 67b02cbd8b6..7a4e04ec5e0 100644 --- a/dlls/ntdll/tests/exception.c +++ b/dlls/ntdll/tests/exception.c @@ -9171,7 +9171,7 @@ static DWORD test_extended_context_handler(EXCEPTION_RECORD *rec, EXCEPTION_REGI } else { - ok(xs->Mask == (xsaveopt_enabled ? 0 : 4), "Got unexpected Mask %#I64x.\n", xs->Mask); + ok((xs->Mask & 7) == (xsaveopt_enabled ? 0 : 4), "Got unexpected Mask %#I64x.\n", xs->Mask); /* The save area has garbage if xsaveopt is available, so we can't test * its contents. */ diff --git a/dlls/ntdll/unix/system.c b/dlls/ntdll/unix/system.c index 068eb9d6839..15c4a49e611 100644 --- a/dlls/ntdll/unix/system.c +++ b/dlls/ntdll/unix/system.c @@ -250,8 +250,8 @@ BOOL xstate_compaction_enabled = FALSE; UINT64 xstate_supported_features_mask; UINT64 xstate_features_size; -static int xstate_feature_offset[64] = {0, 0, 576}; -static int xstate_feature_size[64] = {0, 0, 256}; +static int xstate_feature_offset[64]; +static int xstate_feature_size[64]; static UINT64 xstate_aligned_features; static int next_xstate_offset( int off, UINT64 compaction_mask, int feature_idx ) @@ -345,6 +345,21 @@ __ASM_GLOBAL_FUNC( do_cpuid, "ret" ) #endif +extern UINT64 do_xgetbv( unsigned int cx); /* runs xgetbv with %ecx = cx; the result is the %edx:%eax pair as one 64-bit value */ +#ifdef __i386__ +__ASM_GLOBAL_FUNC( do_xgetbv, + "movl 4(%esp),%ecx\n\t" + "xgetbv\n\t" + "ret" ) +#else +__ASM_GLOBAL_FUNC( do_xgetbv, + "movl %edi,%ecx\n\t" + "xgetbv\n\t" + "shlq $32,%rdx\n\t" + "orq %rdx,%rax\n\t" + "ret" ) +#endif + #ifdef __i386__ extern int have_cpuid(void); __ASM_GLOBAL_FUNC( have_cpuid, @@ -403,8 
+418,11 @@ static void get_cpuid_name( char *buffer ) static void get_cpuinfo( SYSTEM_CPU_INFORMATION *info ) { + static const ULONG64 wine_xstate_supported_features = 0xff; /* XSTATE_LEGACY_FLOATING_POINT, XSTATE_LEGACY_SSE, XSTATE_AVX, XSTATE_MPX_BNDREGS, XSTATE_MPX_BNDCSR, + * XSTATE_AVX512_KMASK, XSTATE_AVX512_ZMM_H, XSTATE_AVX512_ZMM */ unsigned int regs[4], regs2[4], regs3[4]; ULONGLONG features; + unsigned int i; #if defined(__i386__) info->ProcessorArchitecture = PROCESSOR_ARCHITECTURE_INTEL; @@ -454,13 +472,25 @@ static void get_cpuinfo( SYSTEM_CPU_INFORMATION *info ) { do_cpuid( 0x0000000d, 1, regs3 ); /* get XSAVE details */ if (regs3[0] & 2) xstate_compaction_enabled = TRUE; - xstate_supported_features_mask = 3; - if (features & CPU_FEATURE_AVX) - xstate_supported_features_mask |= (UINT64)1 << XSTATE_AVX; + + do_cpuid( 0x0000000d, 0, regs3 ); /* get user xstate features */ + xstate_supported_features_mask = ((ULONG64)regs3[3] << 32) | regs3[0]; + xstate_supported_features_mask &= do_xgetbv( 0 ) & wine_xstate_supported_features; + TRACE("xstate_supported_features_mask %#llx.\n", (long long)xstate_supported_features_mask); + for (i = 2; i < 64; ++i) + { + if (!(xstate_supported_features_mask & ((ULONG64)1 << i))) continue; + do_cpuid( 0x0000000d, i, regs3 ); /* get size, offset and alignment flag of xstate feature i */ + xstate_feature_offset[i] = regs3[1]; + xstate_feature_size[i] = regs3[0]; + if (regs3[2] & 2) xstate_aligned_features |= (ULONG64)1 << i; + TRACE("xstate[%d] offset %d, size %d, aligned %d.\n", i, xstate_feature_offset[i], xstate_feature_size[i], !!(regs3[2] & 2)); + } xstate_features_size = xstate_get_size( xstate_compaction_enabled ? 
0x8000000000000000 | xstate_supported_features_mask : 0, xstate_supported_features_mask ) - sizeof(XSAVE_AREA_HEADER); xstate_features_size = (xstate_features_size + 15) & ~15; + TRACE("xstate_features_size %lld.\n", (long long)xstate_features_size); } if (regs[1] == AUTH && regs[3] == ENTI && regs[2] == CAMD) diff --git a/programs/wineboot/wineboot.c b/programs/wineboot/wineboot.c index 1c1ad858fa4..67d9508389f 100644 --- a/programs/wineboot/wineboot.c +++ b/programs/wineboot/wineboot.c @@ -193,10 +193,27 @@ static DWORD set_reg_value_dword( HKEY hkey, const WCHAR *name, DWORD value ) #if defined(__i386__) || defined(__x86_64__) +extern UINT64 WINAPI do_xgetbv( unsigned int cx); +#ifdef __i386__ +__ASM_STDCALL_FUNC( do_xgetbv, 4, + "movl 4(%esp),%ecx\n\t" + "xgetbv\n\t" + "ret $4" ) +#else +__ASM_GLOBAL_FUNC( do_xgetbv, /* no load of %ecx here: presumably cx already arrives in %ecx under the x86-64 WINAPI convention - TODO confirm */ + "xgetbv\n\t" + "shlq $32,%rdx\n\t" + "orq %rdx,%rax\n\t" + "ret" ) +#endif + static void initialize_xstate_features(struct _KUSER_SHARED_DATA *data) { + static const ULONG64 wine_xstate_supported_features = 0xfc; /* XSTATE_AVX, XSTATE_MPX_BNDREGS, XSTATE_MPX_BNDCSR, + * XSTATE_AVX512_KMASK, XSTATE_AVX512_ZMM_H, XSTATE_AVX512_ZMM */ XSTATE_CONFIGURATION *xstate = &data->XState; - unsigned int i; + ULONG64 supported_mask; + unsigned int i, off; int regs[4]; if (!data->ProcessorFeatures[PF_AVX_INSTRUCTIONS_AVAILABLE]) @@ -215,29 +232,40 @@ static void initialize_xstate_features(struct _KUSER_SHARED_DATA *data) __cpuidex(regs, 0xd, 0); TRACE("XSAVE details %#x, %#x, %#x, %#x.\n", regs[0], regs[1], regs[2], regs[3]); - if (!(regs[0] & XSTATE_AVX)) + supported_mask = ((ULONG64)regs[3] << 32) | regs[0]; + supported_mask &= do_xgetbv(0) & wine_xstate_supported_features; + if (!(supported_mask >> 2)) /* no feature with index >= 2 is both reported by cpuid and enabled in xgetbv(0) */ return; - xstate->EnabledFeatures = (1 << XSTATE_LEGACY_FLOATING_POINT) | (1 << XSTATE_LEGACY_SSE) | (1 << XSTATE_AVX); + xstate->EnabledFeatures = (1 << XSTATE_LEGACY_FLOATING_POINT) | (1 << XSTATE_LEGACY_SSE) | supported_mask; 
xstate->EnabledVolatileFeatures = xstate->EnabledFeatures; - xstate->Size = sizeof(XSAVE_FORMAT) + sizeof(XSTATE); xstate->AllFeatureSize = regs[1]; - xstate->AllFeatures[0] = offsetof(XSAVE_FORMAT, XmmRegisters); - xstate->AllFeatures[1] = sizeof(M128A) * 16; - xstate->AllFeatures[2] = sizeof(YMMCONTEXT); - - for (i = 0; i < 3; ++i) - xstate->Features[i].Size = xstate->AllFeatures[i]; - - xstate->Features[1].Offset = xstate->Features[0].Size; - xstate->Features[2].Offset = sizeof(XSAVE_FORMAT) + offsetof(XSTATE, YmmContext); __cpuidex(regs, 0xd, 1); xstate->OptimizedSave = regs[0] & 1; xstate->CompactionEnabled = !!(regs[0] & 2); - __cpuidex(regs, 0xd, 2); - TRACE("XSAVE feature 2 %#x, %#x, %#x, %#x.\n", regs[0], regs[1], regs[2], regs[3]); + xstate->Features[0].Size = xstate->AllFeatures[0] = offsetof(XSAVE_FORMAT, XmmRegisters); + xstate->Features[1].Size = xstate->AllFeatures[1] = sizeof(M128A) * 16; + xstate->Features[1].Offset = xstate->Features[0].Size; + off = sizeof(XSAVE_FORMAT) + sizeof(XSAVE_AREA_HEADER); + supported_mask >>= 2; + for (i = 2; supported_mask; ++i, supported_mask >>= 1) + { + if (!(supported_mask & 1)) continue; + __cpuidex( regs, 0xd, i ); + xstate->Features[i].Offset = regs[1]; + xstate->Features[i].Size = xstate->AllFeatures[i] = regs[0]; + if (regs[2] & 2) + { + xstate->AlignedFeatures |= (ULONG64)1 << i; + off = (off + 63) & ~63; /* round the running offset (used as Size when compaction is enabled) up to a 64-byte boundary */ + } + off += xstate->Features[i].Size; + TRACE("xstate[%d] offset %lu, size %lu, aligned %d.\n", i, xstate->Features[i].Offset, xstate->Features[i].Size, !!(regs[2] & 2)); + } + xstate->Size = xstate->CompactionEnabled ? off : xstate->Features[i - 1].Offset + xstate->Features[i - 1].Size; /* the loop exits with i one past the highest enabled feature, so i - 1 is that feature */ + TRACE("xstate size %lu, compacted %d, optimized %d.\n", xstate->Size, xstate->CompactionEnabled, xstate->OptimizedSave); } static BOOL is_tsc_trusted_by_the_kernel(void)