mirror of
https://gitlab.freedesktop.org/pipewire/pipewire
synced 2024-09-20 00:11:31 +00:00
84ecebbd4e
Check if we can run SSE instructions before executing the denormals SSE code. Fixes #1775
204 lines
5.2 KiB
C
204 lines
5.2 KiB
C
/* Spa
 *
 * Copyright © 2018 Wim Taymans
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
|
|
|
|
#include <cpuid.h>
|
|
|
|
static int
|
|
x86_init(struct impl *impl)
|
|
{
|
|
uint32_t flags;
|
|
|
|
unsigned int vendor;
|
|
unsigned int model, family;
|
|
unsigned int max_level, ext_level, has_osxsave;
|
|
unsigned int eax, ebx, ecx, edx;
|
|
|
|
|
|
max_level = __get_cpuid_max(0, &vendor);
|
|
if (max_level < 1)
|
|
return 0;
|
|
|
|
__cpuid(1, eax, ebx, ecx, edx);
|
|
|
|
model = (eax >> 4) & 0x0f;
|
|
family = (eax >> 8) & 0x0f;
|
|
|
|
if (vendor == signature_INTEL_ebx ||
|
|
vendor == signature_AMD_ebx) {
|
|
unsigned int extended_model, extended_family;
|
|
|
|
extended_model = (eax >> 12) & 0xf0;
|
|
extended_family = (eax >> 20) & 0xff;
|
|
if (family == 0x0f) {
|
|
family += extended_family;
|
|
model += extended_model;
|
|
} else if (family == 0x06)
|
|
model += extended_model;
|
|
}
|
|
|
|
flags = 0;
|
|
if (ecx & bit_SSE3)
|
|
flags |= SPA_CPU_FLAG_SSE3;
|
|
if (ecx & bit_SSSE3)
|
|
flags |= SPA_CPU_FLAG_SSSE3;
|
|
if (ecx & bit_SSE4_1)
|
|
flags |= SPA_CPU_FLAG_SSE41;
|
|
if (ecx & bit_SSE4_2)
|
|
flags |= SPA_CPU_FLAG_SSE42;
|
|
if (ecx & bit_AVX)
|
|
flags |= SPA_CPU_FLAG_AVX;
|
|
has_osxsave = ecx & bit_OSXSAVE;
|
|
if (ecx & bit_FMA)
|
|
flags |= SPA_CPU_FLAG_FMA3;
|
|
|
|
if (edx & bit_CMOV)
|
|
flags |= SPA_CPU_FLAG_CMOV;
|
|
if (edx & bit_MMX)
|
|
flags |= SPA_CPU_FLAG_MMX;
|
|
if (edx & bit_MMXEXT)
|
|
flags |= SPA_CPU_FLAG_MMXEXT;
|
|
if (edx & bit_SSE)
|
|
flags |= SPA_CPU_FLAG_SSE;
|
|
if (edx & bit_SSE2)
|
|
flags |= SPA_CPU_FLAG_SSE2;
|
|
|
|
|
|
if (max_level >= 7) {
|
|
__cpuid_count(7, 0, eax, ebx, ecx, edx);
|
|
|
|
if (ebx & bit_BMI)
|
|
flags |= SPA_CPU_FLAG_BMI1;
|
|
if (ebx & bit_AVX2)
|
|
flags |= SPA_CPU_FLAG_AVX2;
|
|
if (ebx & bit_BMI2)
|
|
flags |= SPA_CPU_FLAG_BMI2;
|
|
#define AVX512_BITS (bit_AVX512F | bit_AVX512DQ | bit_AVX512CD | bit_AVX512BW | bit_AVX512VL)
|
|
if ((ebx & AVX512_BITS) == AVX512_BITS)
|
|
flags |= SPA_CPU_FLAG_AVX512;
|
|
}
|
|
|
|
/* Check cpuid level of extended features. */
|
|
__cpuid (0x80000000, ext_level, ebx, ecx, edx);
|
|
|
|
if (ext_level >= 0x80000001) {
|
|
__cpuid (0x80000001, eax, ebx, ecx, edx);
|
|
|
|
if (edx & bit_3DNOW)
|
|
flags |= SPA_CPU_FLAG_3DNOW;
|
|
if (edx & bit_3DNOWP)
|
|
flags |= SPA_CPU_FLAG_3DNOWEXT;
|
|
if (edx & bit_MMX)
|
|
flags |= SPA_CPU_FLAG_MMX;
|
|
if (edx & bit_MMXEXT)
|
|
flags |= SPA_CPU_FLAG_MMXEXT;
|
|
if (ecx & bit_FMA4)
|
|
flags |= SPA_CPU_FLAG_FMA4;
|
|
if (ecx & bit_XOP)
|
|
flags |= SPA_CPU_FLAG_XOP;
|
|
}
|
|
|
|
/* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv. */
|
|
#define XCR_XFEATURE_ENABLED_MASK 0x0
|
|
#define XSTATE_FP 0x1
|
|
#define XSTATE_SSE 0x2
|
|
#define XSTATE_YMM 0x4
|
|
#define XSTATE_OPMASK 0x20
|
|
#define XSTATE_ZMM 0x40
|
|
#define XSTATE_HI_ZMM 0x80
|
|
|
|
#define XCR_AVX_ENABLED_MASK \
|
|
(XSTATE_SSE | XSTATE_YMM)
|
|
#define XCR_AVX512F_ENABLED_MASK \
|
|
(XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM)
|
|
|
|
if (has_osxsave)
|
|
asm (".byte 0x0f; .byte 0x01; .byte 0xd0"
|
|
: "=a" (eax), "=d" (edx)
|
|
: "c" (XCR_XFEATURE_ENABLED_MASK));
|
|
else
|
|
eax = 0;
|
|
|
|
/* Check if AVX registers are supported. */
|
|
if ((eax & XCR_AVX_ENABLED_MASK) != XCR_AVX_ENABLED_MASK) {
|
|
flags &= ~(SPA_CPU_FLAG_AVX |
|
|
SPA_CPU_FLAG_AVX2 |
|
|
SPA_CPU_FLAG_FMA3 |
|
|
SPA_CPU_FLAG_FMA4 |
|
|
SPA_CPU_FLAG_XOP);
|
|
}
|
|
|
|
/* Check if AVX512F registers are supported. */
|
|
if ((eax & XCR_AVX512F_ENABLED_MASK) != XCR_AVX512F_ENABLED_MASK) {
|
|
flags &= ~SPA_CPU_FLAG_AVX512;
|
|
}
|
|
|
|
if (flags & SPA_CPU_FLAG_AVX512)
|
|
impl->max_align = 64;
|
|
else if (flags & (SPA_CPU_FLAG_AVX2 |
|
|
SPA_CPU_FLAG_AVX |
|
|
SPA_CPU_FLAG_XOP |
|
|
SPA_CPU_FLAG_FMA4 |
|
|
SPA_CPU_FLAG_FMA3))
|
|
impl->max_align = 32;
|
|
else if (flags & (SPA_CPU_FLAG_AESNI |
|
|
SPA_CPU_FLAG_SSE42 |
|
|
SPA_CPU_FLAG_SSE41 |
|
|
SPA_CPU_FLAG_SSSE3 |
|
|
SPA_CPU_FLAG_SSE3 |
|
|
SPA_CPU_FLAG_SSE2 |
|
|
SPA_CPU_FLAG_SSE))
|
|
impl->max_align = 16;
|
|
else
|
|
impl->max_align = 8;
|
|
|
|
impl->flags = flags;
|
|
|
|
return 0;
|
|
}
|
|
|
|
#if defined(HAVE_SSE)
|
|
#include <xmmintrin.h>
|
|
#endif
|
|
|
|
/**
 * Switch the denormal handling bits of the SSE MXCSR register on or off.
 *
 * \param object the struct impl of this plugin, passed as an opaque pointer
 * \param enable true sets the 0x8040 bits of MXCSR, false clears them
 * \return 0 when built with HAVE_SSE, -ENOTSUP otherwise
 *
 * NOTE(review): with HAVE_SSE the function also returns 0 when the detected
 * flags lack SPA_CPU_FLAG_SSE, in which case MXCSR is left untouched.
 */
static int x86_zero_denormals(void *object, bool enable)
{
#if defined(HAVE_SSE)
	/* MXCSR bit 15 (flush-to-zero) and bit 6 (denormals-are-zero). */
	const unsigned int ftz_daz = 0x8040;
	struct impl *this = object;
	unsigned int csr;

	/* Never touch MXCSR on a CPU that was not detected as SSE capable. */
	if (!(this->flags & SPA_CPU_FLAG_SSE))
		return 0;

	csr = _mm_getcsr();
	csr = enable ? (csr | ftz_daz) : (csr & ~ftz_daz);
	_mm_setcsr(csr);

	spa_log_debug(this->log, "%p: zero-denormals:%s",
			this, enable ? "on" : "off");
	return 0;
#else
	return -ENOTSUP;
#endif
}
|