AK: Add an helper for quick hardware based rounding

This uses the `fistp` and `cvts[sd]2si` respectively, to potentially round floating point values with just one instruction. This falls back to `llrint[fl]?` on aarch64 for now.
2024-10-15 12:23:15 +00:00 · 2022-04-14 17:33:01 +02:00 · 2022-04-14 17:33:01 +02:00 · 37ff2b9bd2
parent 71b175d4ed
commit 37ff2b9bd2
1 changed files with 86 additions and 2 deletions
--- a/AK/Math.h
+++ b/AK/Math.h
@ -265,7 +265,7 @@ constexpr T tan(T angle)
    CONSTEXPR_STATE(tan, angle);
 #if ARCH(I386) || ARCH(X86_64)
-    double ret, one;
+    T ret, one;
    asm(
        "fptan"
        : "=t"(one), "=u"(ret)
@ -536,6 +536,89 @@ using Hyperbolic::cosh;
 using Hyperbolic::sinh;
 using Hyperbolic::tanh;
 template<Integral I, FloatingPoint P>
 ALWAYS_INLINE I round_to(P value)
 {
 #if ARCH(I386) || ARCH(X86_64)
    // Note: fistps outputs into a signed integer location (i16, i32, i64),
    //       so lets be nice and tell the compiler that.
    Conditional<sizeof(I) >= sizeof(i16), MakeSigned<I>, i16> ret;
    if constexpr (sizeof(I) == sizeof(i64)) {
        asm("fistpll %0"
            : "=m"(ret)
            : "t"(value)
            : "st");
    } else if constexpr (sizeof(I) == sizeof(i32)) {
        asm("fistpl %0"
            : "=m"(ret)
            : "t"(value)
            : "st");
    } else {
        asm("fistps %0"
            : "=m"(ret)
            : "t"(value)
            : "st");
    }
    return static_cast<I>(ret);
 #else
    if constexpr (IsSame<P, long double>)
        return static_cast<I>(llrintl(value));
    if constexpr (IsSame<P, double>)
        return static_cast<I>(llrint(value));
    if constexpr (IsSame<P, float>)
        return static_cast<I>(llrintf(value));
 #endif
 }
 #ifdef __SSE__
 template<Integral I>
 ALWAYS_INLINE I round_to(float value)
 {
    if constexpr (sizeof(I) == sizeof(i64)) {
        // Note: Outputting into 64-bit registers or memory locations requires the
        //       REX prefix, so we have to fall back to long doubles on i686
 #    if ARCH(X86_64)
        i64 ret;
        asm("cvtss2si %1, %0"
            : "=r"(ret)
            : "xm"(value));
        return static_cast<I>(ret);
 #    else
        return round_to<I, long double>(value);
 #    endif
    }
    i32 ret;
    asm("cvtss2si %1, %0"
        : "=r"(ret)
        : "xm"(value));
    return static_cast<I>(ret);
 }
 #endif
 #ifdef __SSE2__
 template<Integral I>
 ALWAYS_INLINE I round_to(double value)
 {
    if constexpr (sizeof(I) == sizeof(i64)) {
        // Note: Outputting into 64-bit registers or memory locations requires the
        //       REX prefix, so we have to fall back to long doubles on i686
 #    if ARCH(X86_64)
        i64 ret;
        asm("cvtsd2si %1, %0"
            : "=r"(ret)
            : "xm"(value));
        return static_cast<I>(ret);
 #    else
        return round_to<I, long double>(value);
 #    endif
    }
    i32 ret;
    asm("cvtsd2si %1, %0"
        : "=r"(ret)
        : "xm"(value));
    return static_cast<I>(ret);
 }
 #endif
 template<FloatingPoint T>
 constexpr T pow(T x, T y)
 {
@ -563,5 +646,6 @@ constexpr T pow(T x, T y)
 }
 #undef CONSTEXPR_STATE
 }
 using AK::round_to;