From 3da9c349d4468df5976339bc2b66bcfbf60e7f1d Mon Sep 17 00:00:00 2001 From: Vyacheslav Egorov Date: Thu, 8 Mar 2018 17:50:29 +0000 Subject: [PATCH] [vm/simarm] Fix VRECPS/VRSQRTSQS instruction implementation. This instruction handles 0.0 and infinity operands specially because otherwise it produces NaN where it should produce appropriate infinity or zero. Fixes https://github.com/dart-lang/sdk/issues/24399 Fixes https://github.com/dart-lang/sdk/issues/26675 Change-Id: I0741d0daa8b92b4dcd780b1453c9ec449552b1fd Reviewed-on: https://dart-review.googlesource.com/45382 Reviewed-by: Zach Anderson --- .../vm/compiler/assembler/assembler_arm.cc | 112 ++++++++++++++ runtime/vm/compiler/assembler/assembler_arm.h | 10 ++ .../compiler/assembler/assembler_arm_test.cc | 137 ++++++------------ runtime/vm/simulator_arm.cc | 98 +------------ 4 files changed, 170 insertions(+), 187 deletions(-) diff --git a/runtime/vm/compiler/assembler/assembler_arm.cc b/runtime/vm/compiler/assembler/assembler_arm.cc index 14b68c011c0..acca2467f43 100644 --- a/runtime/vm/compiler/assembler/assembler_arm.cc +++ b/runtime/vm/compiler/assembler/assembler_arm.cc @@ -3426,6 +3426,118 @@ const char* Assembler::FpuRegisterName(FpuRegister reg) { return fpu_reg_names[reg]; } +float ReciprocalEstimate(float a) { + // From the ARM Architecture Reference Manual A2-85. + if (isinf(a) || (fabs(a) >= exp2f(126))) + return a >= 0.0f ? 0.0f : -0.0f; + else if (a == 0.0f) + return 1.0f / a; + else if (isnan(a)) + return a; + + uint32_t a_bits = bit_cast(a); + // scaled = '0011 1111 1110' : a<22:0> : Zeros(29) + uint64_t scaled = (static_cast(0x3fe) << 52) | + ((static_cast(a_bits) & 0x7fffff) << 29); + // result_exp = 253 - UInt(a<30:23>) + int32_t result_exp = 253 - ((a_bits >> 23) & 0xff); + ASSERT((result_exp >= 1) && (result_exp <= 252)); + + double scaled_d = bit_cast(scaled); + ASSERT((scaled_d >= 0.5) && (scaled_d < 1.0)); + + // a in units of 1/512 rounded down. + int32_t q = static_cast(scaled_d * 512.0); + // reciprocal r. + double r = 1.0 / ((static_cast(q) + 0.5) / 512.0); + // r in units of 1/256 rounded to nearest. + int32_t s = static_cast(256.0 * r + 0.5); + double estimate = static_cast(s) / 256.0; + ASSERT((estimate >= 1.0) && (estimate <= (511.0 / 256.0))); + + // result = sign : result_exp<7:0> : estimate<51:29> + int32_t result_bits = + (a_bits & 0x80000000) | ((result_exp & 0xff) << 23) | + ((bit_cast(estimate) >> 29) & 0x7fffff); + return bit_cast(result_bits); +} + +float ReciprocalStep(float op1, float op2) { + float p; + if ((isinf(op1) && op2 == 0.0f) || (op1 == 0.0f && isinf(op2))) { + p = 0.0f; + } else { + p = op1 * op2; + } + return 2.0f - p; +} + +float ReciprocalSqrtEstimate(float a) { + // From the ARM Architecture Reference Manual A2-87. + if (a < 0.0f) + return NAN; + else if (isinf(a) || (fabs(a) >= exp2f(126))) + return 0.0f; + else if (a == 0.0) + return 1.0f / a; + else if (isnan(a)) + return a; + + uint32_t a_bits = bit_cast(a); + uint64_t scaled; + if (((a_bits >> 23) & 1) != 0) { + // scaled = '0 01111111101' : operand<22:0> : Zeros(29) + scaled = (static_cast(0x3fd) << 52) | + ((static_cast(a_bits) & 0x7fffff) << 29); + } else { + // scaled = '0 01111111110' : operand<22:0> : Zeros(29) + scaled = (static_cast(0x3fe) << 52) | + ((static_cast(a_bits) & 0x7fffff) << 29); + } + // result_exp = (380 - UInt(operand<30:23>) DIV 2; + int32_t result_exp = (380 - ((a_bits >> 23) & 0xff)) / 2; + + double scaled_d = bit_cast(scaled); + ASSERT((scaled_d >= 0.25) && (scaled_d < 1.0)); + + double r; + if (scaled_d < 0.5) { + // range 0.25 <= a < 0.5 + + // a in units of 1/512 rounded down. + int32_t q0 = static_cast(scaled_d * 512.0); + // reciprocal root r. + r = 1.0 / sqrt((static_cast(q0) + 0.5) / 512.0); + } else { + // range 0.5 <= a < 1.0 + + // a in units of 1/256 rounded down. + int32_t q1 = static_cast(scaled_d * 256.0); + // reciprocal root r. + r = 1.0 / sqrt((static_cast(q1) + 0.5) / 256.0); + } + // r in units of 1/256 rounded to nearest. + int32_t s = static_cast(256.0 * r + 0.5); + double estimate = static_cast(s) / 256.0; + ASSERT((estimate >= 1.0) && (estimate <= (511.0 / 256.0))); + + // result = 0 : result_exp<7:0> : estimate<51:29> + int32_t result_bits = + ((result_exp & 0xff) << 23) | + ((bit_cast(estimate) >> 29) & 0x7fffff); + return bit_cast(result_bits); +} + +float ReciprocalSqrtStep(float op1, float op2) { + float p; + if ((isinf(op1) && op2 == 0.0f) || (op1 == 0.0f && isinf(op2))) { + p = 0.0f; + } else { + p = op1 * op2; + } + return (3.0f - p) / 2.0f; +} + } // namespace dart #endif // defined(TARGET_ARCH_ARM) && !defined(DART_PRECOMPILED_RUNTIME) diff --git a/runtime/vm/compiler/assembler/assembler_arm.h b/runtime/vm/compiler/assembler/assembler_arm.h index b13d839acf2..36936e0841e 100644 --- a/runtime/vm/compiler/assembler/assembler_arm.h +++ b/runtime/vm/compiler/assembler/assembler_arm.h @@ -1238,6 +1238,16 @@ class Assembler : public ValueObject { DISALLOW_COPY_AND_ASSIGN(Assembler); }; +// Floating-point reciprocal estimate and step (see pages A2-85 and A2-86 of +// ARM Architecture Reference Manual ARMv7-A edition). +float ReciprocalEstimate(float op); +float ReciprocalStep(float op1, float op2); + +// Floating-point reciprocal square root estimate and step (see pages A2-87 to +// A2-90 of ARM Architecture Reference Manual ARMv7-A edition). +float ReciprocalSqrtEstimate(float op); +float ReciprocalSqrtStep(float op1, float op2); + } // namespace dart #endif // RUNTIME_VM_COMPILER_ASSEMBLER_ASSEMBLER_ARM_H_ diff --git a/runtime/vm/compiler/assembler/assembler_arm_test.cc b/runtime/vm/compiler/assembler/assembler_arm_test.cc index 2809d585086..2b1c20d02ea 100644 --- a/runtime/vm/compiler/assembler/assembler_arm_test.cc +++ b/runtime/vm/compiler/assembler/assembler_arm_test.cc @@ -13,6 +13,48 @@ namespace dart { +TEST_CASE(ReciprocalOps) { + EXPECT_EQ(true, isinf(ReciprocalEstimate(-0.0f))); + EXPECT_EQ(true, signbit(ReciprocalEstimate(-0.0f))); + EXPECT_EQ(true, isinf(ReciprocalEstimate(0.0f))); + EXPECT_EQ(true, !signbit(ReciprocalEstimate(0.0f))); + +#define AS_UINT32(v) (bit_cast(v)) +#define EXPECT_BITWISE_EQ(a, b) EXPECT_EQ(AS_UINT32(a), AS_UINT32(b)) + + EXPECT_BITWISE_EQ(0.0f, ReciprocalEstimate(kPosInfinity)); + EXPECT_BITWISE_EQ(-0.0f, ReciprocalEstimate(kNegInfinity)); + EXPECT_BITWISE_EQ(2.0f, ReciprocalStep(0.0f, kPosInfinity)); + EXPECT_BITWISE_EQ(2.0f, ReciprocalStep(0.0f, kNegInfinity)); + EXPECT_BITWISE_EQ(2.0f, ReciprocalStep(-0.0f, kPosInfinity)); + EXPECT_BITWISE_EQ(2.0f, ReciprocalStep(-0.0f, kNegInfinity)); + EXPECT_BITWISE_EQ(2.0f, ReciprocalStep(kPosInfinity, 0.0f)); + EXPECT_BITWISE_EQ(2.0f, ReciprocalStep(kNegInfinity, 0.0f)); + EXPECT_BITWISE_EQ(2.0f, ReciprocalStep(kPosInfinity, -0.0f)); + EXPECT_BITWISE_EQ(2.0f, ReciprocalStep(kNegInfinity, -0.0f)); + + EXPECT_EQ(true, isnan(ReciprocalSqrtEstimate(-1.0f))); + EXPECT_EQ(true, isnan(ReciprocalSqrtEstimate(kNegInfinity))); + EXPECT_EQ(true, isnan(ReciprocalSqrtEstimate(-1.0f))); + EXPECT_EQ(true, isinf(ReciprocalSqrtEstimate(-0.0f))); + EXPECT_EQ(true, signbit(ReciprocalSqrtEstimate(-0.0f))); + EXPECT_EQ(true, isinf(ReciprocalSqrtEstimate(0.0f))); + EXPECT_EQ(true, !signbit(ReciprocalSqrtEstimate(0.0f))); + EXPECT_BITWISE_EQ(0.0f, ReciprocalSqrtEstimate(kPosInfinity)); + + EXPECT_BITWISE_EQ(1.5f, ReciprocalSqrtStep(0.0f, kPosInfinity)); + EXPECT_BITWISE_EQ(1.5f, ReciprocalSqrtStep(0.0f, kNegInfinity)); + EXPECT_BITWISE_EQ(1.5f, ReciprocalSqrtStep(-0.0f, kPosInfinity)); + EXPECT_BITWISE_EQ(1.5f, ReciprocalSqrtStep(-0.0f, kNegInfinity)); + EXPECT_BITWISE_EQ(1.5f, ReciprocalSqrtStep(kPosInfinity, 0.0f)); + EXPECT_BITWISE_EQ(1.5f, ReciprocalSqrtStep(kNegInfinity, 0.0f)); + EXPECT_BITWISE_EQ(1.5f, ReciprocalSqrtStep(kPosInfinity, -0.0f)); + EXPECT_BITWISE_EQ(1.5f, ReciprocalSqrtStep(kNegInfinity, -0.0f)); + +#undef AS_UINT32 +#undef EXPECT_BITWISE_EQ +} + #define __ assembler-> ASSEMBLER_TEST_GENERATE(Simple, assembler) { @@ -3416,43 +3458,6 @@ ASSEMBLER_TEST_RUN(Vmaxqs, test) { } } -// This is the same function as in the Simulator. -static float arm_recip_estimate(float a) { - // From the ARM Architecture Reference Manual A2-85. - if (isinf(a) || (fabs(a) >= exp2f(126))) - return 0.0; - else if (a == 0.0) - return kPosInfinity; - else if (isnan(a)) - return a; - - uint32_t a_bits = bit_cast(a); - // scaled = '0011 1111 1110' : a<22:0> : Zeros(29) - uint64_t scaled = (static_cast(0x3fe) << 52) | - ((static_cast(a_bits) & 0x7fffff) << 29); - // result_exp = 253 - UInt(a<30:23>) - int32_t result_exp = 253 - ((a_bits >> 23) & 0xff); - ASSERT((result_exp >= 1) && (result_exp <= 252)); - - double scaled_d = bit_cast(scaled); - ASSERT((scaled_d >= 0.5) && (scaled_d < 1.0)); - - // a in units of 1/512 rounded down. - int32_t q = static_cast(scaled_d * 512.0); - // reciprocal r. - double r = 1.0 / ((static_cast(q) + 0.5) / 512.0); - // r in units of 1/256 rounded to nearest. - int32_t s = static_cast(256.0 * r + 0.5); - double estimate = static_cast(s) / 256.0; - ASSERT((estimate >= 1.0) && (estimate <= (511.0 / 256.0))); - - // result = sign : result_exp<7:0> : estimate<51:29> - int32_t result_bits = - (a_bits & 0x80000000) | ((result_exp & 0xff) << 23) | - ((bit_cast(estimate) >> 29) & 0x7fffff); - return bit_cast(result_bits); -} - ASSEMBLER_TEST_GENERATE(Vrecpeqs, assembler) { if (TargetCPUFeatures::neon_supported()) { __ LoadSImmediate(S4, 147.0); @@ -3469,7 +3474,7 @@ ASSEMBLER_TEST_RUN(Vrecpeqs, test) { if (TargetCPUFeatures::neon_supported()) { typedef float (*Vrecpeqs)() DART_UNUSED; float res = EXECUTE_TEST_CODE_FLOAT(Vrecpeqs, test->entry()); - EXPECT_FLOAT_EQ(arm_recip_estimate(147.0), res, 0.0001f); + EXPECT_FLOAT_EQ(ReciprocalEstimate(147.0), res, 0.0001f); } } @@ -3526,60 +3531,6 @@ ASSEMBLER_TEST_RUN(Reciprocal, test) { } } -static float arm_reciprocal_sqrt_estimate(float a) { - // From the ARM Architecture Reference Manual A2-87. - if (isinf(a) || (fabs(a) >= exp2f(126))) - return 0.0; - else if (a == 0.0) - return kPosInfinity; - else if (isnan(a)) - return a; - - uint32_t a_bits = bit_cast(a); - uint64_t scaled; - if (((a_bits >> 23) & 1) != 0) { - // scaled = '0 01111111101' : operand<22:0> : Zeros(29) - scaled = (static_cast(0x3fd) << 52) | - ((static_cast(a_bits) & 0x7fffff) << 29); - } else { - // scaled = '0 01111111110' : operand<22:0> : Zeros(29) - scaled = (static_cast(0x3fe) << 52) | - ((static_cast(a_bits) & 0x7fffff) << 29); - } - // result_exp = (380 - UInt(operand<30:23>) DIV 2; - int32_t result_exp = (380 - ((a_bits >> 23) & 0xff)) / 2; - - double scaled_d = bit_cast(scaled); - ASSERT((scaled_d >= 0.25) && (scaled_d < 1.0)); - - double r; - if (scaled_d < 0.5) { - // range 0.25 <= a < 0.5 - - // a in units of 1/512 rounded down. - int32_t q0 = static_cast(scaled_d * 512.0); - // reciprocal root r. - r = 1.0 / sqrt((static_cast(q0) + 0.5) / 512.0); - } else { - // range 0.5 <= a < 1.0 - - // a in units of 1/256 rounded down. - int32_t q1 = static_cast(scaled_d * 256.0); - // reciprocal root r. - r = 1.0 / sqrt((static_cast(q1) + 0.5) / 256.0); - } - // r in units of 1/256 rounded to nearest. - int32_t s = static_cast(256.0 * r + 0.5); - double estimate = static_cast(s) / 256.0; - ASSERT((estimate >= 1.0) && (estimate <= (511.0 / 256.0))); - - // result = 0 : result_exp<7:0> : estimate<51:29> - int32_t result_bits = - ((result_exp & 0xff) << 23) | - ((bit_cast(estimate) >> 29) & 0x7fffff); - return bit_cast(result_bits); -} - ASSEMBLER_TEST_GENERATE(Vrsqrteqs, assembler) { if (TargetCPUFeatures::neon_supported()) { __ LoadSImmediate(S4, 147.0); @@ -3597,7 +3548,7 @@ ASSEMBLER_TEST_RUN(Vrsqrteqs, test) { if (TargetCPUFeatures::neon_supported()) { typedef float (*Vrsqrteqs)() DART_UNUSED; float res = EXECUTE_TEST_CODE_FLOAT(Vrsqrteqs, test->entry()); - EXPECT_FLOAT_EQ(arm_reciprocal_sqrt_estimate(147.0), res, 0.0001f); + EXPECT_FLOAT_EQ(ReciprocalSqrtEstimate(147.0), res, 0.0001f); } } diff --git a/runtime/vm/simulator_arm.cc b/runtime/vm/simulator_arm.cc index ca885eaca61..fa91d0ac1da 100644 --- a/runtime/vm/simulator_arm.cc +++ b/runtime/vm/simulator_arm.cc @@ -2859,96 +2859,6 @@ void Simulator::DecodeType7(Instr* instr) { } } -static float arm_reciprocal_sqrt_estimate(float a) { - // From the ARM Architecture Reference Manual A2-87. - if (isinf(a) || (fabs(a) >= exp2f(126))) - return 0.0; - else if (a == 0.0) - return kPosInfinity; - else if (isnan(a)) - return a; - - uint32_t a_bits = bit_cast(a); - uint64_t scaled; - if (((a_bits >> 23) & 1) != 0) { - // scaled = '0 01111111101' : operand<22:0> : Zeros(29) - scaled = (static_cast(0x3fd) << 52) | - ((static_cast(a_bits) & 0x7fffff) << 29); - } else { - // scaled = '0 01111111110' : operand<22:0> : Zeros(29) - scaled = (static_cast(0x3fe) << 52) | - ((static_cast(a_bits) & 0x7fffff) << 29); - } - // result_exp = (380 - UInt(operand<30:23>) DIV 2; - int32_t result_exp = (380 - ((a_bits >> 23) & 0xff)) / 2; - - double scaled_d = bit_cast(scaled); - ASSERT((scaled_d >= 0.25) && (scaled_d < 1.0)); - - double r; - if (scaled_d < 0.5) { - // range 0.25 <= a < 0.5 - - // a in units of 1/512 rounded down. - int32_t q0 = static_cast(scaled_d * 512.0); - // reciprocal root r. - r = 1.0 / sqrt((static_cast(q0) + 0.5) / 512.0); - } else { - // range 0.5 <= a < 1.0 - - // a in units of 1/256 rounded down. - int32_t q1 = static_cast(scaled_d * 256.0); - // reciprocal root r. - r = 1.0 / sqrt((static_cast(q1) + 0.5) / 256.0); - } - // r in units of 1/256 rounded to nearest. - int32_t s = static_cast(256.0 * r + 0.5); - double estimate = static_cast(s) / 256.0; - ASSERT((estimate >= 1.0) && (estimate <= (511.0 / 256.0))); - - // result = 0 : result_exp<7:0> : estimate<51:29> - int32_t result_bits = - ((result_exp & 0xff) << 23) | - ((bit_cast(estimate) >> 29) & 0x7fffff); - return bit_cast(result_bits); -} - -static float arm_recip_estimate(float a) { - // From the ARM Architecture Reference Manual A2-85. - if (isinf(a) || (fabs(a) >= exp2f(126))) - return 0.0; - else if (a == 0.0) - return kPosInfinity; - else if (isnan(a)) - return a; - - uint32_t a_bits = bit_cast(a); - // scaled = '0011 1111 1110' : a<22:0> : Zeros(29) - uint64_t scaled = (static_cast(0x3fe) << 52) | - ((static_cast(a_bits) & 0x7fffff) << 29); - // result_exp = 253 - UInt(a<30:23>) - int32_t result_exp = 253 - ((a_bits >> 23) & 0xff); - ASSERT((result_exp >= 1) && (result_exp <= 252)); - - double scaled_d = bit_cast(scaled); - ASSERT((scaled_d >= 0.5) && (scaled_d < 1.0)); - - // a in units of 1/512 rounded down. - int32_t q = static_cast(scaled_d * 512.0); - // reciprocal r. - double r = 1.0 / ((static_cast(q) + 0.5) / 512.0); - // r in units of 1/256 rounded to nearest. - int32_t s = static_cast(256.0 * r + 0.5); - double estimate = static_cast(s) / 256.0; - ASSERT((estimate >= 1.0) && (estimate <= (511.0 / 256.0))); - - // result = sign : result_exp<7:0> : estimate<51:29> - int32_t result_bits = - (a_bits & 0x80000000) | ((result_exp & 0xff) << 23) | - ((bit_cast(estimate) >> 29) & 0x7fffff); - return bit_cast(result_bits); -} - static void simd_value_swap(simd_value_t* s1, int i1, simd_value_t* s2, @@ -3213,26 +3123,26 @@ void Simulator::DecodeSIMDDataProcessing(Instr* instr) { (instr->Bits(16, 4) == 11)) { // Format(instr, "vrecpeq 'qd, 'qm"); for (int i = 0; i < 4; i++) { - s8d.data_[i].f = arm_recip_estimate(s8m.data_[i].f); + s8d.data_[i].f = ReciprocalEstimate(s8m.data_[i].f); } } else if ((instr->Bits(8, 4) == 15) && (instr->Bit(4) == 1) && (instr->Bits(20, 2) == 0) && (instr->Bits(23, 2) == 0)) { // Format(instr, "vrecpsq 'qd, 'qn, 'qm"); for (int i = 0; i < 4; i++) { - s8d.data_[i].f = 2.0 - (s8n.data_[i].f * s8m.data_[i].f); + s8d.data_[i].f = ReciprocalStep(s8n.data_[i].f, s8m.data_[i].f); } } else if ((instr->Bits(8, 4) == 5) && (instr->Bit(4) == 0) && (instr->Bits(20, 2) == 3) && (instr->Bits(23, 2) == 3) && (instr->Bit(7) == 1) && (instr->Bits(16, 4) == 11)) { // Format(instr, "vrsqrteqs 'qd, 'qm"); for (int i = 0; i < 4; i++) { - s8d.data_[i].f = arm_reciprocal_sqrt_estimate(s8m.data_[i].f); + s8d.data_[i].f = ReciprocalSqrtEstimate(s8m.data_[i].f); } } else if ((instr->Bits(8, 4) == 15) && (instr->Bit(4) == 1) && (instr->Bits(20, 2) == 2) && (instr->Bits(23, 2) == 0)) { // Format(instr, "vrsqrtsqs 'qd, 'qn, 'qm"); for (int i = 0; i < 4; i++) { - s8d.data_[i].f = (3.0 - s8n.data_[i].f * s8m.data_[i].f) / 2.0; + s8d.data_[i].f = ReciprocalSqrtStep(s8n.data_[i].f, s8m.data_[i].f); } } else if ((instr->Bits(8, 4) == 12) && (instr->Bit(4) == 0) && (instr->Bits(20, 2) == 3) && (instr->Bits(23, 2) == 3) &&