[vm/simarm] Fix VRECPS/VRSQRTSQS instruction implementation.

These instructions handle 0.0 and infinity operands
specially, because otherwise multiplying them produces NaN
where the result should be the appropriate infinity or zero.

Fixes https://github.com/dart-lang/sdk/issues/24399
Fixes https://github.com/dart-lang/sdk/issues/26675

Change-Id: I0741d0daa8b92b4dcd780b1453c9ec449552b1fd
Reviewed-on: https://dart-review.googlesource.com/45382
Reviewed-by: Zach Anderson <zra@google.com>
This commit is contained in:
Vyacheslav Egorov 2018-03-08 17:50:29 +00:00
parent d059e97974
commit 3da9c349d4
4 changed files with 170 additions and 187 deletions

View file

@ -3426,6 +3426,118 @@ const char* Assembler::FpuRegisterName(FpuRegister reg) {
return fpu_reg_names[reg];
}
// Software model of the single-precision reciprocal estimate, following the
// FPRecipEstimate pseudocode in the ARM Architecture Reference Manual (A2-85).
//
// Special operands:
//   NaN                        -> the operand is returned unchanged.
//   +/-0.0 or a denormal       -> infinity carrying the operand's sign.
//   +/-infinity, |a| >= 2^126  -> zero carrying the operand's sign (the true
//                                 reciprocal would underflow).
// Any other operand yields sign : (253 - exponent) : fraction, where the
// fraction comes from an estimate in [1.0, 511/256] that is a multiple of
// 1/256.
float ReciprocalEstimate(float a) {
  if (isnan(a)) {
    return a;
  }

  const uint32_t a_bits = bit_cast<uint32_t, float>(a);
  const uint32_t sign_bit = a_bits & 0x80000000u;
  const uint32_t biased_exp = (a_bits >> 23) & 0xff;

  if (biased_exp == 0) {
    // Zero or denormal. Advanced SIMD arithmetic flushes denormal operands to
    // zero, so both are classified as zero and give a signed infinity. Building
    // the infinity from bits also avoids a floating-point division by zero, and
    // keeps result_exp below inside its asserted range.
    return bit_cast<float, uint32_t>(sign_bit | 0x7f800000u);
  }
  if (biased_exp >= 253) {
    // Infinity (exponent 255) or |a| >= 2^126 (exponent >= 253): signed zero.
    return bit_cast<float, uint32_t>(sign_bit);
  }

  // scaled = '0011 1111 1110' : a<22:0> : Zeros(29)
  const uint64_t scaled = (static_cast<uint64_t>(0x3fe) << 52) |
                          ((static_cast<uint64_t>(a_bits) & 0x7fffff) << 29);
  // result_exp = 253 - UInt(a<30:23>)
  const int32_t result_exp = 253 - static_cast<int32_t>(biased_exp);
  ASSERT((result_exp >= 1) && (result_exp <= 252));
  const double scaled_d = bit_cast<double, uint64_t>(scaled);
  ASSERT((scaled_d >= 0.5) && (scaled_d < 1.0));

  // a in units of 1/512 rounded down.
  const int32_t q = static_cast<int32_t>(scaled_d * 512.0);
  // reciprocal r.
  const double r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
  // r in units of 1/256 rounded to nearest.
  const int32_t s = static_cast<int32_t>(256.0 * r + 0.5);
  const double estimate = static_cast<double>(s) / 256.0;
  ASSERT((estimate >= 1.0) && (estimate <= (511.0 / 256.0)));

  // result = sign : result_exp<7:0> : estimate<51:29>
  // Assembled as unsigned so that setting the sign bit never relies on a
  // narrowing conversion to a signed integer.
  const uint32_t fraction = static_cast<uint32_t>(
      (bit_cast<uint64_t, double>(estimate) >> 29) & 0x7fffff);
  const uint32_t result_bits =
      sign_bit | (static_cast<uint32_t>(result_exp) << 23) | fraction;
  return bit_cast<float, uint32_t>(result_bits);
}
// Newton-Raphson step for the reciprocal: returns 2 - op1 * op2.
// When one operand is a zero and the other an infinity (either sign), the
// product is taken to be 0 rather than NaN, so the step evaluates to 2.
float ReciprocalStep(float op1, float op2) {
  const bool zero_times_infinity =
      (op2 == 0.0f && isinf(op1)) || (isinf(op2) && op1 == 0.0f);
  const float product = zero_times_infinity ? 0.0f : op1 * op2;
  return 2.0f - product;
}
// Software model of the single-precision reciprocal square root estimate,
// following the FPRSqrtEstimate pseudocode in the ARM Architecture Reference
// Manual (A2-87).
//
// Special operands, in the order they are classified:
//   NaN                    -> the operand is returned unchanged.
//   +/-0.0 or a denormal   -> infinity carrying the operand's sign.
//   any other negative     -> NaN (this includes -infinity).
//   +infinity              -> +0.0.
// Any other operand yields 0 : ((380 - exponent) / 2) : fraction, where the
// fraction comes from an estimate in [1.0, 511/256] that is a multiple of
// 1/256. Large finite operands are NOT clamped to zero: 1/sqrt(a) cannot
// underflow for any finite float (1/sqrt(2^126) is 2^-63).
float ReciprocalSqrtEstimate(float a) {
  if (isnan(a)) {
    return a;
  }

  const uint32_t a_bits = bit_cast<uint32_t, float>(a);
  const uint32_t sign_bit = a_bits & 0x80000000u;
  const uint32_t biased_exp = (a_bits >> 23) & 0xff;

  if (biased_exp == 0) {
    // Zero or denormal. Advanced SIMD arithmetic flushes denormal operands to
    // zero, so both give a signed infinity. This must be tested before the
    // sign so that -0.0 and negative denormals give -infinity, not NaN.
    return bit_cast<float, uint32_t>(sign_bit | 0x7f800000u);
  }
  if (sign_bit != 0) {
    return NAN;
  }
  if (biased_exp == 0xff) {
    // +infinity.
    return 0.0f;
  }

  // Scale to a double in [0.25, 1.0) while keeping the parity of the exponent,
  // so that halving the exponent below is exact.
  //   odd exponent:  scaled = '0 01111111101' : operand<22:0> : Zeros(29)
  //   even exponent: scaled = '0 01111111110' : operand<22:0> : Zeros(29)
  const uint64_t scaled_exp = ((biased_exp & 1) != 0) ? 0x3fd : 0x3fe;
  const uint64_t scaled =
      (scaled_exp << 52) | ((static_cast<uint64_t>(a_bits) & 0x7fffff) << 29);
  // result_exp = (380 - UInt(operand<30:23>)) DIV 2;
  const int32_t result_exp = (380 - static_cast<int32_t>(biased_exp)) / 2;
  ASSERT((result_exp >= 63) && (result_exp <= 189));
  const double scaled_d = bit_cast<double, uint64_t>(scaled);
  ASSERT((scaled_d >= 0.25) && (scaled_d < 1.0));

  double r;
  if (scaled_d < 0.5) {
    // range 0.25 <= a < 0.5
    // a in units of 1/512 rounded down.
    const int32_t q0 = static_cast<int32_t>(scaled_d * 512.0);
    // reciprocal root r.
    r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
  } else {
    // range 0.5 <= a < 1.0
    // a in units of 1/256 rounded down.
    const int32_t q1 = static_cast<int32_t>(scaled_d * 256.0);
    // reciprocal root r.
    r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
  }

  // r in units of 1/256 rounded to nearest.
  const int32_t s = static_cast<int32_t>(256.0 * r + 0.5);
  const double estimate = static_cast<double>(s) / 256.0;
  ASSERT((estimate >= 1.0) && (estimate <= (511.0 / 256.0)));

  // result = 0 : result_exp<7:0> : estimate<51:29>
  const uint32_t fraction = static_cast<uint32_t>(
      (bit_cast<uint64_t, double>(estimate) >> 29) & 0x7fffff);
  const uint32_t result_bits =
      (static_cast<uint32_t>(result_exp) << 23) | fraction;
  return bit_cast<float, uint32_t>(result_bits);
}
// Newton-Raphson step for the reciprocal square root: returns
// (3 - op1 * op2) / 2.
// When one operand is a zero and the other an infinity (either sign), the
// product is taken to be 0 rather than NaN, so the step evaluates to 1.5.
float ReciprocalSqrtStep(float op1, float op2) {
  float product = 0.0f;
  const bool zero_times_infinity =
      (op2 == 0.0f && isinf(op1)) || (isinf(op2) && op1 == 0.0f);
  if (!zero_times_infinity) {
    product = op1 * op2;
  }
  return (3.0f - product) / 2.0f;
}
} // namespace dart
#endif // defined(TARGET_ARCH_ARM) && !defined(DART_PRECOMPILED_RUNTIME)

View file

@ -1238,6 +1238,16 @@ class Assembler : public ValueObject {
DISALLOW_COPY_AND_ASSIGN(Assembler);
};
// Floating-point reciprocal estimate and step (see pages A2-85 and A2-86 of
// ARM Architecture Reference Manual ARMv7-A edition).
// ReciprocalEstimate returns an approximation of 1/op. A zero operand yields an
// infinity and an infinite operand yields a zero, each carrying the sign of op;
// a NaN operand is returned as is.
float ReciprocalEstimate(float op);
// ReciprocalStep returns 2 - op1 * op2, where the product of a zero and an
// infinity is taken to be 0 (giving 2) instead of NaN.
float ReciprocalStep(float op1, float op2);
// Floating-point reciprocal square root estimate and step (see pages A2-87 to
// A2-90 of ARM Architecture Reference Manual ARMv7-A edition).
// ReciprocalSqrtEstimate returns an approximation of 1/sqrt(op). Operands below
// zero yield NaN, a zero yields an infinity with the sign of op, +infinity
// yields 0, and a NaN operand is returned as is.
float ReciprocalSqrtEstimate(float op);
// ReciprocalSqrtStep returns (3 - op1 * op2) / 2, where the product of a zero
// and an infinity is taken to be 0 (giving 1.5) instead of NaN.
float ReciprocalSqrtStep(float op1, float op2);
} // namespace dart
#endif // RUNTIME_VM_COMPILER_ASSEMBLER_ASSEMBLER_ARM_H_

View file

@ -13,6 +13,48 @@
namespace dart {
// Checks how the reciprocal and reciprocal-square-root estimate/step helpers
// treat zero and infinity operands. Results whose sign matters are compared
// bit for bit, because 0.0f == -0.0f under ordinary floating-point comparison.
TEST_CASE(ReciprocalOps) {
#define AS_UINT32(v) (bit_cast<uint32_t, float>(v))
#define EXPECT_BITWISE_EQ(a, b) EXPECT_EQ(AS_UINT32(a), AS_UINT32(b))

  // Reciprocal estimate: a zero gives an infinity of the same sign.
  EXPECT_EQ(true, isinf(ReciprocalEstimate(-0.0f)));
  EXPECT_EQ(true, signbit(ReciprocalEstimate(-0.0f)));
  EXPECT_EQ(true, isinf(ReciprocalEstimate(0.0f)));
  EXPECT_EQ(true, !signbit(ReciprocalEstimate(0.0f)));

  // Reciprocal estimate: an infinity gives a zero of the same sign.
  EXPECT_BITWISE_EQ(0.0f, ReciprocalEstimate(kPosInfinity));
  EXPECT_BITWISE_EQ(-0.0f, ReciprocalEstimate(kNegInfinity));

  // Reciprocal step: zero times infinity counts as 0, so the result is 2.
  EXPECT_BITWISE_EQ(2.0f, ReciprocalStep(0.0f, kPosInfinity));
  EXPECT_BITWISE_EQ(2.0f, ReciprocalStep(0.0f, kNegInfinity));
  EXPECT_BITWISE_EQ(2.0f, ReciprocalStep(-0.0f, kPosInfinity));
  EXPECT_BITWISE_EQ(2.0f, ReciprocalStep(-0.0f, kNegInfinity));
  EXPECT_BITWISE_EQ(2.0f, ReciprocalStep(kPosInfinity, 0.0f));
  EXPECT_BITWISE_EQ(2.0f, ReciprocalStep(kNegInfinity, 0.0f));
  EXPECT_BITWISE_EQ(2.0f, ReciprocalStep(kPosInfinity, -0.0f));
  EXPECT_BITWISE_EQ(2.0f, ReciprocalStep(kNegInfinity, -0.0f));

  // Reciprocal square root estimate: negative operands give NaN. (The original
  // listed the -1.0f check twice; the duplicate is dropped.)
  EXPECT_EQ(true, isnan(ReciprocalSqrtEstimate(-1.0f)));
  EXPECT_EQ(true, isnan(ReciprocalSqrtEstimate(kNegInfinity)));

  // Reciprocal square root estimate: a zero gives an infinity of the same
  // sign, and +infinity gives +0.0.
  EXPECT_EQ(true, isinf(ReciprocalSqrtEstimate(-0.0f)));
  EXPECT_EQ(true, signbit(ReciprocalSqrtEstimate(-0.0f)));
  EXPECT_EQ(true, isinf(ReciprocalSqrtEstimate(0.0f)));
  EXPECT_EQ(true, !signbit(ReciprocalSqrtEstimate(0.0f)));
  EXPECT_BITWISE_EQ(0.0f, ReciprocalSqrtEstimate(kPosInfinity));

  // Reciprocal square root step: zero times infinity counts as 0, so the
  // result is 1.5.
  EXPECT_BITWISE_EQ(1.5f, ReciprocalSqrtStep(0.0f, kPosInfinity));
  EXPECT_BITWISE_EQ(1.5f, ReciprocalSqrtStep(0.0f, kNegInfinity));
  EXPECT_BITWISE_EQ(1.5f, ReciprocalSqrtStep(-0.0f, kPosInfinity));
  EXPECT_BITWISE_EQ(1.5f, ReciprocalSqrtStep(-0.0f, kNegInfinity));
  EXPECT_BITWISE_EQ(1.5f, ReciprocalSqrtStep(kPosInfinity, 0.0f));
  EXPECT_BITWISE_EQ(1.5f, ReciprocalSqrtStep(kNegInfinity, 0.0f));
  EXPECT_BITWISE_EQ(1.5f, ReciprocalSqrtStep(kPosInfinity, -0.0f));
  EXPECT_BITWISE_EQ(1.5f, ReciprocalSqrtStep(kNegInfinity, -0.0f));

#undef EXPECT_BITWISE_EQ
#undef AS_UINT32
}
#define __ assembler->
ASSEMBLER_TEST_GENERATE(Simple, assembler) {
@ -3416,43 +3458,6 @@ ASSEMBLER_TEST_RUN(Vmaxqs, test) {
}
}
// This is the same function as in the Simulator.
// Software model of the reciprocal estimate; its result for 147.0 is the
// expected value in the Vrecpeqs test.
// NOTE(review): this appears to be the pre-change copy that the commit
// replaces with the shared ReciprocalEstimate -- confirm against the diff.
static float arm_recip_estimate(float a) {
// From the ARM Architecture Reference Manual A2-85.
// Infinite or very large (|a| >= 2^126) operands return 0.0; the sign of `a`
// is not carried over to the result.
if (isinf(a) || (fabs(a) >= exp2f(126)))
return 0.0;
// -0.0 compares equal to 0.0, so both zeros take this branch and return
// kPosInfinity regardless of the operand's sign.
else if (a == 0.0)
return kPosInfinity;
// NaN operands are passed through unchanged.
else if (isnan(a))
return a;
uint32_t a_bits = bit_cast<uint32_t, float>(a);
// scaled = '0011 1111 1110' : a<22:0> : Zeros(29)
uint64_t scaled = (static_cast<uint64_t>(0x3fe) << 52) |
((static_cast<uint64_t>(a_bits) & 0x7fffff) << 29);
// result_exp = 253 - UInt(a<30:23>)
int32_t result_exp = 253 - ((a_bits >> 23) & 0xff);
// An exponent field of 0 (a denormal operand) would make result_exp 253 and
// fail this assertion.
ASSERT((result_exp >= 1) && (result_exp <= 252));
double scaled_d = bit_cast<double, uint64_t>(scaled);
ASSERT((scaled_d >= 0.5) && (scaled_d < 1.0));
// a in units of 1/512 rounded down.
int32_t q = static_cast<int32_t>(scaled_d * 512.0);
// reciprocal r.
double r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
// r in units of 1/256 rounded to nearest.
int32_t s = static_cast<int32_t>(256.0 * r + 0.5);
double estimate = static_cast<double>(s) / 256.0;
ASSERT((estimate >= 1.0) && (estimate <= (511.0 / 256.0)));
// result = sign : result_exp<7:0> : estimate<51:29>
// The operand's sign bit is kept, the exponent is result_exp, and the fraction
// is the top 23 fraction bits of the double-precision estimate.
int32_t result_bits =
(a_bits & 0x80000000) | ((result_exp & 0xff) << 23) |
((bit_cast<uint64_t, double>(estimate) >> 29) & 0x7fffff);
return bit_cast<float, int32_t>(result_bits);
}
ASSEMBLER_TEST_GENERATE(Vrecpeqs, assembler) {
if (TargetCPUFeatures::neon_supported()) {
__ LoadSImmediate(S4, 147.0);
@ -3469,7 +3474,7 @@ ASSEMBLER_TEST_RUN(Vrecpeqs, test) {
if (TargetCPUFeatures::neon_supported()) {
typedef float (*Vrecpeqs)() DART_UNUSED;
float res = EXECUTE_TEST_CODE_FLOAT(Vrecpeqs, test->entry());
EXPECT_FLOAT_EQ(arm_recip_estimate(147.0), res, 0.0001f);
EXPECT_FLOAT_EQ(ReciprocalEstimate(147.0), res, 0.0001f);
}
}
@ -3526,60 +3531,6 @@ ASSEMBLER_TEST_RUN(Reciprocal, test) {
}
}
// Software model of the reciprocal square root estimate; its result for 147.0
// is the expected value in the Vrsqrteqs test.
// NOTE(review): this appears to be the pre-change copy that the commit
// replaces with the shared ReciprocalSqrtEstimate -- confirm against the diff.
// There is no check for negative operands here: a negative finite value falls
// through to the bit manipulation below, where the sign bit is dropped.
static float arm_reciprocal_sqrt_estimate(float a) {
// From the ARM Architecture Reference Manual A2-87.
// Infinite (either sign) or very large (|a| >= 2^126) operands return 0.0.
if (isinf(a) || (fabs(a) >= exp2f(126)))
return 0.0;
// -0.0 compares equal to 0.0, so both zeros take this branch and return
// kPosInfinity regardless of the operand's sign.
else if (a == 0.0)
return kPosInfinity;
// NaN operands are passed through unchanged.
else if (isnan(a))
return a;
uint32_t a_bits = bit_cast<uint32_t, float>(a);
uint64_t scaled;
// The low bit of the exponent field selects the scaling, so the scaled value
// lands in [0.25, 1.0) (asserted below).
if (((a_bits >> 23) & 1) != 0) {
// scaled = '0 01111111101' : operand<22:0> : Zeros(29)
scaled = (static_cast<uint64_t>(0x3fd) << 52) |
((static_cast<uint64_t>(a_bits) & 0x7fffff) << 29);
} else {
// scaled = '0 01111111110' : operand<22:0> : Zeros(29)
scaled = (static_cast<uint64_t>(0x3fe) << 52) |
((static_cast<uint64_t>(a_bits) & 0x7fffff) << 29);
}
// result_exp = (380 - UInt(operand<30:23>) DIV 2;
int32_t result_exp = (380 - ((a_bits >> 23) & 0xff)) / 2;
double scaled_d = bit_cast<double, uint64_t>(scaled);
ASSERT((scaled_d >= 0.25) && (scaled_d < 1.0));
double r;
if (scaled_d < 0.5) {
// range 0.25 <= a < 0.5
// a in units of 1/512 rounded down.
int32_t q0 = static_cast<int32_t>(scaled_d * 512.0);
// reciprocal root r.
r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
} else {
// range 0.5 <= a < 1.0
// a in units of 1/256 rounded down.
int32_t q1 = static_cast<int32_t>(scaled_d * 256.0);
// reciprocal root r.
r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
}
// r in units of 1/256 rounded to nearest.
int32_t s = static_cast<int>(256.0 * r + 0.5);
double estimate = static_cast<double>(s) / 256.0;
ASSERT((estimate >= 1.0) && (estimate <= (511.0 / 256.0)));
// result = 0 : result_exp<7:0> : estimate<51:29>
// The sign bit is always clear; the fraction is the top 23 fraction bits of
// the double-precision estimate.
int32_t result_bits =
((result_exp & 0xff) << 23) |
((bit_cast<uint64_t, double>(estimate) >> 29) & 0x7fffff);
return bit_cast<float, int32_t>(result_bits);
}
ASSEMBLER_TEST_GENERATE(Vrsqrteqs, assembler) {
if (TargetCPUFeatures::neon_supported()) {
__ LoadSImmediate(S4, 147.0);
@ -3597,7 +3548,7 @@ ASSEMBLER_TEST_RUN(Vrsqrteqs, test) {
if (TargetCPUFeatures::neon_supported()) {
typedef float (*Vrsqrteqs)() DART_UNUSED;
float res = EXECUTE_TEST_CODE_FLOAT(Vrsqrteqs, test->entry());
EXPECT_FLOAT_EQ(arm_reciprocal_sqrt_estimate(147.0), res, 0.0001f);
EXPECT_FLOAT_EQ(ReciprocalSqrtEstimate(147.0), res, 0.0001f);
}
}

View file

@ -2859,96 +2859,6 @@ void Simulator::DecodeType7(Instr* instr) {
}
}
// Simulator model of the reciprocal square root estimate, applied to each of
// the four lanes when decoding "vrsqrteqs".
// NOTE(review): this appears to be the pre-change copy that the commit
// replaces with the shared ReciprocalSqrtEstimate -- confirm against the diff.
// There is no check for negative operands here: a negative finite value falls
// through to the bit manipulation below, where the sign bit is dropped.
static float arm_reciprocal_sqrt_estimate(float a) {
// From the ARM Architecture Reference Manual A2-87.
// Infinite (either sign) or very large (|a| >= 2^126) operands return 0.0.
if (isinf(a) || (fabs(a) >= exp2f(126)))
return 0.0;
// -0.0 compares equal to 0.0, so both zeros take this branch and return
// kPosInfinity regardless of the operand's sign.
else if (a == 0.0)
return kPosInfinity;
// NaN operands are passed through unchanged.
else if (isnan(a))
return a;
uint32_t a_bits = bit_cast<uint32_t, float>(a);
uint64_t scaled;
// The low bit of the exponent field selects the scaling, so the scaled value
// lands in [0.25, 1.0) (asserted below).
if (((a_bits >> 23) & 1) != 0) {
// scaled = '0 01111111101' : operand<22:0> : Zeros(29)
scaled = (static_cast<uint64_t>(0x3fd) << 52) |
((static_cast<uint64_t>(a_bits) & 0x7fffff) << 29);
} else {
// scaled = '0 01111111110' : operand<22:0> : Zeros(29)
scaled = (static_cast<uint64_t>(0x3fe) << 52) |
((static_cast<uint64_t>(a_bits) & 0x7fffff) << 29);
}
// result_exp = (380 - UInt(operand<30:23>) DIV 2;
int32_t result_exp = (380 - ((a_bits >> 23) & 0xff)) / 2;
double scaled_d = bit_cast<double, uint64_t>(scaled);
ASSERT((scaled_d >= 0.25) && (scaled_d < 1.0));
double r;
if (scaled_d < 0.5) {
// range 0.25 <= a < 0.5
// a in units of 1/512 rounded down.
int32_t q0 = static_cast<int32_t>(scaled_d * 512.0);
// reciprocal root r.
r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
} else {
// range 0.5 <= a < 1.0
// a in units of 1/256 rounded down.
int32_t q1 = static_cast<int32_t>(scaled_d * 256.0);
// reciprocal root r.
r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
}
// r in units of 1/256 rounded to nearest.
int32_t s = static_cast<int>(256.0 * r + 0.5);
double estimate = static_cast<double>(s) / 256.0;
ASSERT((estimate >= 1.0) && (estimate <= (511.0 / 256.0)));
// result = 0 : result_exp<7:0> : estimate<51:29>
// The sign bit is always clear; the fraction is the top 23 fraction bits of
// the double-precision estimate.
int32_t result_bits =
((result_exp & 0xff) << 23) |
((bit_cast<uint64_t, double>(estimate) >> 29) & 0x7fffff);
return bit_cast<float, int32_t>(result_bits);
}
// Simulator model of the reciprocal estimate, applied to each of the four
// lanes when decoding "vrecpeq".
// NOTE(review): this appears to be the pre-change copy that the commit
// replaces with the shared ReciprocalEstimate -- confirm against the diff.
static float arm_recip_estimate(float a) {
// From the ARM Architecture Reference Manual A2-85.
// Infinite or very large (|a| >= 2^126) operands return 0.0; the sign of `a`
// is not carried over to the result.
if (isinf(a) || (fabs(a) >= exp2f(126)))
return 0.0;
// -0.0 compares equal to 0.0, so both zeros take this branch and return
// kPosInfinity regardless of the operand's sign.
else if (a == 0.0)
return kPosInfinity;
// NaN operands are passed through unchanged.
else if (isnan(a))
return a;
uint32_t a_bits = bit_cast<uint32_t, float>(a);
// scaled = '0011 1111 1110' : a<22:0> : Zeros(29)
uint64_t scaled = (static_cast<uint64_t>(0x3fe) << 52) |
((static_cast<uint64_t>(a_bits) & 0x7fffff) << 29);
// result_exp = 253 - UInt(a<30:23>)
int32_t result_exp = 253 - ((a_bits >> 23) & 0xff);
// An exponent field of 0 (a denormal operand) would make result_exp 253 and
// fail this assertion.
ASSERT((result_exp >= 1) && (result_exp <= 252));
double scaled_d = bit_cast<double, uint64_t>(scaled);
ASSERT((scaled_d >= 0.5) && (scaled_d < 1.0));
// a in units of 1/512 rounded down.
int32_t q = static_cast<int32_t>(scaled_d * 512.0);
// reciprocal r.
double r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
// r in units of 1/256 rounded to nearest.
int32_t s = static_cast<int32_t>(256.0 * r + 0.5);
double estimate = static_cast<double>(s) / 256.0;
ASSERT((estimate >= 1.0) && (estimate <= (511.0 / 256.0)));
// result = sign : result_exp<7:0> : estimate<51:29>
// The operand's sign bit is kept, the exponent is result_exp, and the fraction
// is the top 23 fraction bits of the double-precision estimate.
int32_t result_bits =
(a_bits & 0x80000000) | ((result_exp & 0xff) << 23) |
((bit_cast<uint64_t, double>(estimate) >> 29) & 0x7fffff);
return bit_cast<float, int32_t>(result_bits);
}
static void simd_value_swap(simd_value_t* s1,
int i1,
simd_value_t* s2,
@ -3213,26 +3123,26 @@ void Simulator::DecodeSIMDDataProcessing(Instr* instr) {
(instr->Bits(16, 4) == 11)) {
// Format(instr, "vrecpeq 'qd, 'qm");
for (int i = 0; i < 4; i++) {
s8d.data_[i].f = arm_recip_estimate(s8m.data_[i].f);
s8d.data_[i].f = ReciprocalEstimate(s8m.data_[i].f);
}
} else if ((instr->Bits(8, 4) == 15) && (instr->Bit(4) == 1) &&
(instr->Bits(20, 2) == 0) && (instr->Bits(23, 2) == 0)) {
// Format(instr, "vrecpsq 'qd, 'qn, 'qm");
for (int i = 0; i < 4; i++) {
s8d.data_[i].f = 2.0 - (s8n.data_[i].f * s8m.data_[i].f);
s8d.data_[i].f = ReciprocalStep(s8n.data_[i].f, s8m.data_[i].f);
}
} else if ((instr->Bits(8, 4) == 5) && (instr->Bit(4) == 0) &&
(instr->Bits(20, 2) == 3) && (instr->Bits(23, 2) == 3) &&
(instr->Bit(7) == 1) && (instr->Bits(16, 4) == 11)) {
// Format(instr, "vrsqrteqs 'qd, 'qm");
for (int i = 0; i < 4; i++) {
s8d.data_[i].f = arm_reciprocal_sqrt_estimate(s8m.data_[i].f);
s8d.data_[i].f = ReciprocalSqrtEstimate(s8m.data_[i].f);
}
} else if ((instr->Bits(8, 4) == 15) && (instr->Bit(4) == 1) &&
(instr->Bits(20, 2) == 2) && (instr->Bits(23, 2) == 0)) {
// Format(instr, "vrsqrtsqs 'qd, 'qn, 'qm");
for (int i = 0; i < 4; i++) {
s8d.data_[i].f = (3.0 - s8n.data_[i].f * s8m.data_[i].f) / 2.0;
s8d.data_[i].f = ReciprocalSqrtStep(s8n.data_[i].f, s8m.data_[i].f);
}
} else if ((instr->Bits(8, 4) == 12) && (instr->Bit(4) == 0) &&
(instr->Bits(20, 2) == 3) && (instr->Bits(23, 2) == 3) &&