From 3da9c349d4468df5976339bc2b66bcfbf60e7f1d Mon Sep 17 00:00:00 2001
From: Vyacheslav Egorov <vegorov@google.com>
Date: Thu, 8 Mar 2018 17:50:29 +0000
Subject: [PATCH] [vm/simarm] Fix VRECPS/VRSQRTSQS instruction implementation.

This instruction handles 0.0 and infinity operands
specially because otherwise it produces NaN where it
should produce appropriate infinity or zero.

Fixes https://github.com/dart-lang/sdk/issues/24399
Fixes https://github.com/dart-lang/sdk/issues/26675

Change-Id: I0741d0daa8b92b4dcd780b1453c9ec449552b1fd
Reviewed-on: https://dart-review.googlesource.com/45382
Reviewed-by: Zach Anderson <zra@google.com>
---
 .../vm/compiler/assembler/assembler_arm.cc    | 112 ++++++++++++++
 runtime/vm/compiler/assembler/assembler_arm.h |  10 ++
 .../compiler/assembler/assembler_arm_test.cc  | 137 ++++++------------
 runtime/vm/simulator_arm.cc                   |  98 +------------
 4 files changed, 170 insertions(+), 187 deletions(-)
diff --git a/runtime/vm/compiler/assembler/assembler_arm.cc b/runtime/vm/compiler/assembler/assembler_arm.cc
index 14b68c011c0..acca2467f43 100644
--- a/runtime/vm/compiler/assembler/assembler_arm.cc
+++ b/runtime/vm/compiler/assembler/assembler_arm.cc
@@ -3426,6 +3426,118 @@ const char* Assembler::FpuRegisterName(FpuRegister reg) {
   return fpu_reg_names[reg];
 }
 
+float ReciprocalEstimate(float a) {
+  // From the ARM Architecture Reference Manual A2-85.
+  if (isinf(a) || (fabs(a) >= exp2f(126)))
+    return a >= 0.0f ? 0.0f : -0.0f;
+  else if (a == 0.0f)
+    return 1.0f / a;
+  else if (isnan(a))
+    return a;
+
+  uint32_t a_bits = bit_cast<uint32_t, float>(a);
+  // scaled = '0011 1111 1110' : a<22:0> : Zeros(29)
+  uint64_t scaled = (static_cast<uint64_t>(0x3fe) << 52) |
+                    ((static_cast<uint64_t>(a_bits) & 0x7fffff) << 29);
+  // result_exp = 253 - UInt(a<30:23>)
+  int32_t result_exp = 253 - ((a_bits >> 23) & 0xff);
+  ASSERT((result_exp >= 1) && (result_exp <= 252));
+
+  double scaled_d = bit_cast<double, uint64_t>(scaled);
+  ASSERT((scaled_d >= 0.5) && (scaled_d < 1.0));
+
+  // a in units of 1/512 rounded down.
+  int32_t q = static_cast<int32_t>(scaled_d * 512.0);
+  // reciprocal r.
+  double r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
+  // r in units of 1/256 rounded to nearest.
+  int32_t s = static_cast<int32_t>(256.0 * r + 0.5);
+  double estimate = static_cast<double>(s) / 256.0;
+  ASSERT((estimate >= 1.0) && (estimate <= (511.0 / 256.0)));
+
+  // result = sign : result_exp<7:0> : estimate<51:29>
+  int32_t result_bits =
+      (a_bits & 0x80000000) | ((result_exp & 0xff) << 23) |
+      ((bit_cast<uint64_t, double>(estimate) >> 29) & 0x7fffff);
+  return bit_cast<float, int32_t>(result_bits);
+}
+
+float ReciprocalStep(float op1, float op2) {
+  float p;
+  if ((isinf(op1) && op2 == 0.0f) || (op1 == 0.0f && isinf(op2))) {
+    p = 0.0f;
+  } else {
+    p = op1 * op2;
+  }
+  return 2.0f - p;
+}
+
+float ReciprocalSqrtEstimate(float a) {
+  // From the ARM Architecture Reference Manual A2-87.
+  if (a < 0.0f)
+    return NAN;
+  else if (isinf(a) || (fabs(a) >= exp2f(126)))
+    return 0.0f;
+  else if (a == 0.0)
+    return 1.0f / a;
+  else if (isnan(a))
+    return a;
+
+  uint32_t a_bits = bit_cast<uint32_t, float>(a);
+  uint64_t scaled;
+  if (((a_bits >> 23) & 1) != 0) {
+    // scaled = '0 01111111101' : operand<22:0> : Zeros(29)
+    scaled = (static_cast<uint64_t>(0x3fd) << 52) |
+             ((static_cast<uint64_t>(a_bits) & 0x7fffff) << 29);
+  } else {
+    // scaled = '0 01111111110' : operand<22:0> : Zeros(29)
+    scaled = (static_cast<uint64_t>(0x3fe) << 52) |
+             ((static_cast<uint64_t>(a_bits) & 0x7fffff) << 29);
+  }
+  // result_exp = (380 - UInt(operand<30:23>) DIV 2;
+  int32_t result_exp = (380 - ((a_bits >> 23) & 0xff)) / 2;
+
+  double scaled_d = bit_cast<double, uint64_t>(scaled);
+  ASSERT((scaled_d >= 0.25) && (scaled_d < 1.0));
+
+  double r;
+  if (scaled_d < 0.5) {
+    // range 0.25 <= a < 0.5
+
+    // a in units of 1/512 rounded down.
+    int32_t q0 = static_cast<int32_t>(scaled_d * 512.0);
+    // reciprocal root r.
+    r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
+  } else {
+    // range 0.5 <= a < 1.0
+
+    // a in units of 1/256 rounded down.
+    int32_t q1 = static_cast<int32_t>(scaled_d * 256.0);
+    // reciprocal root r.
+    r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
+  }
+  // r in units of 1/256 rounded to nearest.
+  int32_t s = static_cast<int>(256.0 * r + 0.5);
+  double estimate = static_cast<double>(s) / 256.0;
+  ASSERT((estimate >= 1.0) && (estimate <= (511.0 / 256.0)));
+
+  // result = 0 : result_exp<7:0> : estimate<51:29>
+  int32_t result_bits =
+      ((result_exp & 0xff) << 23) |
+      ((bit_cast<uint64_t, double>(estimate) >> 29) & 0x7fffff);
+  return bit_cast<float, int32_t>(result_bits);
+}
+
+float ReciprocalSqrtStep(float op1, float op2) {
+  float p;
+  if ((isinf(op1) && op2 == 0.0f) || (op1 == 0.0f && isinf(op2))) {
+    p = 0.0f;
+  } else {
+    p = op1 * op2;
+  }
+  return (3.0f - p) / 2.0f;
+}
+
 }  // namespace dart
 
 #endif  // defined(TARGET_ARCH_ARM) && !defined(DART_PRECOMPILED_RUNTIME)
diff --git a/runtime/vm/compiler/assembler/assembler_arm.h b/runtime/vm/compiler/assembler/assembler_arm.h
index b13d839acf2..36936e0841e 100644
--- a/runtime/vm/compiler/assembler/assembler_arm.h
+++ b/runtime/vm/compiler/assembler/assembler_arm.h
@@ -1238,6 +1238,16 @@ class Assembler : public ValueObject {
   DISALLOW_COPY_AND_ASSIGN(Assembler);
 };
 
+// Floating-point reciprocal estimate and step (see pages A2-85 and A2-86 of
+// ARM Architecture Reference Manual ARMv7-A edition).
+float ReciprocalEstimate(float op);
+float ReciprocalStep(float op1, float op2);
+
+// Floating-point reciprocal square root estimate and step (see pages A2-87 to
+// A2-90 of ARM Architecture Reference Manual ARMv7-A edition).
+float ReciprocalSqrtEstimate(float op);
+float ReciprocalSqrtStep(float op1, float op2);
+
 }  // namespace dart
 
 #endif  // RUNTIME_VM_COMPILER_ASSEMBLER_ASSEMBLER_ARM_H_
diff --git a/runtime/vm/compiler/assembler/assembler_arm_test.cc b/runtime/vm/compiler/assembler/assembler_arm_test.cc
index 2809d585086..2b1c20d02ea 100644
--- a/runtime/vm/compiler/assembler/assembler_arm_test.cc
+++ b/runtime/vm/compiler/assembler/assembler_arm_test.cc
@@ -13,6 +13,48 @@
 
 namespace dart {
 
+TEST_CASE(ReciprocalOps) {
+  EXPECT_EQ(true, isinf(ReciprocalEstimate(-0.0f)));
+  EXPECT_EQ(true, signbit(ReciprocalEstimate(-0.0f)));
+  EXPECT_EQ(true, isinf(ReciprocalEstimate(0.0f)));
+  EXPECT_EQ(true, !signbit(ReciprocalEstimate(0.0f)));
+
+#define AS_UINT32(v) (bit_cast<uint32_t, float>(v))
+#define EXPECT_BITWISE_EQ(a, b) EXPECT_EQ(AS_UINT32(a), AS_UINT32(b))
+
+  EXPECT_BITWISE_EQ(0.0f, ReciprocalEstimate(kPosInfinity));
+  EXPECT_BITWISE_EQ(-0.0f, ReciprocalEstimate(kNegInfinity));
+  EXPECT_BITWISE_EQ(2.0f, ReciprocalStep(0.0f, kPosInfinity));
+  EXPECT_BITWISE_EQ(2.0f, ReciprocalStep(0.0f, kNegInfinity));
+  EXPECT_BITWISE_EQ(2.0f, ReciprocalStep(-0.0f, kPosInfinity));
+  EXPECT_BITWISE_EQ(2.0f, ReciprocalStep(-0.0f, kNegInfinity));
+  EXPECT_BITWISE_EQ(2.0f, ReciprocalStep(kPosInfinity, 0.0f));
+  EXPECT_BITWISE_EQ(2.0f, ReciprocalStep(kNegInfinity, 0.0f));
+  EXPECT_BITWISE_EQ(2.0f, ReciprocalStep(kPosInfinity, -0.0f));
+  EXPECT_BITWISE_EQ(2.0f, ReciprocalStep(kNegInfinity, -0.0f));
+
+  EXPECT_EQ(true, isnan(ReciprocalSqrtEstimate(-1.0f)));
+  EXPECT_EQ(true, isnan(ReciprocalSqrtEstimate(kNegInfinity)));
+  EXPECT_EQ(true, isnan(ReciprocalSqrtEstimate(-1.0f)));
+  EXPECT_EQ(true, isinf(ReciprocalSqrtEstimate(-0.0f)));
+  EXPECT_EQ(true, signbit(ReciprocalSqrtEstimate(-0.0f)));
+  EXPECT_EQ(true, isinf(ReciprocalSqrtEstimate(0.0f)));
+  EXPECT_EQ(true, !signbit(ReciprocalSqrtEstimate(0.0f)));
+  EXPECT_BITWISE_EQ(0.0f, ReciprocalSqrtEstimate(kPosInfinity));
+
+  EXPECT_BITWISE_EQ(1.5f, ReciprocalSqrtStep(0.0f, kPosInfinity));
+  EXPECT_BITWISE_EQ(1.5f, ReciprocalSqrtStep(0.0f, kNegInfinity));
+  EXPECT_BITWISE_EQ(1.5f, ReciprocalSqrtStep(-0.0f, kPosInfinity));
+  EXPECT_BITWISE_EQ(1.5f, ReciprocalSqrtStep(-0.0f, kNegInfinity));
+  EXPECT_BITWISE_EQ(1.5f, ReciprocalSqrtStep(kPosInfinity, 0.0f));
+  EXPECT_BITWISE_EQ(1.5f, ReciprocalSqrtStep(kNegInfinity, 0.0f));
+  EXPECT_BITWISE_EQ(1.5f, ReciprocalSqrtStep(kPosInfinity, -0.0f));
+  EXPECT_BITWISE_EQ(1.5f, ReciprocalSqrtStep(kNegInfinity, -0.0f));
+
+#undef AS_UINT32
+#undef EXPECT_BITWISE_EQ
+}
+
 #define __ assembler->
 
 ASSEMBLER_TEST_GENERATE(Simple, assembler) {
@@ -3416,43 +3458,6 @@ ASSEMBLER_TEST_RUN(Vmaxqs, test) {
   }
 }
 
-// This is the same function as in the Simulator.
-static float arm_recip_estimate(float a) {
-  // From the ARM Architecture Reference Manual A2-85.
-  if (isinf(a) || (fabs(a) >= exp2f(126)))
-    return 0.0;
-  else if (a == 0.0)
-    return kPosInfinity;
-  else if (isnan(a))
-    return a;
-
-  uint32_t a_bits = bit_cast<uint32_t, float>(a);
-  // scaled = '0011 1111 1110' : a<22:0> : Zeros(29)
-  uint64_t scaled = (static_cast<uint64_t>(0x3fe) << 52) |
-                    ((static_cast<uint64_t>(a_bits) & 0x7fffff) << 29);
-  // result_exp = 253 - UInt(a<30:23>)
-  int32_t result_exp = 253 - ((a_bits >> 23) & 0xff);
-  ASSERT((result_exp >= 1) && (result_exp <= 252));
-
-  double scaled_d = bit_cast<double, uint64_t>(scaled);
-  ASSERT((scaled_d >= 0.5) && (scaled_d < 1.0));
-
-  // a in units of 1/512 rounded down.
-  int32_t q = static_cast<int32_t>(scaled_d * 512.0);
-  // reciprocal r.
-  double r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
-  // r in units of 1/256 rounded to nearest.
-  int32_t s = static_cast<int32_t>(256.0 * r + 0.5);
-  double estimate = static_cast<double>(s) / 256.0;
-  ASSERT((estimate >= 1.0) && (estimate <= (511.0 / 256.0)));
-
-  // result = sign : result_exp<7:0> : estimate<51:29>
-  int32_t result_bits =
-      (a_bits & 0x80000000) | ((result_exp & 0xff) << 23) |
-      ((bit_cast<uint64_t, double>(estimate) >> 29) & 0x7fffff);
-  return bit_cast<float, int32_t>(result_bits);
-}
-
 ASSEMBLER_TEST_GENERATE(Vrecpeqs, assembler) {
   if (TargetCPUFeatures::neon_supported()) {
     __ LoadSImmediate(S4, 147.0);
@@ -3469,7 +3474,7 @@ ASSEMBLER_TEST_RUN(Vrecpeqs, test) {
   if (TargetCPUFeatures::neon_supported()) {
     typedef float (*Vrecpeqs)() DART_UNUSED;
     float res = EXECUTE_TEST_CODE_FLOAT(Vrecpeqs, test->entry());
-    EXPECT_FLOAT_EQ(arm_recip_estimate(147.0), res, 0.0001f);
+    EXPECT_FLOAT_EQ(ReciprocalEstimate(147.0), res, 0.0001f);
   }
 }
 
@@ -3526,60 +3531,6 @@ ASSEMBLER_TEST_RUN(Reciprocal, test) {
   }
 }
 
-static float arm_reciprocal_sqrt_estimate(float a) {
-  // From the ARM Architecture Reference Manual A2-87.
-  if (isinf(a) || (fabs(a) >= exp2f(126)))
-    return 0.0;
-  else if (a == 0.0)
-    return kPosInfinity;
-  else if (isnan(a))
-    return a;
-
-  uint32_t a_bits = bit_cast<uint32_t, float>(a);
-  uint64_t scaled;
-  if (((a_bits >> 23) & 1) != 0) {
-    // scaled = '0 01111111101' : operand<22:0> : Zeros(29)
-    scaled = (static_cast<uint64_t>(0x3fd) << 52) |
-             ((static_cast<uint64_t>(a_bits) & 0x7fffff) << 29);
-  } else {
-    // scaled = '0 01111111110' : operand<22:0> : Zeros(29)
-    scaled = (static_cast<uint64_t>(0x3fe) << 52) |
-             ((static_cast<uint64_t>(a_bits) & 0x7fffff) << 29);
-  }
-  // result_exp = (380 - UInt(operand<30:23>) DIV 2;
-  int32_t result_exp = (380 - ((a_bits >> 23) & 0xff)) / 2;
-
-  double scaled_d = bit_cast<double, uint64_t>(scaled);
-  ASSERT((scaled_d >= 0.25) && (scaled_d < 1.0));
-
-  double r;
-  if (scaled_d < 0.5) {
-    // range 0.25 <= a < 0.5
-
-    // a in units of 1/512 rounded down.
-    int32_t q0 = static_cast<int32_t>(scaled_d * 512.0);
-    // reciprocal root r.
-    r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
-  } else {
-    // range 0.5 <= a < 1.0
-
-    // a in units of 1/256 rounded down.
-    int32_t q1 = static_cast<int32_t>(scaled_d * 256.0);
-    // reciprocal root r.
-    r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
-  }
-  // r in units of 1/256 rounded to nearest.
-  int32_t s = static_cast<int>(256.0 * r + 0.5);
-  double estimate = static_cast<double>(s) / 256.0;
-  ASSERT((estimate >= 1.0) && (estimate <= (511.0 / 256.0)));
-
-  // result = 0 : result_exp<7:0> : estimate<51:29>
-  int32_t result_bits =
-      ((result_exp & 0xff) << 23) |
-      ((bit_cast<uint64_t, double>(estimate) >> 29) & 0x7fffff);
-  return bit_cast<float, int32_t>(result_bits);
-}
-
 ASSEMBLER_TEST_GENERATE(Vrsqrteqs, assembler) {
   if (TargetCPUFeatures::neon_supported()) {
     __ LoadSImmediate(S4, 147.0);
@@ -3597,7 +3548,7 @@ ASSEMBLER_TEST_RUN(Vrsqrteqs, test) {
   if (TargetCPUFeatures::neon_supported()) {
     typedef float (*Vrsqrteqs)() DART_UNUSED;
     float res = EXECUTE_TEST_CODE_FLOAT(Vrsqrteqs, test->entry());
-    EXPECT_FLOAT_EQ(arm_reciprocal_sqrt_estimate(147.0), res, 0.0001f);
+    EXPECT_FLOAT_EQ(ReciprocalSqrtEstimate(147.0), res, 0.0001f);
   }
 }
 
diff --git a/runtime/vm/simulator_arm.cc b/runtime/vm/simulator_arm.cc
index ca885eaca61..fa91d0ac1da 100644
--- a/runtime/vm/simulator_arm.cc
+++ b/runtime/vm/simulator_arm.cc
@@ -2859,96 +2859,6 @@ void Simulator::DecodeType7(Instr* instr) {
   }
 }
 
-static float arm_reciprocal_sqrt_estimate(float a) {
-  // From the ARM Architecture Reference Manual A2-87.
-  if (isinf(a) || (fabs(a) >= exp2f(126)))
-    return 0.0;
-  else if (a == 0.0)
-    return kPosInfinity;
-  else if (isnan(a))
-    return a;
-
-  uint32_t a_bits = bit_cast<uint32_t, float>(a);
-  uint64_t scaled;
-  if (((a_bits >> 23) & 1) != 0) {
-    // scaled = '0 01111111101' : operand<22:0> : Zeros(29)
-    scaled = (static_cast<uint64_t>(0x3fd) << 52) |
-             ((static_cast<uint64_t>(a_bits) & 0x7fffff) << 29);
-  } else {
-    // scaled = '0 01111111110' : operand<22:0> : Zeros(29)
-    scaled = (static_cast<uint64_t>(0x3fe) << 52) |
-             ((static_cast<uint64_t>(a_bits) & 0x7fffff) << 29);
-  }
-  // result_exp = (380 - UInt(operand<30:23>) DIV 2;
-  int32_t result_exp = (380 - ((a_bits >> 23) & 0xff)) / 2;
-
-  double scaled_d = bit_cast<double, uint64_t>(scaled);
-  ASSERT((scaled_d >= 0.25) && (scaled_d < 1.0));
-
-  double r;
-  if (scaled_d < 0.5) {
-    // range 0.25 <= a < 0.5
-
-    // a in units of 1/512 rounded down.
-    int32_t q0 = static_cast<int32_t>(scaled_d * 512.0);
-    // reciprocal root r.
-    r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
-  } else {
-    // range 0.5 <= a < 1.0
-
-    // a in units of 1/256 rounded down.
-    int32_t q1 = static_cast<int32_t>(scaled_d * 256.0);
-    // reciprocal root r.
-    r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
-  }
-  // r in units of 1/256 rounded to nearest.
-  int32_t s = static_cast<int>(256.0 * r + 0.5);
-  double estimate = static_cast<double>(s) / 256.0;
-  ASSERT((estimate >= 1.0) && (estimate <= (511.0 / 256.0)));
-
-  // result = 0 : result_exp<7:0> : estimate<51:29>
-  int32_t result_bits =
-      ((result_exp & 0xff) << 23) |
-      ((bit_cast<uint64_t, double>(estimate) >> 29) & 0x7fffff);
-  return bit_cast<float, int32_t>(result_bits);
-}
-
-static float arm_recip_estimate(float a) {
-  // From the ARM Architecture Reference Manual A2-85.
-  if (isinf(a) || (fabs(a) >= exp2f(126)))
-    return 0.0;
-  else if (a == 0.0)
-    return kPosInfinity;
-  else if (isnan(a))
-    return a;
-
-  uint32_t a_bits = bit_cast<uint32_t, float>(a);
-  // scaled = '0011 1111 1110' : a<22:0> : Zeros(29)
-  uint64_t scaled = (static_cast<uint64_t>(0x3fe) << 52) |
-                    ((static_cast<uint64_t>(a_bits) & 0x7fffff) << 29);
-  // result_exp = 253 - UInt(a<30:23>)
-  int32_t result_exp = 253 - ((a_bits >> 23) & 0xff);
-  ASSERT((result_exp >= 1) && (result_exp <= 252));
-
-  double scaled_d = bit_cast<double, uint64_t>(scaled);
-  ASSERT((scaled_d >= 0.5) && (scaled_d < 1.0));
-
-  // a in units of 1/512 rounded down.
-  int32_t q = static_cast<int32_t>(scaled_d * 512.0);
-  // reciprocal r.
-  double r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
-  // r in units of 1/256 rounded to nearest.
-  int32_t s = static_cast<int32_t>(256.0 * r + 0.5);
-  double estimate = static_cast<double>(s) / 256.0;
-  ASSERT((estimate >= 1.0) && (estimate <= (511.0 / 256.0)));
-
-  // result = sign : result_exp<7:0> : estimate<51:29>
-  int32_t result_bits =
-      (a_bits & 0x80000000) | ((result_exp & 0xff) << 23) |
-      ((bit_cast<uint64_t, double>(estimate) >> 29) & 0x7fffff);
-  return bit_cast<float, int32_t>(result_bits);
-}
-
 static void simd_value_swap(simd_value_t* s1,
                             int i1,
                             simd_value_t* s2,
@@ -3213,26 +3123,26 @@ void Simulator::DecodeSIMDDataProcessing(Instr* instr) {
                (instr->Bits(16, 4) == 11)) {
       // Format(instr, "vrecpeq 'qd, 'qm");
       for (int i = 0; i < 4; i++) {
-        s8d.data_[i].f = arm_recip_estimate(s8m.data_[i].f);
+        s8d.data_[i].f = ReciprocalEstimate(s8m.data_[i].f);
       }
     } else if ((instr->Bits(8, 4) == 15) && (instr->Bit(4) == 1) &&
                (instr->Bits(20, 2) == 0) && (instr->Bits(23, 2) == 0)) {
       // Format(instr, "vrecpsq 'qd, 'qn, 'qm");
       for (int i = 0; i < 4; i++) {
-        s8d.data_[i].f = 2.0 - (s8n.data_[i].f * s8m.data_[i].f);
+        s8d.data_[i].f = ReciprocalStep(s8n.data_[i].f, s8m.data_[i].f);
       }
     } else if ((instr->Bits(8, 4) == 5) && (instr->Bit(4) == 0) &&
                (instr->Bits(20, 2) == 3) && (instr->Bits(23, 2) == 3) &&
                (instr->Bit(7) == 1) && (instr->Bits(16, 4) == 11)) {
       // Format(instr, "vrsqrteqs 'qd, 'qm");
       for (int i = 0; i < 4; i++) {
-        s8d.data_[i].f = arm_reciprocal_sqrt_estimate(s8m.data_[i].f);
+        s8d.data_[i].f = ReciprocalSqrtEstimate(s8m.data_[i].f);
       }
     } else if ((instr->Bits(8, 4) == 15) && (instr->Bit(4) == 1) &&
                (instr->Bits(20, 2) == 2) && (instr->Bits(23, 2) == 0)) {
       // Format(instr, "vrsqrtsqs 'qd, 'qn, 'qm");
       for (int i = 0; i < 4; i++) {
-        s8d.data_[i].f = (3.0 - s8n.data_[i].f * s8m.data_[i].f) / 2.0;
+        s8d.data_[i].f = ReciprocalSqrtStep(s8n.data_[i].f, s8m.data_[i].f);
       }
     } else if ((instr->Bits(8, 4) == 12) && (instr->Bit(4) == 0) &&
                (instr->Bits(20, 2) == 3) && (instr->Bits(23, 2) == 3) &&