Use a minimax polynomial approximation instead of a Pade rational

function approximation for the second step. The polynomial has degree 2 for cbrtf() and 4 for cbrt(). These degrees are minimal for the final accuracy to be essentially the same as before (slightly smaller). Adjust the rounding between steps 2 and 3 to match. Unfortunately, for cbrt(), this breaks the claimed accuracy slightly although incorrect rounding doesn't. Claim less accuracy since its not worth pessimizing the polynomial or relying on exhaustive testing to get insignificantly more accuracy. This saves about 30 cycles on Athlons (mainly by avoiding 2 divisions) so it gives an overall optimization in the 10-25% range (a larger percentage for float precision, especially in 32-bit mode, since other overheads are more dominant for double precision, surprisingly more in 32-bit mode).
svn path=/head/; revision=153520
2024-10-05 08:00:30 +00:00 · 2005-12-19 00:22:03 +00:00 · 2005-12-19 00:22:03 +00:00 · c5964538b7 · 2020-12-20 02:59:44 +00:00
parent f94b154580
commit c5964538b7
2 changed files with 30 additions and 37 deletions
--- a/lib/msun/src/s_cbrt.c
+++ b/lib/msun/src/s_cbrt.c
@ -26,12 +26,13 @@ static const u_int32_t
 	B1 = 715094163, /* B1 = (1023-1023/3-0.03306235651)*2**20 */
 	B2 = 696219795; /* B2 = (1023-1023/3-54/3-0.03306235651)*2**20 */

+/* |1/cbrt(x) - p(x)| < 2**-23.5 (~[-7.93e-8, 7.929e-8]). */
 static const double
-C =  5.42857142857142815906e-01, /* 19/35     = 0x3FE15F15, 0xF15F15F1 */
-D = -7.05306122448979611050e-01, /* -864/1225 = 0xBFE691DE, 0x2532C834 */
-E =  1.41428571428571436819e+00, /* 99/70     = 0x3FF6A0EA, 0x0EA0EA0F */
-F =  1.60714285714285720630e+00, /* 45/28     = 0x3FF9B6DB, 0x6DB6DB6E */
-G =  3.57142857142857150787e-01; /* 5/14      = 0x3FD6DB6D, 0xB6DB6DB7 */
+P0 =  1.87595182427177009643,		/* 0x3ffe03e6, 0x0f61e692 */
+P1 = -1.88497979543377169875,		/* 0xbffe28e0, 0x92f02420 */
+P2 =  1.621429720105354466140,		/* 0x3ff9f160, 0x4a49d6c2 */
+P3 = -0.758397934778766047437,		/* 0xbfe844cb, 0xbee751d9 */
+P4 =  0.145996192886612446982;		/* 0x3fc2b000, 0xd4e4edd7 */

 double
 cbrt(double x)
@ -78,36 +79,30 @@ cbrt(double x)
 	    SET_HIGH_WORD(t,sign|(hx/3+B1));

    /*
-     * New cbrt to 25 bits:
-     *    cbrt(x) = t*cbrt(x/t**3) ~= t*R(x/t**3)
-     * where R(r) = (14*r**2 + 35*r + 5)/(5*r**2 + 35*r + 14) is the
-     * (2,2) Pade approximation to cbrt(r) at r = 1.  We replace
-     * r = x/t**3 by 1/r = t**3/x since the latter can be evaluated
-     * more efficiently, and rearrange the expression for R(r) to use
-     * 4 additions and 2 divisions instead of the 4 additions, 4
-     * multiplications and 1 division that would be required using
-     * Horner's rule on the numerator and denominator.  t being good
-     * to 32 bits means that |t/cbrt(x)-1| < 1/32, so |x/t**3-1| < 0.1
-     * and for R(r) we can use any approximation to cbrt(r) that is good
-     * to 20 bits on [0.9, 1.1].  The (2,2) Pade approximation is not an
-     * especially good choice.
+     * New cbrt to 23 bits:
+     *    cbrt(x) = t*cbrt(x/t**3) ~= t*P(t**3/x)
+     * where P(r) is a polynomial of degree 4 that approximates 1/cbrt(r)
+     * to within 2**-23.5 when |r - 1| < 1/10.  The rough approximation
+     * has produced t such than |t/cbrt(x) - 1| ~< 1/32, and cubing this
+     * gives us bounds for r = t**3/x.
+     *
+     * Try to optimize for parallel evaluation as in k_tanf.c.
     */
-	r=t*t/x;
-	s=C+r*t;
-	t*=G+F/(s+E+D/s);
+	r=(t*t)*(t/x);
+	t=t*((P0+r*(P1+r*P2))+((r*r)*r)*(P3+r*P4));

    /*
-     * Round t away from zero to 25 bits (sloppily except for ensuring that
+     * Round t away from zero to 23 bits (sloppily except for ensuring that
     * the result is larger in magnitude than cbrt(x) but not much more than
-     * 2 25-bit ulps larger).  With rounding towards zero, the error bound
-     * would be ~5/6 instead of ~4/6.  With a maximum error of 1 25-bit ulps
+     * 2 23-bit ulps larger).  With rounding towards zero, the error bound
+     * would be ~5/6 instead of ~4/6.  With a maximum error of 2 23-bit ulps
     * in the rounded t, the infinite-precision error in the Newton
     * approximation barely affects third digit in the the final error
-     * 0.667; the error in the rounded t can be up to about 12 25-bit ulps
+     * 0.667; the error in the rounded t can be up to about 3 23-bit ulps
     * before the final error is larger than 0.667 ulps.
     */
 	u.value=t;
-	u.bits=(u.bits+0x20000000)&0xfffffffff0000000ULL;
+	u.bits=(u.bits+0x80000000)&0xffffffffc0000000ULL;
 	t=u.value;

    /* one step Newton iteration to 53 bits with error < 0.667 ulps */
--- a/lib/msun/src/s_cbrtf.c
+++ b/lib/msun/src/s_cbrtf.c
@ -28,12 +28,11 @@ static const unsigned
 	B1 = 709958130, /* B1 = (127-127.0/3-0.03306235651)*2**23 */
 	B2 = 642849266; /* B2 = (127-127.0/3-24/3-0.03306235651)*2**23 */

+/* |1/cbrt(x) - p(x)| < 2**-14.5 (~[-4.37e-4, 4.366e-5]). */
 static const float
-C =  5.4285717010e-01, /* 19/35     = 0x3f0af8b0 */
-D = -7.0530611277e-01, /* -864/1225 = 0xbf348ef1 */
-E =  1.4142856598e+00, /* 99/70     = 0x3fb50750 */
-F =  1.6071428061e+00, /* 45/28     = 0x3fcdb6db */
-G =  3.5714286566e-01; /* 5/14      = 0x3eb6db6e */
+P0 =  1.5586718321,			/*  0x3fc7828f */
+P1 = -0.78271341324,			/* -0xbf485fe8 */
+P2 =  0.22403796017;			/*  0x3e656a35 */

 float
 cbrtf(float x)
@ -59,10 +58,9 @@ cbrtf(float x)
 	} else
 	    SET_FLOAT_WORD(t,sign|(hx/3+B1));

-    /* new cbrt to 23 bits */
-	r=t*t/x;
-	s=C+r*t;
-	t*=G+F/(s+E+D/s);
+    /* new cbrt to 14 bits */
+	r=(t*t)*(t/x);
+	t=t*((P0+r*P1)+(r*r)*P2);

    /*
     * Round t away from zero to 12 bits (sloppily except for ensuring that
@ -73,9 +71,9 @@ cbrtf(float x)
     * the second digit instead of the third digit of 4/6 = 0.666..., etc.
     */
 	GET_FLOAT_WORD(high,t);
-	SET_FLOAT_WORD(t,(high+0x1002)&0xfffff000);
+	SET_FLOAT_WORD(t,(high+0x1800)&0xfffff000);

-    /* one step Newton iteration to 24 bits with error < 0.667 ulps */
+    /* one step Newton iteration to 24 bits with error < 0.669 ulps */
 	s=t*t;				/* t*t is exact */
 	r=x/s;				/* error <= 0.5 ulps; |r| < |t| */
 	w=t+t;				/* t+t is exact */