Use only double precision for "kernel" cosf and sinf (except for
returning float).  The functions are renamed from __kernel_{cos,sin}f()
to __kernel_{cos,sin}df() so that misuses of them will cause link errors
and not crashes.

This version is an almost-routine translation with no special optimizations
for accuracy or efficiency.  The not-quite-routine part is that in
__kernel_cosf(), regenerating the minimax polynomial with double
precision coefficients gives a coefficient for the x**2 term that is
not quite -0.5, so the literal 0.5 in the code and the related `hz'
variable need to be modified; also, the special code for reducing the
error in 1.0-x**2*0.5 is no longer needed, so it is convenient to
adjust all the logic for the x**2 term a little.  Note that without
extra precision, it would be very bad to use a coefficient other
than -0.5 for the x**2 term -- the old version depends on multiplication
by -0.5 being infinitely precise so as not to need even more special
code for reducing the error in 1-x**2*0.5.

This gives an unimportant increase in accuracy, from ~0.8 to ~0.501
ulps.  Almost all of the error is from the final rounding step, since
the choice of the minimax polynomials so that their contribution to the
error is a bit less than 0.5 ulps just happens to give contributions that
are significantly less (~.001 ulps).

On Athlons, for uniformly distributed args in [-2pi, 2pi], this gives
overall speed increases in the 10-20% range, despite giving a speed
decrease of typically 19% (from 31 cycles up to 37) for sinf() on args
in [-pi/4, pi/4].
This commit is contained in:
Bruce Evans 2005-11-28 04:58:57 +00:00
parent a8c06a09c4
commit 59aad933ab
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=152869
6 changed files with 44 additions and 71 deletions

View file

@ -98,7 +98,7 @@ static const float zero= 0.0000000000e+00;
GET_FLOAT_WORD(ix,x);
ix &= 0x7fffffff;
if(ix<0x3e800000) return __kernel_sinf(pi*x,zero,0);
if(ix<0x3e800000) return __kernel_sindf(pi*x);
y = -x; /* x is assume negative */
/*
@ -122,14 +122,14 @@ static const float zero= 0.0000000000e+00;
}
}
switch (n) {
case 0: y = __kernel_sinf(pi*y,zero,0); break;
case 0: y = __kernel_sindf(pi*y); break;
case 1:
case 2: y = __kernel_cosf(pi*((float)0.5-y),zero); break;
case 2: y = __kernel_cosdf(pi*((float)0.5-y)); break;
case 3:
case 4: y = __kernel_sinf(pi*(one-y),zero,0); break;
case 4: y = __kernel_sindf(pi*(one-y)); break;
case 5:
case 6: y = -__kernel_cosf(pi*(y-(float)1.5),zero); break;
default: y = __kernel_sinf(pi*(y-(float)2.0),zero,0); break;
case 6: y = -__kernel_cosdf(pi*(y-(float)1.5)); break;
default: y = __kernel_sindf(pi*(y-(float)2.0)); break;
}
return -y;
}

View file

@ -14,7 +14,7 @@
* ====================================================
*/
#ifndef INLINE_KERNEL_COSF
#ifndef INLINE_KERNEL_COSDF
#ifndef lint
static char rcsid[] = "$FreeBSD$";
#endif
@ -23,24 +23,23 @@ static char rcsid[] = "$FreeBSD$";
#include "math.h"
#include "math_private.h"
/* |cos(x) - c(x)| < 2**-33.1 (~[-9.39e-11, 1.083e-10]). */
static const float
/* |cos(x) - c(x)| < 2**-34.1 (~[-5.37e-11, 5.295e-11]). */
static const double
one = 1.0,
C1 = 0xaaaaa5.0p-28, /* 0.041666645557 */
C2 = -0xb60615.0p-33, /* -0.0013887310633 */
C3 = 0xccf47d.0p-39; /* 0.000024432542887 */
C0 = -0x1ffffffd0c5e81.0p-54, /* -0.499999997251031003120 */
C1 = 0x155553e1053a42.0p-57, /* 0.0416666233237390631894 */
C2 = -0x16c087e80f1e27.0p-62, /* -0.00138867637746099294692 */
C3 = 0x199342e0ee5069.0p-68; /* 0.0000243904487962774090654 */
#ifdef INLINE_KERNEL_COSF
#ifdef INLINE_KERNEL_COSDF
extern inline
#endif
float
__kernel_cosf(float x, float y)
__kernel_cosdf(double x)
{
float hz,z,r,w;
double z,r;
z = x*x;
r = z*(C1+z*(C2+z*C3));
hz = (float)0.5*z;
w = one-hz;
return w + (((one-w)-hz) + (z*r-x*y));
return (one+z*C0) + z*r;
}

View file

@ -14,7 +14,7 @@
* ====================================================
*/
#ifndef INLINE_KERNEL_SINF
#ifndef INLINE_KERNEL_SINDF
#ifndef lint
static char rcsid[] = "$FreeBSD$";
#endif
@ -23,25 +23,23 @@ static char rcsid[] = "$FreeBSD$";
#include "math.h"
#include "math_private.h"
/* |sin(x)/x - s(x)| < 2**-32.5 (~[-1.57e-10, 1.572e-10]). */
static const float
half = 0.5,
S1 = -0xaaaaab.0p-26, /* -0.16666667163 */
S2 = 0x8888bb.0p-30, /* 0.0083333803341 */
S3 = -0xd02de1.0p-36, /* -0.00019853517006 */
S4 = 0xbe6dbe.0p-42; /* 0.0000028376084629 */
/* |sin(x)/x - s(x)| < 2**-37.5 (~[-4.89e-12, 4.824e-12]). */
static const double
S1 = -0x15555554cbac77.0p-55, /* -0.166666666416265235595 */
S2 = 0x111110896efbb2.0p-59, /* 0.0083333293858894631756 */
S3 = -0x1a00f9e2cae774.0p-65, /* -0.000198393348360966317347 */
S4 = 0x16cd878c3b46a7.0p-71; /* 0.0000027183114939898219064 */
#ifdef INLINE_KERNEL_SINF
#ifdef INLINE_KERNEL_SINDF
extern inline
#endif
float
__kernel_sinf(float x, float y, int iy)
__kernel_sindf(double x)
{
float z,r,v;
double z,r,v;
z = x*x;
v = z*x;
r = S2+z*(S3+z*S4);
if(iy==0) return x+v*(S1+z*r);
else return x-((z*(half*y-v*r)-y)-v*S1);
return x+v*(S1+z*r);
}

View file

@ -264,8 +264,8 @@ int __kernel_rem_pio2(double*,double*,int,int,int,const int*);
/* float versions of fdlibm kernel functions */
int __ieee754_rem_pio2f(float,float*);
float __kernel_sinf(float,float,int);
float __kernel_cosf(float,float);
float __kernel_sindf(double);
float __kernel_cosdf(double);
float __kernel_tandf(double,int);
int __kernel_rem_pio2f(float*,float*,int,int,int,const int*);

View file

@ -18,8 +18,8 @@ static char rcsid[] = "$FreeBSD$";
#endif
#include "math.h"
#define INLINE_KERNEL_COSF
#define INLINE_KERNEL_SINF
#define INLINE_KERNEL_COSDF
#define INLINE_KERNEL_SINDF
#include "math_private.h"
#include "k_cosf.c"
#include "k_sinf.c"
@ -31,18 +31,6 @@ c2pio2 = 2*M_PI_2, /* 0x400921FB, 0x54442D18 */
c3pio2 = 3*M_PI_2, /* 0x4012D97C, 0x7F3321D2 */
c4pio2 = 4*M_PI_2; /* 0x401921FB, 0x54442D18 */
static inline float
__kernel_cosdf(double x)
{
return __kernel_cosf((float)x, x - (float)x);
}
static inline float
__kernel_sindf(double x)
{
return __kernel_sinf((float)x, x - (float)x, 1);
}
float
cosf(float x)
{
@ -55,7 +43,7 @@ cosf(float x)
if(ix <= 0x3f490fda) { /* |x| ~<= pi/4 */
if(ix<0x39800000) /* |x| < 2**-12 */
if(((int)x)==0) return 1.0; /* 1 with inexact if x != 0 */
return __kernel_cosf(x,0.0);
return __kernel_cosdf(x);
}
if(ix<=0x407b53d1) { /* |x| <= ~5*pi/4 */
if(ix<=0x4016cbe3) /* |x| <= ~3pi/4 */
@ -77,11 +65,11 @@ cosf(float x)
else {
n = __ieee754_rem_pio2f(x,y);
switch(n&3) {
case 0: return __kernel_cosf(y[0],y[1]);
case 1: return -__kernel_sinf(y[0],y[1],1);
case 2: return -__kernel_cosf(y[0],y[1]);
case 0: return __kernel_cosdf((double)y[0]+y[1]);
case 1: return -__kernel_sindf((double)y[0]+y[1]);
case 2: return -__kernel_cosdf((double)y[0]+y[1]);
default:
return __kernel_sinf(y[0],y[1],1);
return __kernel_sindf((double)y[0]+y[1]);
}
}
}

View file

@ -18,8 +18,8 @@ static char rcsid[] = "$FreeBSD$";
#endif
#include "math.h"
#define INLINE_KERNEL_COSF
#define INLINE_KERNEL_SINF
#define INLINE_KERNEL_COSDF
#define INLINE_KERNEL_SINDF
#include "math_private.h"
#include "k_cosf.c"
#include "k_sinf.c"
@ -31,18 +31,6 @@ s2pio2 = 2*M_PI_2, /* 0x400921FB, 0x54442D18 */
s3pio2 = 3*M_PI_2, /* 0x4012D97C, 0x7F3321D2 */
s4pio2 = 4*M_PI_2; /* 0x401921FB, 0x54442D18 */
static inline float
__kernel_cosdf(double x)
{
return __kernel_cosf((float)x, x - (float)x);
}
static inline float
__kernel_sindf(double x)
{
return __kernel_sinf((float)x, x - (float)x, 1);
}
float
sinf(float x)
{
@ -55,7 +43,7 @@ sinf(float x)
if(ix <= 0x3f490fda) { /* |x| ~<= pi/4 */
if(ix<0x39800000) /* |x| < 2**-12 */
if(((int)x)==0) return x; /* x with inexact if x != 0 */
return __kernel_sinf(x,0.0,0);
return __kernel_sindf(x);
}
if(ix<=0x407b53d1) { /* |x| <= ~5*pi/4 */
if(ix<=0x4016cbe3) { /* |x| <= ~3pi/4 */
@ -83,11 +71,11 @@ sinf(float x)
else {
n = __ieee754_rem_pio2f(x,y);
switch(n&3) {
case 0: return __kernel_sinf(y[0],y[1],1);
case 1: return __kernel_cosf(y[0],y[1]);
case 2: return -__kernel_sinf(y[0],y[1],1);
case 0: return __kernel_sindf((double)y[0]+y[1]);
case 1: return __kernel_cosdf((double)y[0]+y[1]);
case 2: return -__kernel_sindf((double)y[0]+y[1]);
default:
return -__kernel_cosf(y[0],y[1]);
return -__kernel_cosdf((double)y[0]+y[1]);
}
}
}